1 /*
2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <linux/netdevice.h>
34 #include <net/bonding.h>
35 #include <linux/mlx5/driver.h>
36 #include <linux/mlx5/eswitch.h>
37 #include <linux/mlx5/vport.h>
38 #include "lib/devcom.h"
39 #include "mlx5_core.h"
40 #include "eswitch.h"
41 #include "esw/acl/ofld.h"
42 #include "lag.h"
43 #include "mp.h"
44 #include "mpesw.h"
45
46 enum {
47 MLX5_LAG_EGRESS_PORT_1 = 1,
48 MLX5_LAG_EGRESS_PORT_2,
49 };
50
51 /* General purpose, use for short periods of time.
52 * Beware of lock dependencies (preferably, no locks should be acquired
53 * under it).
54 */
55 static DEFINE_SPINLOCK(lag_lock);
56
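/* Resolve the port selection mode for the LAG context: an explicit
 * hash-based request takes precedence, then multi-port E-Switch
 * (MPESW), with queue affinity as the default.
 */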
57 static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
58 {
59 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
60 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
61
62 if (mode == MLX5_LAG_MODE_MPESW)
63 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
64
65 return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
66 }
67
68 static u8 lag_active_port_bits(struct mlx5_lag *ldev)
69 {
70 u8 enabled_ports[MLX5_MAX_PORTS] = {};
71 u8 active_port = 0;
72 int num_enabled;
73 int idx;
74
75 mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, enabled_ports,
76 &num_enabled);
77 for (idx = 0; idx < num_enabled; idx++)
78 active_port |= BIT_MASK(enabled_ports[idx]);
79
80 return active_port;
81 }
82
83 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
84 unsigned long flags)
85 {
86 bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
87 &flags);
88 int port_sel_mode = get_port_sel_mode(mode, flags);
89 u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
90 void *lag_ctx;
91
92 lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
93 MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
94 MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
95
96 switch (port_sel_mode) {
97 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
98 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
99 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
100 break;
101 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
102 if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
103 break;
104
105 MLX5_SET(lagc, lag_ctx, active_port,
106 lag_active_port_bits(mlx5_lag_dev(dev)));
107 break;
108 default:
109 break;
110 }
111 MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
112
113 return mlx5_cmd_exec_in(dev, create_lag, in);
114 }
115
116 static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports,
117 u8 *ports)
118 {
119 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
120 void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
121
122 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
123 MLX5_SET(modify_lag_in, in, field_select, 0x1);
124
125 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
126 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
127
128 return mlx5_cmd_exec_in(dev, modify_lag, in);
129 }
130
131 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
132 {
133 u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
134
135 MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
136
137 return mlx5_cmd_exec_in(dev, create_vport_lag, in);
138 }
139 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
140
141 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
142 {
143 u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
144
145 MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
146
147 return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
148 }
149 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
150
151 static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
152 u8 *ports, int *num_disabled)
153 {
154 int i;
155
156 *num_disabled = 0;
157 for (i = 0; i < num_ports; i++) {
158 if (!tracker->netdev_state[i].tx_enabled ||
159 !tracker->netdev_state[i].link_up)
160 ports[(*num_disabled)++] = i;
161 }
162 }
163
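/* Collect the indices of ports that are both tx_enabled and link_up.
 * If no port qualifies, fall back to the disabled ports so callers
 * always get a usable (if degraded) mapping.
 */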
164 void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
165 u8 *ports, int *num_enabled)
166 {
167 int i;
168
169 *num_enabled = 0;
170 for (i = 0; i < num_ports; i++) {
171 if (tracker->netdev_state[i].tx_enabled &&
172 tracker->netdev_state[i].link_up)
173 ports[(*num_enabled)++] = i;
174 }
175
176 if (*num_enabled == 0)
177 mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled);
178 }
179
180 static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
181 struct mlx5_lag *ldev,
182 struct lag_tracker *tracker,
183 unsigned long flags)
184 {
185 char buf[MLX5_MAX_PORTS * 10 + 1] = {};
186 u8 enabled_ports[MLX5_MAX_PORTS] = {};
187 int written = 0;
188 int num_enabled;
189 int idx;
190 int err;
191 int i;
192 int j;
193
194 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
195 mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
196 &num_enabled);
197 for (i = 0; i < num_enabled; i++) {
198 err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
199 if (err != 3)
200 return;
201 written += err;
202 }
203 buf[written - 2] = 0;
204 mlx5_core_info(dev, "lag map active ports: %s\n", buf);
205 } else {
206 for (i = 0; i < ldev->ports; i++) {
207 for (j = 0; j < ldev->buckets; j++) {
208 idx = i * ldev->buckets + j;
209 err = scnprintf(buf + written, 10,
210 " port %d:%d", i + 1, ldev->v2p_map[idx]);
211 if (err != 9)
212 return;
213 written += err;
214 }
215 }
216 mlx5_core_info(dev, "lag map:%s\n", buf);
217 }
218 }
219
220 static int mlx5_lag_netdev_event(struct notifier_block *this,
221 unsigned long event, void *ptr);
222 static void mlx5_do_bond_work(struct work_struct *work);
223
224 static void mlx5_ldev_free(struct kref *ref)
225 {
226 struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
227
228 if (ldev->nb.notifier_call)
229 unregister_netdevice_notifier_net(&init_net, &ldev->nb);
230 mlx5_lag_mp_cleanup(ldev);
231 cancel_delayed_work_sync(&ldev->bond_work);
232 destroy_workqueue(ldev->wq);
233 mutex_destroy(&ldev->lock);
234 kfree(ldev);
235 }
236
237 static void mlx5_ldev_put(struct mlx5_lag *ldev)
238 {
239 kref_put(&ldev->ref, mlx5_ldev_free);
240 }
241
242 static void mlx5_ldev_get(struct mlx5_lag *ldev)
243 {
244 kref_get(&ldev->ref);
245 }
246
247 static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
248 {
249 struct mlx5_lag *ldev;
250 int err;
251
252 ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
253 if (!ldev)
254 return NULL;
255
256 ldev->wq = create_singlethread_workqueue("mlx5_lag");
257 if (!ldev->wq) {
258 kfree(ldev);
259 return NULL;
260 }
261
262 kref_init(&ldev->ref);
263 mutex_init(&ldev->lock);
264 INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
265
266 ldev->nb.notifier_call = mlx5_lag_netdev_event;
267 if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
268 ldev->nb.notifier_call = NULL;
269 mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
270 }
271 ldev->mode = MLX5_LAG_MODE_NONE;
272
273 err = mlx5_lag_mp_init(ldev);
274 if (err)
275 mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
276 err);
277
278 ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
279 ldev->buckets = 1;
280
281 return ldev;
282 }
283
284 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
285 struct net_device *ndev)
286 {
287 int i;
288
289 for (i = 0; i < ldev->ports; i++)
290 if (ldev->pf[i].netdev == ndev)
291 return i;
292
293 return -ENOENT;
294 }
295
296 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
297 {
298 return ldev->mode == MLX5_LAG_MODE_ROCE;
299 }
300
301 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
302 {
303 return ldev->mode == MLX5_LAG_MODE_SRIOV;
304 }
305
306 /* Create a mapping between steering slots and active ports.
307 * Since there are ldev->buckets slots per port, first assume the
308 * native mapping should be used.
309 * If some ports are disabled, fill their slots with a mapping that
310 * points to active ports.
311 */
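/* For example, with 4 ports and 2 buckets per port the native mapping
 * is 1 1 2 2 3 3 4 4; if port 3 goes down while 1, 2 and 4 stay active,
 * each of port 3's buckets is remapped to a randomly chosen active
 * port, e.g. 1 1 2 2 4 1 4 4.
 */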
312 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
313 u8 num_ports,
314 u8 buckets,
315 u8 *ports)
316 {
317 int disabled[MLX5_MAX_PORTS] = {};
318 int enabled[MLX5_MAX_PORTS] = {};
319 int disabled_ports_num = 0;
320 int enabled_ports_num = 0;
321 int idx;
322 u32 rand;
323 int i;
324 int j;
325
326 for (i = 0; i < num_ports; i++) {
327 if (tracker->netdev_state[i].tx_enabled &&
328 tracker->netdev_state[i].link_up)
329 enabled[enabled_ports_num++] = i;
330 else
331 disabled[disabled_ports_num++] = i;
332 }
333
334 /* Use native mapping by default where each port's buckets
335 * point to the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
336 */
337 for (i = 0; i < num_ports; i++)
338 for (j = 0; j < buckets; j++) {
339 idx = i * buckets + j;
340 ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i;
341 }
342
343 /* If all ports are disabled/enabled keep native mapping */
344 if (enabled_ports_num == num_ports ||
345 disabled_ports_num == num_ports)
346 return;
347
348 /* Go over the disabled ports and for each assign a random active port */
349 for (i = 0; i < disabled_ports_num; i++) {
350 for (j = 0; j < buckets; j++) {
351 get_random_bytes(&rand, 4);
352 ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
353 }
354 }
355 }
356
357 static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
358 {
359 int i;
360
361 for (i = 0; i < ldev->ports; i++)
362 if (ldev->pf[i].has_drop)
363 return true;
364 return false;
365 }
366
367 static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
368 {
369 int i;
370
371 for (i = 0; i < ldev->ports; i++) {
372 if (!ldev->pf[i].has_drop)
373 continue;
374
375 mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
376 MLX5_VPORT_UPLINK);
377 ldev->pf[i].has_drop = false;
378 }
379 }
380
381 static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
382 struct lag_tracker *tracker)
383 {
384 u8 disabled_ports[MLX5_MAX_PORTS] = {};
385 struct mlx5_core_dev *dev;
386 int disabled_index;
387 int num_disabled;
388 int err;
389 int i;
390
391 /* First delete the current drop rule so there won't be any dropped
392 * packets
393 */
394 mlx5_lag_drop_rule_cleanup(ldev);
395
396 if (!ldev->tracker.has_inactive)
397 return;
398
399 mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled);
400
401 for (i = 0; i < num_disabled; i++) {
402 disabled_index = disabled_ports[i];
403 dev = ldev->pf[disabled_index].dev;
404 err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
405 MLX5_VPORT_UPLINK);
406 if (!err)
407 ldev->pf[disabled_index].has_drop = true;
408 else
409 mlx5_core_err(dev,
410 "Failed to create lag drop rule, error: %d", err);
411 }
412 }
413
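/* Update only the active_port field of an existing LAG (field_select
 * 0x2); used in hash-based mode where the port-selection flow table,
 * rather than tx_remap_affinity, spreads traffic across ports.
 */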
414 static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
415 {
416 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
417 void *lag_ctx;
418
419 lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
420
421 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
422 MLX5_SET(modify_lag_in, in, field_select, 0x2);
423
424 MLX5_SET(lagc, lag_ctx, active_port, ports);
425
426 return mlx5_cmd_exec_in(dev, modify_lag, in);
427 }
428
429 static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
430 {
431 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
432 u8 active_ports;
433 int ret;
434
435 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
436 ret = mlx5_lag_port_sel_modify(ldev, ports);
437 if (ret ||
438 !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
439 return ret;
440
441 active_ports = lag_active_port_bits(ldev);
442
443 return mlx5_cmd_modify_active_port(dev0, active_ports);
444 }
445 return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
446 }
447
448 void mlx5_modify_lag(struct mlx5_lag *ldev,
449 struct lag_tracker *tracker)
450 {
451 u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
452 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
453 int idx;
454 int err;
455 int i;
456 int j;
457
458 mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports);
459
460 for (i = 0; i < ldev->ports; i++) {
461 for (j = 0; j < ldev->buckets; j++) {
462 idx = i * ldev->buckets + j;
463 if (ports[idx] == ldev->v2p_map[idx])
464 continue;
465 err = _mlx5_modify_lag(ldev, ports);
466 if (err) {
467 mlx5_core_err(dev0,
468 "Failed to modify LAG (%d)\n",
469 err);
470 return;
471 }
472 memcpy(ldev->v2p_map, ports, sizeof(ports));
473
474 mlx5_lag_print_mapping(dev0, ldev, tracker,
475 ldev->mode_flags);
476 break;
477 }
478 }
479
480 if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
481 !(ldev->mode == MLX5_LAG_MODE_ROCE))
482 mlx5_lag_drop_rule_setup(ldev, tracker);
483 }
484
485 static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
486 unsigned long *flags)
487 {
488 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
489
490 if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
491 if (ldev->ports > 2)
492 return -EINVAL;
493 return 0;
494 }
495
496 if (ldev->ports > 2)
497 ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
498
499 set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
500
501 return 0;
502 }
503
504 static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
505 struct lag_tracker *tracker,
506 enum mlx5_lag_mode mode,
507 unsigned long *flags)
508 {
509 struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];
510
511 if (mode == MLX5_LAG_MODE_MPESW)
512 return;
513
514 if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
515 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)
516 set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
517 }
518
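/* Translate the requested LAG mode into mode_flags: shared FDB and
 * MPESW force native FDB selection; RoCE LAG picks the hash-based
 * port-selection flow table when the device supports it, while
 * offloads LAG does so only when the bond itself hashes traffic.
 */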
519 static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
520 struct lag_tracker *tracker, bool shared_fdb,
521 unsigned long *flags)
522 {
523 bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
524
525 *flags = 0;
526 if (shared_fdb) {
527 set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
528 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
529 }
530
531 if (mode == MLX5_LAG_MODE_MPESW)
532 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
533
534 if (roce_lag)
535 return mlx5_lag_set_port_sel_mode_roce(ldev, flags);
536
537 mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags);
538 return 0;
539 }
540
541 char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
542 {
543 int port_sel_mode = get_port_sel_mode(mode, flags);
544
545 switch (port_sel_mode) {
546 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
547 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
548 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
549 default: return "invalid";
550 }
551 }
552
553 static int mlx5_create_lag(struct mlx5_lag *ldev,
554 struct lag_tracker *tracker,
555 enum mlx5_lag_mode mode,
556 unsigned long flags)
557 {
558 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
559 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
560 struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
561 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
562 int err;
563
564 if (tracker)
565 mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
566 mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
567 shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
568
569 err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
570 if (err) {
571 mlx5_core_err(dev0,
572 "Failed to create LAG (%d)\n",
573 err);
574 return err;
575 }
576
577 if (shared_fdb) {
578 err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
579 dev1->priv.eswitch);
580 if (err)
581 mlx5_core_err(dev0, "Can't enable single FDB mode\n");
582 else
583 mlx5_core_info(dev0, "Operation mode is single FDB\n");
584 }
585
586 if (err) {
587 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
588 if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
589 mlx5_core_err(dev0,
590 "Failed to deactivate RoCE LAG; driver restart required\n");
591 }
592
593 return err;
594 }
595
596 int mlx5_activate_lag(struct mlx5_lag *ldev,
597 struct lag_tracker *tracker,
598 enum mlx5_lag_mode mode,
599 bool shared_fdb)
600 {
601 bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
602 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
603 unsigned long flags = 0;
604 int err;
605
606 err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
607 if (err)
608 return err;
609
610 if (mode != MLX5_LAG_MODE_MPESW) {
611 mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
612 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
613 err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
614 ldev->v2p_map);
615 if (err) {
616 mlx5_core_err(dev0,
617 "Failed to create LAG port selection(%d)\n",
618 err);
619 return err;
620 }
621 }
622 }
623
624 err = mlx5_create_lag(ldev, tracker, mode, flags);
625 if (err) {
626 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
627 mlx5_lag_port_sel_destroy(ldev);
628 if (roce_lag)
629 mlx5_core_err(dev0,
630 "Failed to activate RoCE LAG\n");
631 else
632 mlx5_core_err(dev0,
633 "Failed to activate VF LAG\n"
634 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
635 return err;
636 }
637
638 if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
639 !roce_lag)
640 mlx5_lag_drop_rule_setup(ldev, tracker);
641
642 ldev->mode = mode;
643 ldev->mode_flags = flags;
644 return 0;
645 }
646
647 int mlx5_deactivate_lag(struct mlx5_lag *ldev)
648 {
649 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
650 struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
651 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
652 bool roce_lag = __mlx5_lag_is_roce(ldev);
653 unsigned long flags = ldev->mode_flags;
654 int err;
655
656 ldev->mode = MLX5_LAG_MODE_NONE;
657 ldev->mode_flags = 0;
658 mlx5_lag_mp_reset(ldev);
659
660 if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
661 mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch,
662 dev1->priv.eswitch);
663 clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
664 }
665
666 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
667 err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
668 if (err) {
669 if (roce_lag) {
670 mlx5_core_err(dev0,
671 "Failed to deactivate RoCE LAG; driver restart required\n");
672 } else {
673 mlx5_core_err(dev0,
674 "Failed to deactivate VF LAG; driver restart required\n"
675 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
676 }
677 return err;
678 }
679
680 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
681 mlx5_lag_port_sel_destroy(ldev);
682 if (mlx5_lag_has_drop_rule(ldev))
683 mlx5_lag_drop_rule_cleanup(ldev);
684
685 return 0;
686 }
687
688 #define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2
689 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
690 {
691 #ifdef CONFIG_MLX5_ESWITCH
692 struct mlx5_core_dev *dev;
693 u8 mode;
694 #endif
695 int i;
696
697 for (i = 0; i < ldev->ports; i++)
698 if (!ldev->pf[i].dev)
699 return false;
700
701 #ifdef CONFIG_MLX5_ESWITCH
702 for (i = 0; i < ldev->ports; i++) {
703 dev = ldev->pf[i].dev;
704 if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
705 return false;
706 }
707
708 dev = ldev->pf[MLX5_LAG_P1].dev;
709 mode = mlx5_eswitch_mode(dev);
710 for (i = 0; i < ldev->ports; i++)
711 if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
712 return false;
713
714 if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS)
715 return false;
716 #else
717 for (i = 0; i < ldev->ports; i++)
718 if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
719 return false;
720 #endif
721 return true;
722 }
723
724 void mlx5_lag_add_devices(struct mlx5_lag *ldev)
725 {
726 int i;
727
728 for (i = 0; i < ldev->ports; i++) {
729 if (!ldev->pf[i].dev)
730 continue;
731
732 if (ldev->pf[i].dev->priv.flags &
733 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
734 continue;
735
736 ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
737 mlx5_rescan_drivers_locked(ldev->pf[i].dev);
738 }
739 }
740
741 void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
742 {
743 int i;
744
745 for (i = 0; i < ldev->ports; i++) {
746 if (!ldev->pf[i].dev)
747 continue;
748
749 if (ldev->pf[i].dev->priv.flags &
750 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
751 continue;
752
753 ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
754 mlx5_rescan_drivers_locked(ldev->pf[i].dev);
755 }
756 }
757
758 void mlx5_disable_lag(struct mlx5_lag *ldev)
759 {
760 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
761 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
762 struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
763 bool roce_lag;
764 int err;
765 int i;
766
767 roce_lag = __mlx5_lag_is_roce(ldev);
768
769 if (shared_fdb) {
770 mlx5_lag_remove_devices(ldev);
771 } else if (roce_lag) {
772 if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
773 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
774 mlx5_rescan_drivers_locked(dev0);
775 }
776 for (i = 1; i < ldev->ports; i++)
777 mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
778 }
779
780 err = mlx5_deactivate_lag(ldev);
781 if (err)
782 return;
783
784 if (shared_fdb || roce_lag)
785 mlx5_lag_add_devices(ldev);
786
787 if (shared_fdb) {
788 if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
789 mlx5_eswitch_reload_reps(dev0->priv.eswitch);
790 if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
791 mlx5_eswitch_reload_reps(dev1->priv.eswitch);
792 }
793 }
794
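/* Shared FDB requires both PFs in switchdev mode with vport metadata
 * matching enabled, a paired devcom E-Switch channel, and device
 * support for native FDB selection, root flow tables on the other
 * E-Switch and shared ingress ACLs.
 */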
795 bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
796 {
797 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
798 struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
799
800 if (is_mdev_switchdev_mode(dev0) &&
801 is_mdev_switchdev_mode(dev1) &&
802 mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
803 mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
804 mlx5_devcom_is_paired(dev0->priv.devcom,
805 MLX5_DEVCOM_ESW_OFFLOADS) &&
806 MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
807 MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
808 MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
809 return true;
810
811 return false;
812 }
813
814 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
815 {
816 bool roce_lag = true;
817 int i;
818
819 for (i = 0; i < ldev->ports; i++)
820 roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);
821
822 #ifdef CONFIG_MLX5_ESWITCH
823 for (i = 0; i < ldev->ports; i++)
824 roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
825 #endif
826
827 return roce_lag;
828 }
829
830 static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
831 {
832 return do_bond && __mlx5_lag_is_active(ldev) &&
833 ldev->mode != MLX5_LAG_MODE_MPESW;
834 }
835
836 static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
837 {
838 return !do_bond && __mlx5_lag_is_active(ldev) &&
839 ldev->mode != MLX5_LAG_MODE_MPESW;
840 }
841
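/* Re-evaluate the bonding state reported by the netdev tracker and
 * bring the hardware LAG in sync: activate it in RoCE or SR-IOV mode
 * (optionally with a shared FDB), modify the port mapping of an
 * already active LAG, or tear it down when the bond no longer
 * qualifies.
 */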
842 static void mlx5_do_bond(struct mlx5_lag *ldev)
843 {
844 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
845 struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
846 struct lag_tracker tracker = { };
847 bool do_bond, roce_lag;
848 int err;
849 int i;
850
851 if (!mlx5_lag_is_ready(ldev)) {
852 do_bond = false;
853 } else {
854 /* VF LAG is in multipath mode, ignore bond change requests */
855 if (mlx5_lag_is_multipath(dev0))
856 return;
857
858 tracker = ldev->tracker;
859
860 do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
861 }
862
863 if (do_bond && !__mlx5_lag_is_active(ldev)) {
864 bool shared_fdb = mlx5_shared_fdb_supported(ldev);
865
866 roce_lag = mlx5_lag_is_roce_lag(ldev);
867
868 if (shared_fdb || roce_lag)
869 mlx5_lag_remove_devices(ldev);
870
871 err = mlx5_activate_lag(ldev, &tracker,
872 roce_lag ? MLX5_LAG_MODE_ROCE :
873 MLX5_LAG_MODE_SRIOV,
874 shared_fdb);
875 if (err) {
876 if (shared_fdb || roce_lag)
877 mlx5_lag_add_devices(ldev);
878
879 return;
880 } else if (roce_lag) {
881 dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
882 mlx5_rescan_drivers_locked(dev0);
883 for (i = 1; i < ldev->ports; i++)
884 mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
885 } else if (shared_fdb) {
886 dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
887 mlx5_rescan_drivers_locked(dev0);
888
889 err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
890 if (!err)
891 err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);
892
893 if (err) {
894 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
895 mlx5_rescan_drivers_locked(dev0);
896 mlx5_deactivate_lag(ldev);
897 mlx5_lag_add_devices(ldev);
898 mlx5_eswitch_reload_reps(dev0->priv.eswitch);
899 mlx5_eswitch_reload_reps(dev1->priv.eswitch);
900 mlx5_core_err(dev0, "Failed to enable lag\n");
901 return;
902 }
903 }
904 } else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
905 mlx5_modify_lag(ldev, &tracker);
906 } else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
907 mlx5_disable_lag(ldev);
908 }
909 }
910
911 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
912 {
913 queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
914 }
915
916 static void mlx5_do_bond_work(struct work_struct *work)
917 {
918 struct delayed_work *delayed_work = to_delayed_work(work);
919 struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
920 bond_work);
921 int status;
922
923 status = mlx5_dev_list_trylock();
924 if (!status) {
925 mlx5_queue_bond_work(ldev, HZ);
926 return;
927 }
928
929 mutex_lock(&ldev->lock);
930 if (ldev->mode_changes_in_progress) {
931 mutex_unlock(&ldev->lock);
932 mlx5_dev_list_unlock();
933 mlx5_queue_bond_work(ldev, HZ);
934 return;
935 }
936
937 mlx5_do_bond(ldev);
938 mutex_unlock(&ldev->lock);
939 mlx5_dev_list_unlock();
940 }
941
942 static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
943 struct lag_tracker *tracker,
944 struct netdev_notifier_changeupper_info *info)
945 {
946 struct net_device *upper = info->upper_dev, *ndev_tmp;
947 struct netdev_lag_upper_info *lag_upper_info = NULL;
948 bool is_bonded, is_in_lag, mode_supported;
949 bool has_inactive = 0;
950 struct slave *slave;
951 u8 bond_status = 0;
952 int num_slaves = 0;
953 int changed = 0;
954 int idx;
955
956 if (!netif_is_lag_master(upper))
957 return 0;
958
959 if (info->linking)
960 lag_upper_info = info->upper_info;
961
962 /* The event may still be of interest if the slave does not belong to
963 * us, but is enslaved to a master which has one or more of our netdevs
964 * as slaves (e.g., if a new slave is added to a master that bonds two
965 * of our netdevs, we should unbond).
966 */
967 rcu_read_lock();
968 for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
969 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
970 if (idx >= 0) {
971 slave = bond_slave_get_rcu(ndev_tmp);
972 if (slave)
973 has_inactive |= bond_is_slave_inactive(slave);
974 bond_status |= (1 << idx);
975 }
976
977 num_slaves++;
978 }
979 rcu_read_unlock();
980
981 /* None of this lagdev's netdevs are slaves of this master. */
982 if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
983 return 0;
984
985 if (lag_upper_info) {
986 tracker->tx_type = lag_upper_info->tx_type;
987 tracker->hash_type = lag_upper_info->hash_type;
988 }
989
990 tracker->has_inactive = has_inactive;
991 /* Determine bonding status:
992 * A device is considered bonded if all of its physical ports are slaves
993 * of the same lag master, and only them.
994 */
995 is_in_lag = num_slaves == ldev->ports &&
996 bond_status == GENMASK(ldev->ports - 1, 0);
997
998 /* Lag mode must be activebackup or hash. */
999 mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
1000 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
1001
1002 is_bonded = is_in_lag && mode_supported;
1003 if (tracker->is_bonded != is_bonded) {
1004 tracker->is_bonded = is_bonded;
1005 changed = 1;
1006 }
1007
1008 if (!is_in_lag)
1009 return changed;
1010
1011 if (!mlx5_lag_is_ready(ldev))
1012 NL_SET_ERR_MSG_MOD(info->info.extack,
1013 "Can't activate LAG offload, PF is configured with more than 64 VFs");
1014 else if (!mode_supported)
1015 NL_SET_ERR_MSG_MOD(info->info.extack,
1016 "Can't activate LAG offload, TX type isn't supported");
1017
1018 return changed;
1019 }
1020
1021 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
1022 struct lag_tracker *tracker,
1023 struct net_device *ndev,
1024 struct netdev_notifier_changelowerstate_info *info)
1025 {
1026 struct netdev_lag_lower_state_info *lag_lower_info;
1027 int idx;
1028
1029 if (!netif_is_lag_port(ndev))
1030 return 0;
1031
1032 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
1033 if (idx < 0)
1034 return 0;
1035
1036 /* This information is used to determine virtual to physical
1037 * port mapping.
1038 */
1039 lag_lower_info = info->lower_state_info;
1040 if (!lag_lower_info)
1041 return 0;
1042
1043 tracker->netdev_state[idx] = *lag_lower_info;
1044
1045 return 1;
1046 }
1047
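/* NETDEV_CHANGEINFODATA: recompute whether any of our slaves is
 * inactive in the bond; a change queues the bond work so the drop
 * rules can be re-evaluated.
 */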
1048 static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
1049 struct lag_tracker *tracker,
1050 struct net_device *ndev)
1051 {
1052 struct net_device *ndev_tmp;
1053 struct slave *slave;
1054 bool has_inactive = 0;
1055 int idx;
1056
1057 if (!netif_is_lag_master(ndev))
1058 return 0;
1059
1060 rcu_read_lock();
1061 for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
1062 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
1063 if (idx < 0)
1064 continue;
1065
1066 slave = bond_slave_get_rcu(ndev_tmp);
1067 if (slave)
1068 has_inactive |= bond_is_slave_inactive(slave);
1069 }
1070 rcu_read_unlock();
1071
1072 if (tracker->has_inactive == has_inactive)
1073 return 0;
1074
1075 tracker->has_inactive = has_inactive;
1076
1077 return 1;
1078 }
1079
1080 /* this handler is always registered to netdev events */
1081 static int mlx5_lag_netdev_event(struct notifier_block *this,
1082 unsigned long event, void *ptr)
1083 {
1084 struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
1085 struct lag_tracker tracker;
1086 struct mlx5_lag *ldev;
1087 int changed = 0;
1088
1089 if (event != NETDEV_CHANGEUPPER &&
1090 event != NETDEV_CHANGELOWERSTATE &&
1091 event != NETDEV_CHANGEINFODATA)
1092 return NOTIFY_DONE;
1093
1094 ldev = container_of(this, struct mlx5_lag, nb);
1095
1096 tracker = ldev->tracker;
1097
1098 switch (event) {
1099 case NETDEV_CHANGEUPPER:
1100 changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
1101 break;
1102 case NETDEV_CHANGELOWERSTATE:
1103 changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
1104 ndev, ptr);
1105 break;
1106 case NETDEV_CHANGEINFODATA:
1107 changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
1108 break;
1109 }
1110
1111 ldev->tracker = tracker;
1112
1113 if (changed)
1114 mlx5_queue_bond_work(ldev, 0);
1115
1116 return NOTIFY_DONE;
1117 }
1118
1119 static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
1120 struct mlx5_core_dev *dev,
1121 struct net_device *netdev)
1122 {
1123 unsigned int fn = mlx5_get_dev_index(dev);
1124 unsigned long flags;
1125
1126 if (fn >= ldev->ports)
1127 return;
1128
1129 spin_lock_irqsave(&lag_lock, flags);
1130 ldev->pf[fn].netdev = netdev;
1131 ldev->tracker.netdev_state[fn].link_up = 0;
1132 ldev->tracker.netdev_state[fn].tx_enabled = 0;
1133 spin_unlock_irqrestore(&lag_lock, flags);
1134 }
1135
1136 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
1137 struct net_device *netdev)
1138 {
1139 unsigned long flags;
1140 int i;
1141
1142 spin_lock_irqsave(&lag_lock, flags);
1143 for (i = 0; i < ldev->ports; i++) {
1144 if (ldev->pf[i].netdev == netdev) {
1145 ldev->pf[i].netdev = NULL;
1146 break;
1147 }
1148 }
1149 spin_unlock_irqrestore(&lag_lock, flags);
1150 }
1151
1152 static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
1153 struct mlx5_core_dev *dev)
1154 {
1155 unsigned int fn = mlx5_get_dev_index(dev);
1156
1157 if (fn >= ldev->ports)
1158 return;
1159
1160 ldev->pf[fn].dev = dev;
1161 dev->priv.lag = ldev;
1162 }
1163
1164 static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
1165 struct mlx5_core_dev *dev)
1166 {
1167 int i;
1168
1169 for (i = 0; i < ldev->ports; i++)
1170 if (ldev->pf[i].dev == dev)
1171 break;
1172
1173 if (i == ldev->ports)
1174 return;
1175
1176 ldev->pf[i].dev = NULL;
1177 dev->priv.lag = NULL;
1178 }
1179
1180 /* Must be called with intf_mutex held */
1181 static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
1182 {
1183 struct mlx5_lag *ldev = NULL;
1184 struct mlx5_core_dev *tmp_dev;
1185
1186 tmp_dev = mlx5_get_next_phys_dev_lag(dev);
1187 if (tmp_dev)
1188 ldev = mlx5_lag_dev(tmp_dev);
1189
1190 if (!ldev) {
1191 ldev = mlx5_lag_dev_alloc(dev);
1192 if (!ldev) {
1193 mlx5_core_err(dev, "Failed to alloc lag dev\n");
1194 return 0;
1195 }
1196 mlx5_ldev_add_mdev(ldev, dev);
1197 return 0;
1198 }
1199
1200 mutex_lock(&ldev->lock);
1201 if (ldev->mode_changes_in_progress) {
1202 mutex_unlock(&ldev->lock);
1203 return -EAGAIN;
1204 }
1205 mlx5_ldev_get(ldev);
1206 mlx5_ldev_add_mdev(ldev, dev);
1207 mutex_unlock(&ldev->lock);
1208
1209 return 0;
1210 }
1211
1212 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
1213 {
1214 struct mlx5_lag *ldev;
1215
1216 ldev = mlx5_lag_dev(dev);
1217 if (!ldev)
1218 return;
1219
1220 /* mdev is being removed, might as well remove debugfs
1221 * as early as possible.
1222 */
1223 mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
1224 recheck:
1225 mutex_lock(&ldev->lock);
1226 if (ldev->mode_changes_in_progress) {
1227 mutex_unlock(&ldev->lock);
1228 msleep(100);
1229 goto recheck;
1230 }
1231 mlx5_ldev_remove_mdev(ldev, dev);
1232 mutex_unlock(&ldev->lock);
1233 mlx5_ldev_put(ldev);
1234 }
1235
1236 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
1237 {
1238 int err;
1239
1240 if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
1241 !MLX5_CAP_GEN(dev, lag_master) ||
1242 (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS ||
1243 MLX5_CAP_GEN(dev, num_lag_ports) <= 1))
1244 return;
1245
1246 recheck:
1247 mlx5_dev_list_lock();
1248 err = __mlx5_lag_dev_add_mdev(dev);
1249 mlx5_dev_list_unlock();
1250
1251 if (err) {
1252 msleep(100);
1253 goto recheck;
1254 }
1255 mlx5_ldev_add_debugfs(dev);
1256 }
1257
1258 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
1259 struct net_device *netdev)
1260 {
1261 struct mlx5_lag *ldev;
1262 bool lag_is_active;
1263
1264 ldev = mlx5_lag_dev(dev);
1265 if (!ldev)
1266 return;
1267
1268 mutex_lock(&ldev->lock);
1269 mlx5_ldev_remove_netdev(ldev, netdev);
1270 clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1271
1272 lag_is_active = __mlx5_lag_is_active(ldev);
1273 mutex_unlock(&ldev->lock);
1274
1275 if (lag_is_active)
1276 mlx5_queue_bond_work(ldev, 0);
1277 }
1278
1279 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
1280 struct net_device *netdev)
1281 {
1282 struct mlx5_lag *ldev;
1283 int i;
1284
1285 ldev = mlx5_lag_dev(dev);
1286 if (!ldev)
1287 return;
1288
1289 mutex_lock(&ldev->lock);
1290 mlx5_ldev_add_netdev(ldev, dev, netdev);
1291
1292 for (i = 0; i < ldev->ports; i++)
1293 if (!ldev->pf[i].netdev)
1294 break;
1295
1296 if (i >= ldev->ports)
1297 set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1298 mutex_unlock(&ldev->lock);
1299 mlx5_queue_bond_work(ldev, 0);
1300 }
1301
1302 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
1303 {
1304 struct mlx5_lag *ldev;
1305 unsigned long flags;
1306 bool res;
1307
1308 spin_lock_irqsave(&lag_lock, flags);
1309 ldev = mlx5_lag_dev(dev);
1310 res = ldev && __mlx5_lag_is_roce(ldev);
1311 spin_unlock_irqrestore(&lag_lock, flags);
1312
1313 return res;
1314 }
1315 EXPORT_SYMBOL(mlx5_lag_is_roce);
1316
1317 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
1318 {
1319 struct mlx5_lag *ldev;
1320 unsigned long flags;
1321 bool res;
1322
1323 spin_lock_irqsave(&lag_lock, flags);
1324 ldev = mlx5_lag_dev(dev);
1325 res = ldev && __mlx5_lag_is_active(ldev);
1326 spin_unlock_irqrestore(&lag_lock, flags);
1327
1328 return res;
1329 }
1330 EXPORT_SYMBOL(mlx5_lag_is_active);
1331
1332 bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
1333 {
1334 struct mlx5_lag *ldev;
1335 unsigned long flags;
1336 bool res = 0;
1337
1338 spin_lock_irqsave(&lag_lock, flags);
1339 ldev = mlx5_lag_dev(dev);
1340 if (ldev)
1341 res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
1342 spin_unlock_irqrestore(&lag_lock, flags);
1343
1344 return res;
1345 }
1346 EXPORT_SYMBOL(mlx5_lag_mode_is_hash);
1347
1348 bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
1349 {
1350 struct mlx5_lag *ldev;
1351 unsigned long flags;
1352 bool res;
1353
1354 spin_lock_irqsave(&lag_lock, flags);
1355 ldev = mlx5_lag_dev(dev);
1356 res = ldev && __mlx5_lag_is_active(ldev) &&
1357 dev == ldev->pf[MLX5_LAG_P1].dev;
1358 spin_unlock_irqrestore(&lag_lock, flags);
1359
1360 return res;
1361 }
1362 EXPORT_SYMBOL(mlx5_lag_is_master);
1363
1364 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
1365 {
1366 struct mlx5_lag *ldev;
1367 unsigned long flags;
1368 bool res;
1369
1370 spin_lock_irqsave(&lag_lock, flags);
1371 ldev = mlx5_lag_dev(dev);
1372 res = ldev && __mlx5_lag_is_sriov(ldev);
1373 spin_unlock_irqrestore(&lag_lock, flags);
1374
1375 return res;
1376 }
1377 EXPORT_SYMBOL(mlx5_lag_is_sriov);
1378
1379 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
1380 {
1381 struct mlx5_lag *ldev;
1382 unsigned long flags;
1383 bool res;
1384
1385 spin_lock_irqsave(&lag_lock, flags);
1386 ldev = mlx5_lag_dev(dev);
1387 res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
1388 spin_unlock_irqrestore(&lag_lock, flags);
1389
1390 return res;
1391 }
1392 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
1393
1394 void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
1395 {
1396 struct mlx5_lag *ldev;
1397
1398 ldev = mlx5_lag_dev(dev);
1399 if (!ldev)
1400 return;
1401
1402 mlx5_dev_list_lock();
1403 mutex_lock(&ldev->lock);
1404
1405 ldev->mode_changes_in_progress++;
1406 if (__mlx5_lag_is_active(ldev))
1407 mlx5_disable_lag(ldev);
1408
1409 mutex_unlock(&ldev->lock);
1410 mlx5_dev_list_unlock();
1411 }
1412
1413 void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
1414 {
1415 struct mlx5_lag *ldev;
1416
1417 ldev = mlx5_lag_dev(dev);
1418 if (!ldev)
1419 return;
1420
1421 mutex_lock(&ldev->lock);
1422 ldev->mode_changes_in_progress--;
1423 mutex_unlock(&ldev->lock);
1424 mlx5_queue_bond_work(ldev, 0);
1425 }
1426
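/* Return the netdev currently carrying RoCE traffic for this LAG: the
 * tx-enabled slave in active-backup mode (last port as a fallback),
 * otherwise port 1. A reference is held on the returned netdev.
 */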
1427 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
1428 {
1429 struct net_device *ndev = NULL;
1430 struct mlx5_lag *ldev;
1431 unsigned long flags;
1432 int i;
1433
1434 spin_lock_irqsave(&lag_lock, flags);
1435 ldev = mlx5_lag_dev(dev);
1436
1437 if (!(ldev && __mlx5_lag_is_roce(ldev)))
1438 goto unlock;
1439
1440 if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
1441 for (i = 0; i < ldev->ports; i++)
1442 if (ldev->tracker.netdev_state[i].tx_enabled)
1443 ndev = ldev->pf[i].netdev;
1444 if (!ndev)
1445 ndev = ldev->pf[ldev->ports - 1].netdev;
1446 } else {
1447 ndev = ldev->pf[MLX5_LAG_P1].netdev;
1448 }
1449 if (ndev)
1450 dev_hold(ndev);
1451
1452 unlock:
1453 spin_unlock_irqrestore(&lag_lock, flags);
1454
1455 return ndev;
1456 }
1457 EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
1458
1459 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
1460 struct net_device *slave)
1461 {
1462 struct mlx5_lag *ldev;
1463 unsigned long flags;
1464 u8 port = 0;
1465 int i;
1466
1467 spin_lock_irqsave(&lag_lock, flags);
1468 ldev = mlx5_lag_dev(dev);
1469 if (!(ldev && __mlx5_lag_is_roce(ldev)))
1470 goto unlock;
1471
1472 for (i = 0; i < ldev->ports; i++) {
1473 if (ldev->pf[i].netdev == slave) {
1474 port = i;
1475 break;
1476 }
1477 }
1478
1479 port = ldev->v2p_map[port * ldev->buckets];
1480
1481 unlock:
1482 spin_unlock_irqrestore(&lag_lock, flags);
1483 return port;
1484 }
1485 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
1486
1487 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
1488 {
1489 struct mlx5_lag *ldev;
1490
1491 ldev = mlx5_lag_dev(dev);
1492 if (!ldev)
1493 return 0;
1494
1495 return ldev->ports;
1496 }
1497 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
1498
1499 struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
1500 {
1501 struct mlx5_core_dev *peer_dev = NULL;
1502 struct mlx5_lag *ldev;
1503 unsigned long flags;
1504
1505 spin_lock_irqsave(&lag_lock, flags);
1506 ldev = mlx5_lag_dev(dev);
1507 if (!ldev)
1508 goto unlock;
1509
1510 peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
1511 ldev->pf[MLX5_LAG_P2].dev :
1512 ldev->pf[MLX5_LAG_P1].dev;
1513
1514 unlock:
1515 spin_unlock_irqrestore(&lag_lock, flags);
1516 return peer_dev;
1517 }
1518 EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
1519
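/* Sum the congestion statistics of every LAG member device (or of @dev
 * alone when no LAG is active) into @values at the given @offsets.
 */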
1520 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
1521 u64 *values,
1522 int num_counters,
1523 size_t *offsets)
1524 {
1525 int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
1526 struct mlx5_core_dev **mdev;
1527 struct mlx5_lag *ldev;
1528 unsigned long flags;
1529 int num_ports;
1530 int ret, i, j;
1531 void *out;
1532
1533 out = kvzalloc(outlen, GFP_KERNEL);
1534 if (!out)
1535 return -ENOMEM;
1536
1537 mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
1538 if (!mdev) {
1539 ret = -ENOMEM;
1540 goto free_out;
1541 }
1542
1543 memset(values, 0, sizeof(*values) * num_counters);
1544
1545 spin_lock_irqsave(&lag_lock, flags);
1546 ldev = mlx5_lag_dev(dev);
1547 if (ldev && __mlx5_lag_is_active(ldev)) {
1548 num_ports = ldev->ports;
1549 for (i = 0; i < ldev->ports; i++)
1550 mdev[i] = ldev->pf[i].dev;
1551 } else {
1552 num_ports = 1;
1553 mdev[MLX5_LAG_P1] = dev;
1554 }
1555 spin_unlock_irqrestore(&lag_lock, flags);
1556
1557 for (i = 0; i < num_ports; ++i) {
1558 u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
1559
1560 MLX5_SET(query_cong_statistics_in, in, opcode,
1561 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
1562 ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
1563 out);
1564 if (ret)
1565 goto free_mdev;
1566
1567 for (j = 0; j < num_counters; ++j)
1568 values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
1569 }
1570
1571 free_mdev:
1572 kvfree(mdev);
1573 free_out:
1574 kvfree(out);
1575 return ret;
1576 }
1577 EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
1578