1 /*
2 * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <linux/bpf_trace.h>
34 #include <net/xdp_sock_drv.h>
35 #include "en/xdp.h"
36 #include "en/params.h"
37
/* Largest SW MTU for which a received packet still fits a linear,
 * single-page RX buffer.
 *
 * @params: channel parameters, passed to the headroom helper and to
 *          MLX5E_HW2SW_MTU()
 * @xsk:    XSK parameters, passed to the headroom helper
 *
 * Returns SKB_MAX_HEAD(headroom) converted from a HW MTU to a SW MTU.
 */
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
{
	int headroom = mlx5e_get_linear_rq_headroom(params, xsk);

	/* With S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), the
	 * linear-SKB condition tested by mlx5e_rx_is_linear_skb is:
	 *   SKB_DATA_ALIGN(sw_mtu + hard_mtu + headroom) + S <= PAGE_SIZE  (1)
	 * where hw_mtu == sw_mtu + hard_mtu.
	 * The value returned here is:
	 *   max_mtu = PAGE_SIZE - S - headroom - hard_mtu                  (2)
	 * Substituting sw_mtu := max_mtu into (1) gives
	 * SKB_DATA_ALIGN(PAGE_SIZE - S) + S on the left, which equals
	 * PAGE_SIZE since PAGE_SIZE and S are both already aligned. Any
	 * larger MTU pushes the left side of (1) past PAGE_SIZE, hence
	 * max_mtu is the largest MTU that is allowed.
	 */
	return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(headroom));
}
57
58 static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq * sq,struct mlx5e_rq * rq,struct xdp_buff * xdp)59 mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
60 struct xdp_buff *xdp)
61 {
62 struct page *page = virt_to_page(xdp->data);
63 struct skb_shared_info *sinfo = NULL;
64 struct mlx5e_xmit_data xdptxd;
65 struct mlx5e_xdp_info xdpi;
66 struct xdp_frame *xdpf;
67 dma_addr_t dma_addr;
68 int i;
69
70 xdpf = xdp_convert_buff_to_frame(xdp);
71 if (unlikely(!xdpf))
72 return false;
73
74 xdptxd.data = xdpf->data;
75 xdptxd.len = xdpf->len;
76
77 if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
78 /* The xdp_buff was in the UMEM and was copied into a newly
79 * allocated page. The UMEM page was returned via the ZCA, and
80 * this new page has to be mapped at this point and has to be
81 * unmapped and returned via xdp_return_frame on completion.
82 */
83
84 /* Prevent double recycling of the UMEM page. Even in case this
85 * function returns false, the xdp_buff shouldn't be recycled,
86 * as it was already done in xdp_convert_zc_to_xdp_frame.
87 */
88 __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
89
90 xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
91
92 dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len,
93 DMA_TO_DEVICE);
94 if (dma_mapping_error(sq->pdev, dma_addr)) {
95 xdp_return_frame(xdpf);
96 return false;
97 }
98
99 xdptxd.dma_addr = dma_addr;
100 xdpi.frame.xdpf = xdpf;
101 xdpi.frame.dma_addr = dma_addr;
102
103 if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
104 mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0)))
105 return false;
106
107 mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
108 return true;
109 }
110
111 /* Driver assumes that xdp_convert_buff_to_frame returns an xdp_frame
112 * that points to the same memory region as the original xdp_buff. It
113 * allows to map the memory only once and to use the DMA_BIDIRECTIONAL
114 * mode.
115 */
116
117 xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
118 xdpi.page.rq = rq;
119
120 dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf);
121 dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_BIDIRECTIONAL);
122
123 if (unlikely(xdp_frame_has_frags(xdpf))) {
124 sinfo = xdp_get_shared_info_from_frame(xdpf);
125
126 for (i = 0; i < sinfo->nr_frags; i++) {
127 skb_frag_t *frag = &sinfo->frags[i];
128 dma_addr_t addr;
129 u32 len;
130
131 addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
132 skb_frag_off(frag);
133 len = skb_frag_size(frag);
134 dma_sync_single_for_device(sq->pdev, addr, len,
135 DMA_BIDIRECTIONAL);
136 }
137 }
138
139 xdptxd.dma_addr = dma_addr;
140
141 if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
142 mlx5e_xmit_xdp_frame, sq, &xdptxd, sinfo, 0)))
143 return false;
144
145 xdpi.page.page = page;
146 mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
147
148 if (unlikely(xdp_frame_has_frags(xdpf))) {
149 for (i = 0; i < sinfo->nr_frags; i++) {
150 skb_frag_t *frag = &sinfo->frags[i];
151
152 xdpi.page.page = skb_frag_page(frag);
153 mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
154 }
155 }
156
157 return true;
158 }
159
/* XDP metadata hook: report the RX timestamp of the current packet.
 *
 * @ctx:       XDP context; treated as the driver's struct mlx5e_xdp_buff
 * @timestamp: output, CQE timestamp converted by mlx5e_cqe_ts_to_ns()
 *
 * Returns 0 on success, -EOPNOTSUPP when mlx5e_rx_hw_stamp() reports that
 * HW timestamping is not active on the RQ.
 */
static int mlx5e_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
{
	const struct mlx5e_xdp_buff *mxbuf = (void *)ctx;

	if (likely(mlx5e_rx_hw_stamp(mxbuf->rq->tstamp))) {
		*timestamp = mlx5e_cqe_ts_to_ns(mxbuf->rq->ptp_cyc2time,
						mxbuf->rq->clock,
						get_cqe_ts(mxbuf->cqe));
		return 0;
	}

	return -EOPNOTSUPP;
}
171
/* XDP metadata hook: report the RSS hash of the current packet.
 *
 * @ctx:  XDP context; treated as the driver's struct mlx5e_xdp_buff
 * @hash: output, rss_hash_result of the CQE in CPU byte order
 *
 * Returns 0 on success, -EOPNOTSUPP when NETIF_F_RXHASH is not set in the
 * features of the netdev behind the RX queue.
 */
static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash)
{
	const struct mlx5e_xdp_buff *mxbuf = (void *)ctx;

	if (likely(mxbuf->xdp.rxq->dev->features & NETIF_F_RXHASH)) {
		*hash = be32_to_cpu(mxbuf->cqe->rss_hash_result);
		return 0;
	}

	return -EOPNOTSUPP;
}
182
183 const struct xdp_metadata_ops mlx5e_xdp_metadata_ops = {
184 .xmo_rx_timestamp = mlx5e_xdp_rx_timestamp,
185 .xmo_rx_hash = mlx5e_xdp_rx_hash,
186 };
187
188 /* returns true if packet was consumed by xdp */
mlx5e_xdp_handle(struct mlx5e_rq * rq,struct bpf_prog * prog,struct mlx5e_xdp_buff * mxbuf)189 bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
190 struct bpf_prog *prog, struct mlx5e_xdp_buff *mxbuf)
191 {
192 struct xdp_buff *xdp = &mxbuf->xdp;
193 u32 act;
194 int err;
195
196 act = bpf_prog_run_xdp(prog, xdp);
197 switch (act) {
198 case XDP_PASS:
199 return false;
200 case XDP_TX:
201 if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, xdp)))
202 goto xdp_abort;
203 __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
204 return true;
205 case XDP_REDIRECT:
206 /* When XDP enabled then page-refcnt==1 here */
207 err = xdp_do_redirect(rq->netdev, xdp, prog);
208 if (unlikely(err))
209 goto xdp_abort;
210 __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
211 __set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
212 if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL)
213 mlx5e_page_dma_unmap(rq, virt_to_page(xdp->data));
214 rq->stats->xdp_redirect++;
215 return true;
216 default:
217 bpf_warn_invalid_xdp_action(rq->netdev, prog, act);
218 fallthrough;
219 case XDP_ABORTED:
220 xdp_abort:
221 trace_xdp_exception(rq->netdev, prog, act);
222 fallthrough;
223 case XDP_DROP:
224 rq->stats->xdp_drop++;
225 return true;
226 }
227 }
228
/* Return the producer index at which a WQE of @size WQEBBs can be written
 * contiguously.
 *
 * When fewer than @size contiguous WQEBBs remain at the current position,
 * the remainder is filled with NOPs (each recorded as a one-WQEBB,
 * zero-packet entry) and the index is recomputed from the advanced
 * producer counter.
 */
static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	u16 room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
	u16 k;

	if (likely(room >= size))
		return pi;

	/* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
	for (k = 0; k < room; k++) {
		sq->db.wqe_info[pi + k] = (struct mlx5e_xdp_wqe_info) {
			.num_wqebbs = 1,
			.num_pkts = 0,
		};
		mlx5e_post_nop(wq, sq->sqn, &sq->pc);
	}
	sq->stats->nops += room;

	return mlx5_wq_cyc_ctr2ix(wq, sq->pc);
}
257
mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq * sq)258 static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
259 {
260 struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
261 struct mlx5e_xdpsq_stats *stats = sq->stats;
262 struct mlx5e_tx_wqe *wqe;
263 u16 pi;
264
265 pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
266 wqe = MLX5E_TX_FETCH_WQE(sq, pi);
267 net_prefetchw(wqe->data);
268
269 *session = (struct mlx5e_tx_mpwqe) {
270 .wqe = wqe,
271 .bytes_count = 0,
272 .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
273 .pkt_count = 0,
274 .inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on),
275 };
276
277 stats->mpwqe++;
278 }
279
mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq * sq)280 void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
281 {
282 struct mlx5_wq_cyc *wq = &sq->wq;
283 struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
284 struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
285 u16 ds_count = session->ds_count;
286 u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
287 struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
288
289 cseg->opmod_idx_opcode =
290 cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
291 cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);
292
293 wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
294 wi->num_pkts = session->pkt_count;
295
296 sq->pc += wi->num_wqebbs;
297
298 sq->doorbell_cseg = cseg;
299
300 session->wqe = NULL; /* Close session */
301 }
302
/* Positive results of the xmit_xdp_frame_check callbacks; a negative errno
 * is returned instead when the SQ has no room.
 */
enum {
	/* Room is available (or an MPWQE session is already open). */
	MLX5E_XDP_CHECK_OK = 1,
	/* Room is available, but an MPWQE session has to be started first. */
	MLX5E_XDP_CHECK_START_MPWQE = 2,
};
307
mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq * sq)308 INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
309 {
310 if (unlikely(!sq->mpwqe.wqe)) {
311 if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
312 sq->stop_room))) {
313 /* SQ is full, ring doorbell */
314 mlx5e_xmit_xdp_doorbell(sq);
315 sq->stats->full++;
316 return -EBUSY;
317 }
318
319 return MLX5E_XDP_CHECK_START_MPWQE;
320 }
321
322 return MLX5E_XDP_CHECK_OK;
323 }
324
325 INDIRECT_CALLABLE_SCOPE bool
326 mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
327 struct skb_shared_info *sinfo, int check_result);
328
329 INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq * sq,struct mlx5e_xmit_data * xdptxd,struct skb_shared_info * sinfo,int check_result)330 mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
331 struct skb_shared_info *sinfo, int check_result)
332 {
333 struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
334 struct mlx5e_xdpsq_stats *stats = sq->stats;
335
336 if (unlikely(sinfo)) {
337 /* MPWQE is enabled, but a multi-buffer packet is queued for
338 * transmission. MPWQE can't send fragmented packets, so close
339 * the current session and fall back to a regular WQE.
340 */
341 if (unlikely(sq->mpwqe.wqe))
342 mlx5e_xdp_mpwqe_complete(sq);
343 return mlx5e_xmit_xdp_frame(sq, xdptxd, sinfo, 0);
344 }
345
346 if (unlikely(xdptxd->len > sq->hw_mtu)) {
347 stats->err++;
348 return false;
349 }
350
351 if (!check_result)
352 check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq);
353 if (unlikely(check_result < 0))
354 return false;
355
356 if (check_result == MLX5E_XDP_CHECK_START_MPWQE) {
357 /* Start the session when nothing can fail, so it's guaranteed
358 * that if there is an active session, it has at least one dseg,
359 * and it's safe to complete it at any time.
360 */
361 mlx5e_xdp_mpwqe_session_start(sq);
362 }
363
364 mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
365
366 if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs)))
367 mlx5e_xdp_mpwqe_complete(sq);
368
369 stats->xmit++;
370 return true;
371 }
372
mlx5e_xmit_xdp_frame_check_stop_room(struct mlx5e_xdpsq * sq,int stop_room)373 static int mlx5e_xmit_xdp_frame_check_stop_room(struct mlx5e_xdpsq *sq, int stop_room)
374 {
375 if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, stop_room))) {
376 /* SQ is full, ring doorbell */
377 mlx5e_xmit_xdp_doorbell(sq);
378 sq->stats->full++;
379 return -EBUSY;
380 }
381
382 return MLX5E_XDP_CHECK_OK;
383 }
384
mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq * sq)385 INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
386 {
387 return mlx5e_xmit_xdp_frame_check_stop_room(sq, 1);
388 }
389
390 INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq * sq,struct mlx5e_xmit_data * xdptxd,struct skb_shared_info * sinfo,int check_result)391 mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
392 struct skb_shared_info *sinfo, int check_result)
393 {
394 struct mlx5_wq_cyc *wq = &sq->wq;
395 struct mlx5_wqe_ctrl_seg *cseg;
396 struct mlx5_wqe_data_seg *dseg;
397 struct mlx5_wqe_eth_seg *eseg;
398 struct mlx5e_tx_wqe *wqe;
399
400 dma_addr_t dma_addr = xdptxd->dma_addr;
401 u32 dma_len = xdptxd->len;
402 u16 ds_cnt, inline_hdr_sz;
403 u8 num_wqebbs = 1;
404 int num_frags = 0;
405 u16 pi;
406
407 struct mlx5e_xdpsq_stats *stats = sq->stats;
408
409 if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
410 stats->err++;
411 return false;
412 }
413
414 ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
415 if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE)
416 ds_cnt++;
417
418 /* check_result must be 0 if sinfo is passed. */
419 if (!check_result) {
420 int stop_room = 1;
421
422 if (unlikely(sinfo)) {
423 ds_cnt += sinfo->nr_frags;
424 num_frags = sinfo->nr_frags;
425 num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
426 /* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big
427 * enough to hold all fragments.
428 */
429 stop_room = MLX5E_STOP_ROOM(num_wqebbs);
430 }
431
432 check_result = mlx5e_xmit_xdp_frame_check_stop_room(sq, stop_room);
433 }
434 if (unlikely(check_result < 0))
435 return false;
436
437 pi = mlx5e_xdpsq_get_next_pi(sq, num_wqebbs);
438 wqe = mlx5_wq_cyc_get_wqe(wq, pi);
439 net_prefetchw(wqe);
440
441 cseg = &wqe->ctrl;
442 eseg = &wqe->eth;
443 dseg = wqe->data;
444
445 inline_hdr_sz = 0;
446
447 /* copy the inline part if required */
448 if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
449 memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
450 memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
451 MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start));
452 dma_len -= MLX5E_XDP_MIN_INLINE;
453 dma_addr += MLX5E_XDP_MIN_INLINE;
454 inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
455 dseg++;
456 }
457
458 /* write the dma part */
459 dseg->addr = cpu_to_be64(dma_addr);
460 dseg->byte_count = cpu_to_be32(dma_len);
461
462 cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
463
464 if (unlikely(test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state))) {
465 u8 num_pkts = 1 + num_frags;
466 int i;
467
468 memset(&cseg->trailer, 0, sizeof(cseg->trailer));
469 memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer));
470
471 eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
472 dseg->lkey = sq->mkey_be;
473
474 for (i = 0; i < num_frags; i++) {
475 skb_frag_t *frag = &sinfo->frags[i];
476 dma_addr_t addr;
477
478 addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
479 skb_frag_off(frag);
480
481 dseg++;
482 dseg->addr = cpu_to_be64(addr);
483 dseg->byte_count = cpu_to_be32(skb_frag_size(frag));
484 dseg->lkey = sq->mkey_be;
485 }
486
487 cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
488
489 sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) {
490 .num_wqebbs = num_wqebbs,
491 .num_pkts = num_pkts,
492 };
493
494 sq->pc += num_wqebbs;
495 } else {
496 cseg->fm_ce_se = 0;
497
498 sq->pc++;
499 }
500
501 sq->doorbell_cseg = cseg;
502
503 stats->xmit++;
504 return true;
505 }
506
/* Release the resources behind one completed (or flushed) WQE.
 *
 * @sq:         XDP send queue owning the xdpi FIFO
 * @wi:         WQE info; wi->num_pkts FIFO entries are popped
 * @xsk_frames: counter incremented once per MLX5E_XDP_XMIT_MODE_XSK entry
 * @recycle:    passed on to mlx5e_page_release_dynamic() for page entries
 * @bq:         bulk queue used when returning xdp_frames
 */
static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
				  struct mlx5e_xdp_wqe_info *wi,
				  u32 *xsk_frames,
				  bool recycle,
				  struct xdp_frame_bulk *bq)
{
	struct mlx5e_xdp_info_fifo *fifo = &sq->db.xdpi_fifo;
	u16 pkt;

	for (pkt = 0; pkt < wi->num_pkts; pkt++) {
		struct mlx5e_xdp_info entry = mlx5e_xdpi_fifo_pop(fifo);

		switch (entry.mode) {
		case MLX5E_XDP_XMIT_MODE_XSK:
			/* AF_XDP send */
			(*xsk_frames)++;
			break;
		case MLX5E_XDP_XMIT_MODE_PAGE:
			/* XDP_TX from the regular RQ */
			mlx5e_page_release_dynamic(entry.page.rq,
						   entry.page.page, recycle);
			break;
		case MLX5E_XDP_XMIT_MODE_FRAME:
			/* XDP_TX from the XSK RQ and XDP_REDIRECT */
			dma_unmap_single(sq->pdev, entry.frame.dma_addr,
					 entry.frame.xdpf->len, DMA_TO_DEVICE);
			xdp_return_frame_bulk(entry.frame.xdpf, bq);
			break;
		default:
			WARN_ON_ONCE(true);
		}
	}
}
539
mlx5e_poll_xdpsq_cq(struct mlx5e_cq * cq)540 bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
541 {
542 struct xdp_frame_bulk bq;
543 struct mlx5e_xdpsq *sq;
544 struct mlx5_cqe64 *cqe;
545 u32 xsk_frames = 0;
546 u16 sqcc;
547 int i;
548
549 xdp_frame_bulk_init(&bq);
550
551 sq = container_of(cq, struct mlx5e_xdpsq, cq);
552
553 if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
554 return false;
555
556 cqe = mlx5_cqwq_get_cqe(&cq->wq);
557 if (!cqe)
558 return false;
559
560 /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
561 * otherwise a cq overrun may occur
562 */
563 sqcc = sq->cc;
564
565 i = 0;
566 do {
567 struct mlx5e_xdp_wqe_info *wi;
568 u16 wqe_counter, ci;
569 bool last_wqe;
570
571 mlx5_cqwq_pop(&cq->wq);
572
573 wqe_counter = be16_to_cpu(cqe->wqe_counter);
574
575 do {
576 last_wqe = (sqcc == wqe_counter);
577 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
578 wi = &sq->db.wqe_info[ci];
579
580 sqcc += wi->num_wqebbs;
581
582 mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true, &bq);
583 } while (!last_wqe);
584
585 if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
586 netdev_WARN_ONCE(sq->channel->netdev,
587 "Bad OP in XDPSQ CQE: 0x%x\n",
588 get_cqe_opcode(cqe));
589 mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
590 (struct mlx5_err_cqe *)cqe);
591 mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
592 }
593 } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
594
595 xdp_flush_frame_bulk(&bq);
596
597 if (xsk_frames)
598 xsk_tx_completed(sq->xsk_pool, xsk_frames);
599
600 sq->stats->cqes += i;
601
602 mlx5_cqwq_update_db_record(&cq->wq);
603
604 /* ensure cq space is freed before enabling more cqes */
605 wmb();
606
607 sq->cc = sqcc;
608 return (i == MLX5E_TX_CQ_POLL_BUDGET);
609 }
610
mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq * sq)611 void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
612 {
613 struct xdp_frame_bulk bq;
614 u32 xsk_frames = 0;
615
616 xdp_frame_bulk_init(&bq);
617
618 rcu_read_lock(); /* need for xdp_return_frame_bulk */
619
620 while (sq->cc != sq->pc) {
621 struct mlx5e_xdp_wqe_info *wi;
622 u16 ci;
623
624 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
625 wi = &sq->db.wqe_info[ci];
626
627 sq->cc += wi->num_wqebbs;
628
629 mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false, &bq);
630 }
631
632 xdp_flush_frame_bulk(&bq);
633 rcu_read_unlock();
634
635 if (xsk_frames)
636 xsk_tx_completed(sq->xsk_pool, xsk_frames);
637 }
638
/* Transmit a batch of redirected XDP frames.
 *
 * @dev:    target net device
 * @n:      number of entries in @frames
 * @frames: frames to send
 * @flags:  XDP_XMIT_* flags; XDP_XMIT_FLUSH closes any open MPWQE session
 *          and rings the doorbell after the batch
 *
 * The SQ is picked by the current CPU id. Transmission stops at the first
 * frame that fails to map or to be posted.
 *
 * Returns the number of frames queued, or -ENETDOWN when XDP TX is not
 * enabled, -EINVAL on unknown flags, -ENXIO when the CPU id has no
 * matching channel.
 */
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
		   u32 flags)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_xdpsq *sq;
	int nxmit = 0;
	int cpu;
	int idx;

	/* this flag is sufficient, no need to test internal sq state */
	if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
		return -ENETDOWN;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	cpu = smp_processor_id();
	if (unlikely(cpu >= priv->channels.num))
		return -ENXIO;

	sq = &priv->channels.c[cpu]->xdpsq;

	for (idx = 0; idx < n; idx++) {
		struct xdp_frame *xdpf = frames[idx];
		struct mlx5e_xmit_data xdptxd;
		struct mlx5e_xdp_info xdpi;

		xdptxd.data = xdpf->data;
		xdptxd.len = xdpf->len;
		xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data,
						 xdptxd.len, DMA_TO_DEVICE);

		if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr)))
			break;

		if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame,
					      mlx5e_xmit_xdp_frame_mpwqe,
					      mlx5e_xmit_xdp_frame,
					      sq, &xdptxd, NULL, 0))) {
			/* Not posted: drop the mapping taken above. */
			dma_unmap_single(sq->pdev, xdptxd.dma_addr,
					 xdptxd.len, DMA_TO_DEVICE);
			break;
		}

		/* Posted: record what the completion path must release. */
		xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
		xdpi.frame.xdpf = xdpf;
		xdpi.frame.dma_addr = xdptxd.dma_addr;
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
		nxmit++;
	}

	if (flags & XDP_XMIT_FLUSH) {
		if (sq->mpwqe.wqe)
			mlx5e_xdp_mpwqe_complete(sq);
		mlx5e_xmit_xdp_doorbell(sq);
	}

	return nxmit;
}
699
mlx5e_xdp_rx_poll_complete(struct mlx5e_rq * rq)700 void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
701 {
702 struct mlx5e_xdpsq *xdpsq = rq->xdpsq;
703
704 if (xdpsq->mpwqe.wqe)
705 mlx5e_xdp_mpwqe_complete(xdpsq);
706
707 mlx5e_xmit_xdp_doorbell(xdpsq);
708
709 if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags)) {
710 xdp_do_flush_map();
711 __clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
712 }
713 }
714
/* Select the transmit and pre-transmit-check callbacks of @sq: the MPWQE
 * variants when @is_mpw is true, the regular WQE variants otherwise.
 */
void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
{
	if (is_mpw) {
		sq->xmit_xdp_frame_check = mlx5e_xmit_xdp_frame_check_mpwqe;
		sq->xmit_xdp_frame = mlx5e_xmit_xdp_frame_mpwqe;
		return;
	}

	sq->xmit_xdp_frame_check = mlx5e_xmit_xdp_frame_check;
	sq->xmit_xdp_frame = mlx5e_xmit_xdp_frame;
}
722