// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include "rx.h"
#include "en/xdp.h"
#include <net/xdp_sock_drv.h>
#include <linux/filter.h>

/* RX data path */

static struct mlx5e_xdp_buff *xsk_buff_to_mxbuf(struct xdp_buff *xdp)
{
	/* mlx5e_xdp_buff shares its layout with xdp_buff_xsk
	 * and private mlx5e_xdp_buff fields fall into xdp_buff_xsk->cb
	 */
	return (struct mlx5e_xdp_buff *)xdp;
}

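/* Build a UMR WQE on the ICOSQ that maps a full batch of XSK frames for one
 * multi-packet WQE. Returns 0 on success or -ENOMEM if the XSK pool cannot
 * provide enough frames.
 */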
int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
	struct mlx5e_icosq *icosq = rq->icosq;
	struct mlx5_wq_cyc *wq = &icosq->wq;
	struct mlx5e_umr_wqe *umr_wqe;
	int batch, i;
	u32 offset; /* 17-bit value with MTT. */
	u16 pi;

	if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe)))
		goto err;

	BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk));
	XSK_CHECK_PRIV_TYPE(struct mlx5e_xdp_buff);
	batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units,
				     rq->mpwqe.pages_per_wqe);

	/* If batch < pages_per_wqe, either:
	 * 1. Some (or all) descriptors were invalid.
	 * 2. dma_need_sync is true, and it fell back to allocating one frame.
	 * In either case, try to continue allocating frames one by one, until
	 * the first error, which will mean there are no more valid descriptors.
	 */
	for (; batch < rq->mpwqe.pages_per_wqe; batch++) {
		wi->alloc_units[batch].xsk = xsk_buff_alloc(rq->xsk_pool);
		if (unlikely(!wi->alloc_units[batch].xsk))
			goto err_reuse_batch;
	}

	pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs);
	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));

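	/* Fill the UMR translation entries for each allocated frame. The entry
	 * format depends on the UMR mode: MTTs for aligned mode, KSMs for
	 * unaligned and triple modes, KLM pairs for oversized mode. Each frame
	 * also stores a back-pointer to the RQ in its private mlx5e_xdp_buff.
	 */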
	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) {
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);

			umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
				.ptag = cpu_to_be64(addr | MLX5_EN_WR),
			};
			mxbuf->rq = rq;
		}
	} else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) {
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);

			umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
			};
			mxbuf->rq = rq;
		}
	} else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) {
		u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2);

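		/* Triple mode: every frame consumes four KSM entries. Three
		 * cover consecutive mapping_size slices starting at the
		 * frame's DMA address, and the fourth points at the shared
		 * overflow page.
		 */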
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);

			umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
			};
			umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr + mapping_size),
			};
			umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr + mapping_size * 2),
			};
			umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(rq->wqe_overflow.addr),
			};
			mxbuf->rq = rq;
		}
	} else {
		__be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) -
					      rq->xsk_pool->chunk_size);
		__be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size);

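		/* Oversized mode: every frame consumes two KLM entries. One
		 * covers the XSK chunk itself (frame_size bytes), and one
		 * points at the shared overflow page (pad_size bytes), padding
		 * the mapping up to a full page.
		 */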
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);

			umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
				.bcount = frame_size,
			};
			umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(rq->wqe_overflow.addr),
				.bcount = pad_size,
			};
			mxbuf->rq = rq;
		}
	}

	bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
	wi->consumed_strides = 0;

	umr_wqe->ctrl.opmod_idx_opcode =
		cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR);

	/* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */
	offset = ix * rq->mpwqe.mtts_per_wqe;
	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
		offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
	else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED))
		offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD;
	else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE))
		offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD;
	umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);

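	/* Account for the UMR WQE on the ICOSQ: record its type and size so
	 * the completion handler can attribute it to this RQ, advance the
	 * producer counter, and remember the control segment for the doorbell.
	 */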
	icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
		.wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
		.num_wqebbs = rq->mpwqe.umr_wqebbs,
		.umr.rq = rq,
	};

	icosq->pc += rq->mpwqe.umr_wqebbs;

	icosq->doorbell_cseg = &umr_wqe->ctrl;

	return 0;

err_reuse_batch:
	while (--batch >= 0)
		xsk_buff_free(wi->alloc_units[batch].xsk);

err:
	rq->stats->buff_alloc_err++;
	return -ENOMEM;
}

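/* Batched allocation for the legacy (cyclic) RQ: fill up to @wqe_bulk WQEs
 * starting at index @ix with frames from the XSK pool. Returns the number of
 * WQEs that were actually filled, which may be less than @wqe_bulk.
 */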
int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	struct xdp_buff **buffs;
	u32 contig, alloc;
	int i;

	/* mlx5e_init_frags_partition creates a 1:1 mapping between
	 * rq->wqe.frags and rq->wqe.alloc_units, which allows us to
	 * allocate XDP buffers straight into alloc_units.
	 */
	BUILD_BUG_ON(sizeof(rq->wqe.alloc_units[0]) !=
		     sizeof(rq->wqe.alloc_units[0].xsk));
	buffs = (struct xdp_buff **)rq->wqe.alloc_units;
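	/* The bulk may wrap around the end of the ring; in that case allocate
	 * in two contiguous batches, attempting the second one only if the
	 * first was filled completely.
	 */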
	contig = mlx5_wq_cyc_get_size(wq) - ix;
	if (wqe_bulk <= contig) {
		alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk);
	} else {
		alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, contig);
		if (likely(alloc == contig))
			alloc += xsk_buff_alloc_batch(rq->xsk_pool, buffs, wqe_bulk - contig);
	}

	for (i = 0; i < alloc; i++) {
		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
		struct mlx5e_wqe_frag_info *frag;
		struct mlx5e_rx_wqe_cyc *wqe;
		dma_addr_t addr;

		wqe = mlx5_wq_cyc_get_wqe(wq, j);
		/* Assumes log_num_frags == 0. */
		frag = &rq->wqe.frags[j];

		addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
	}

	return alloc;
}

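/* Non-batched fallback: allocate XSK frames one by one for up to @wqe_bulk
 * WQEs starting at index @ix. Returns the number of WQEs filled before the
 * first allocation failure.
 */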
int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	int i;

	for (i = 0; i < wqe_bulk; i++) {
		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
		struct mlx5e_wqe_frag_info *frag;
		struct mlx5e_rx_wqe_cyc *wqe;
		dma_addr_t addr;

		wqe = mlx5_wq_cyc_get_wqe(wq, j);
		/* Assumes log_num_frags == 0. */
		frag = &rq->wqe.frags[j];

		frag->au->xsk = xsk_buff_alloc(rq->xsk_pool);
		if (unlikely(!frag->au->xsk))
			return i;

		addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
	}

	return wqe_bulk;
}

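/* XDP_PASS path: copy the frame, including any XDP metadata, out of the UMEM
 * into a freshly allocated SKB so that the XSK frame can be recycled.
 */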
static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp)
{
	u32 totallen = xdp->data_end - xdp->data_meta;
	u32 metalen = xdp->data - xdp->data_meta;
	struct sk_buff *skb;

	skb = napi_alloc_skb(rq->cq.napi, totallen);
	if (unlikely(!skb)) {
		rq->stats->buff_alloc_err++;
		return NULL;
	}

	skb_put_data(skb, xdp->data_meta, totallen);

	if (metalen) {
		skb_metadata_set(skb, metalen);
		__skb_pull(skb, metalen);
	}

	return skb;
}

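/* Handle one packet received on a multi-packet WQE in XSK mode: run the XDP
 * program if one is attached, and build an SKB only on XDP_PASS. Returns NULL
 * when the packet was consumed or dropped by XDP, dropped as oversized, or
 * when SKB allocation fails.
 */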
struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
						    struct mlx5e_mpw_info *wi,
						    struct mlx5_cqe64 *cqe,
						    u16 cqe_bcnt,
						    u32 head_offset,
						    u32 page_idx)
{
	struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[page_idx].xsk);
	struct bpf_prog *prog;

	/* Check the packet size. Note that LRO doesn't use linear SKBs. */
	if (unlikely(cqe_bcnt > rq->hw_mtu)) {
		rq->stats->oversize_pkts_sw_drop++;
		return NULL;
	}

	/* head_offset is not used in this function, because xdp->data and the
	 * DMA address point directly to the necessary place. Furthermore, in
	 * the current implementation, UMR pages are mapped to XSK frames, so
	 * head_offset should always be 0.
	 */
	WARN_ON_ONCE(head_offset);

	/* mxbuf->rq is set on allocation, but cqe is per-packet, so set it here. */
	mxbuf->cqe = cqe;
	xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
	xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
	net_prefetch(mxbuf->xdp.data);

	/* Possible flows:
	 * - XDP_REDIRECT to XSKMAP:
	 *   The page is owned by userspace from now on.
	 * - XDP_TX and other XDP_REDIRECTs:
	 *   The page was returned by ZCA and recycled.
	 * - XDP_DROP:
	 *   Recycle the page.
	 * - XDP_PASS:
	 *   Allocate an SKB, copy the data and recycle the page.
	 *
	 * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its
	 * size is the same as the Driver RX Ring's size, and pages for WQEs are
	 * allocated first from the Reuse Ring, so it has enough space.
	 */

	prog = rcu_dereference(rq->xdp_prog);
	if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) {
		if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
			__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
		return NULL; /* page/packet was consumed by XDP */
	}

	/* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
	 * frame. On SKB allocation failure, NULL is returned.
	 */
	return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp);
}

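/* Handle one packet received on a legacy (cyclic) RQ WQE in XSK mode: run the
 * XDP program if one is attached, and build an SKB only on XDP_PASS. Returns
 * NULL when XDP consumed or dropped the packet, or when SKB allocation fails.
 */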
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
					      struct mlx5e_wqe_frag_info *wi,
					      struct mlx5_cqe64 *cqe,
					      u32 cqe_bcnt)
{
	struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->au->xsk);
	struct bpf_prog *prog;

	/* wi->offset is not used in this function, because xdp->data and the
	 * DMA address point directly to the necessary place. Furthermore, the
	 * XSK allocator allocates frames per packet, instead of pages, so
	 * wi->offset should always be 0.
	 */
	WARN_ON_ONCE(wi->offset);

	/* mxbuf->rq is set on allocation, but cqe is per-packet, so set it here. */
	mxbuf->cqe = cqe;
	xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
	xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
	net_prefetch(mxbuf->xdp.data);

	prog = rcu_dereference(rq->xdp_prog);
	if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf)))
		return NULL; /* page/packet was consumed by XDP */

	/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
	 * will be handled by mlx5e_free_rx_wqe.
	 * On SKB allocation failure, NULL is returned.
	 */
	return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp);
}