// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include "rx.h"
#include "en/xdp.h"
#include <net/xdp_sock_drv.h>
#include <linux/filter.h>

/* RX data path */

static struct mlx5e_xdp_buff *xsk_buff_to_mxbuf(struct xdp_buff *xdp)
{
	/* mlx5e_xdp_buff shares its layout with xdp_buff_xsk
	 * and private mlx5e_xdp_buff fields fall into xdp_buff_xsk->cb
	 */
	return (struct mlx5e_xdp_buff *)xdp;
}

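/* Post a UMR WQE that maps a full batch of XSK frames for one multi-packet
 * WQE (MPWQE). Frames are taken from the XSK buffer pool in a batch (with a
 * one-by-one fallback), translated into MTT/KSM/KLM entries according to the
 * UMR mode, and the UMR WQE is queued on the ICOSQ. Returns 0 on success or
 * -ENOMEM when the pool cannot provide enough frames.
 */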
int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
	struct mlx5e_icosq *icosq = rq->icosq;
	struct mlx5_wq_cyc *wq = &icosq->wq;
	struct mlx5e_umr_wqe *umr_wqe;
	int batch, i;
	u32 offset; /* 17-bit value with MTT. */
	u16 pi;

	if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe)))
		goto err;

	BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk));
	XSK_CHECK_PRIV_TYPE(struct mlx5e_xdp_buff);
	batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units,
				     rq->mpwqe.pages_per_wqe);

	/* If batch < pages_per_wqe, either:
	 * 1. Some (or all) descriptors were invalid.
	 * 2. dma_need_sync is true, and it fell back to allocating one frame.
	 * In either case, try to continue allocating frames one by one, until
	 * the first error, which will mean there are no more valid descriptors.
	 */
	for (; batch < rq->mpwqe.pages_per_wqe; batch++) {
		wi->alloc_units[batch].xsk = xsk_buff_alloc(rq->xsk_pool);
		if (unlikely(!wi->alloc_units[batch].xsk))
			goto err_reuse_batch;
	}

	pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs);
	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));

	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) {
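		/* Aligned mode: each XSK frame is described by a single MTT
		 * entry holding its DMA address, with MLX5_EN_WR setting the
		 * write-enable bit in the ptag.
		 */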
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);

			umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
				.ptag = cpu_to_be64(addr | MLX5_EN_WR),
			};
			mxbuf->rq = rq;
		}
	} else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) {
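		/* Unaligned mode: frame addresses need not be page-aligned,
		 * so page-granular MTTs cannot describe them; each frame gets
		 * one KSM entry mapping an arbitrary address via rq->mkey_be.
		 */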
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);

			umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
			};
			mxbuf->rq = rq;
		}
	} else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) {
		u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2);

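		/* Triple mode: the XSK frame covers three quarters of the
		 * hardware page (e.g. a 3072-byte frame on a 4096-byte page),
		 * so it is mapped with three KSMs of mapping_size bytes each,
		 * and a fourth KSM points at the shared wqe_overflow page to
		 * fill the remaining quarter.
		 */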
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);

			umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
			};
			umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr + mapping_size),
			};
			umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr + mapping_size * 2),
			};
			umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(rq->wqe_overflow.addr),
			};
			mxbuf->rq = rq;
		}
	} else {
		__be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) -
					      rq->xsk_pool->chunk_size);
		__be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size);

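		/* Remaining mode (MLX5E_MPWRQ_UMR_MODE_OVERSIZED): the
		 * hardware page is larger than the XSK chunk, so each frame
		 * is mapped with two KLMs: one covering the chunk itself
		 * (frame_size bytes) and one pointing at the wqe_overflow
		 * page to pad the stride up to the page size.
		 */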
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);

			umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
				.bcount = frame_size,
			};
			umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(rq->wqe_overflow.addr),
				.bcount = pad_size,
			};
			mxbuf->rq = rq;
		}
	}

	bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
	wi->consumed_strides = 0;

	umr_wqe->ctrl.opmod_idx_opcode =
		cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR);

	/* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */
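	/* The offset is converted from UMR entries to octwords (MLX5_OCTWORD,
	 * 16 bytes): ALIGNED uses one 8-byte MTT per frame (offset / 2),
	 * OVERSIZED two 16-byte KLMs (offset * 2), TRIPLE four 16-byte KSMs
	 * (offset * 4), and the remaining UNALIGNED mode a single 16-byte KSM,
	 * i.e. exactly one octword, so it needs no adjustment.
	 */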
	offset = ix * rq->mpwqe.mtts_per_wqe;
	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
		offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
	else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED))
		offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD;
	else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE))
		offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD;
	umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);

	icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
		.wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
		.num_wqebbs = rq->mpwqe.umr_wqebbs,
		.umr.rq = rq,
	};

	icosq->pc += rq->mpwqe.umr_wqebbs;

	icosq->doorbell_cseg = &umr_wqe->ctrl;

	return 0;

err_reuse_batch:
	while (--batch >= 0)
		xsk_buff_free(wi->alloc_units[batch].xsk);

err:
	rq->stats->buff_alloc_err++;
	return -ENOMEM;
}

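/* Fill wqe_bulk legacy (non-striding) RX WQEs with XSK frames using the
 * batched XSK allocation API. Returns the number of WQEs actually filled,
 * which may be less than wqe_bulk if the pool runs out of frames.
 */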
int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	struct xdp_buff **buffs;
	u32 contig, alloc;
	int i;

	/* mlx5e_init_frags_partition creates a 1:1 mapping between
	 * rq->wqe.frags and rq->wqe.alloc_units, which allows us to
	 * allocate XDP buffers straight into alloc_units.
	 */
	BUILD_BUG_ON(sizeof(rq->wqe.alloc_units[0]) !=
		     sizeof(rq->wqe.alloc_units[0].xsk));
	buffs = (struct xdp_buff **)rq->wqe.alloc_units;
	contig = mlx5_wq_cyc_get_size(wq) - ix;
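	/* The WQ is a circular buffer: if the bulk would run past the end of
	 * the ring, fill the contiguous tail first and, only if that part was
	 * fully allocated, wrap around and continue from index 0.
	 */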
	if (wqe_bulk <= contig) {
		alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk);
	} else {
		alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, contig);
		if (likely(alloc == contig))
			alloc += xsk_buff_alloc_batch(rq->xsk_pool, buffs, wqe_bulk - contig);
	}

	for (i = 0; i < alloc; i++) {
		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
		struct mlx5e_wqe_frag_info *frag;
		struct mlx5e_rx_wqe_cyc *wqe;
		dma_addr_t addr;

		wqe = mlx5_wq_cyc_get_wqe(wq, j);
		/* Assumes log_num_frags == 0. */
		frag = &rq->wqe.frags[j];

		addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
	}

	return alloc;
}

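/* Non-batched variant of mlx5e_xsk_alloc_rx_wqes_batched: allocate one XSK
 * frame per WQE and stop at the first allocation failure, returning the
 * number of WQEs filled so far.
 */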
int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	int i;

	for (i = 0; i < wqe_bulk; i++) {
		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
		struct mlx5e_wqe_frag_info *frag;
		struct mlx5e_rx_wqe_cyc *wqe;
		dma_addr_t addr;

		wqe = mlx5_wq_cyc_get_wqe(wq, j);
		/* Assumes log_num_frags == 0. */
		frag = &rq->wqe.frags[j];

		frag->au->xsk = xsk_buff_alloc(rq->xsk_pool);
		if (unlikely(!frag->au->xsk))
			return i;

		addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
	}

	return wqe_bulk;
}

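/* XDP_PASS path: build an SKB by copying the frame contents (including any
 * XDP metadata in front of the packet) out of the UMEM, so that the XSK
 * frame itself can be recycled.
 */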
static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp)
{
	u32 totallen = xdp->data_end - xdp->data_meta;
	u32 metalen = xdp->data - xdp->data_meta;
	struct sk_buff *skb;

	skb = napi_alloc_skb(rq->cq.napi, totallen);
	if (unlikely(!skb)) {
		rq->stats->buff_alloc_err++;
		return NULL;
	}

	skb_put_data(skb, xdp->data_meta, totallen);

	if (metalen) {
		skb_metadata_set(skb, metalen);
		__skb_pull(skb, metalen);
	}

	return skb;
}

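/* Handle one received packet on the XSK striding RQ (MPWQE) path: run the
 * XDP program on the frame in place and, for XDP_PASS, copy the payload into
 * a freshly allocated SKB. Returns NULL if the packet was consumed by XDP,
 * dropped, or if the SKB allocation failed.
 */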
struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
						    struct mlx5e_mpw_info *wi,
						    struct mlx5_cqe64 *cqe,
						    u16 cqe_bcnt,
						    u32 head_offset,
						    u32 page_idx)
{
	struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[page_idx].xsk);
	struct bpf_prog *prog;

	/* Check packet size. Note LRO doesn't use linear SKB */
	if (unlikely(cqe_bcnt > rq->hw_mtu)) {
		rq->stats->oversize_pkts_sw_drop++;
		return NULL;
	}

	/* head_offset is not used in this function, because xdp->data and the
	 * DMA address point directly to the necessary place. Furthermore, in
	 * the current implementation, UMR pages are mapped to XSK frames, so
	 * head_offset should always be 0.
	 */
	WARN_ON_ONCE(head_offset);

	/* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
	mxbuf->cqe = cqe;
	xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
	xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
	net_prefetch(mxbuf->xdp.data);

	/* Possible flows:
	 * - XDP_REDIRECT to XSKMAP:
	 *   The page is now owned by userspace.
	 * - XDP_TX and other XDP_REDIRECTs:
	 *   The page was returned by ZCA and recycled.
	 * - XDP_DROP:
	 *   Recycle the page.
	 * - XDP_PASS:
	 *   Allocate an SKB, copy the data and recycle the page.
	 *
	 * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its
	 * size is the same as the Driver RX Ring's size, and pages for WQEs are
	 * allocated first from the Reuse Ring, so it has enough space.
	 */

	prog = rcu_dereference(rq->xdp_prog);
	if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) {
		if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
			__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
		return NULL; /* page/packet was consumed by XDP */
	}

	/* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
	 * frame. On SKB allocation failure, NULL is returned.
	 */
	return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp);
}

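/* Same as mlx5e_xsk_skb_from_cqe_mpwrq_linear, but for the legacy (cyclic)
 * RQ: run XDP on the XSK frame and copy the data into an SKB on XDP_PASS.
 * Returns NULL if the packet was consumed by XDP or the SKB allocation failed.
 */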
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
					      struct mlx5e_wqe_frag_info *wi,
					      struct mlx5_cqe64 *cqe,
					      u32 cqe_bcnt)
{
	struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->au->xsk);
	struct bpf_prog *prog;

	/* wi->offset is not used in this function, because xdp->data and the
	 * DMA address point directly to the necessary place. Furthermore, the
	 * XSK allocator allocates frames per packet, instead of pages, so
	 * wi->offset should always be 0.
	 */
	WARN_ON_ONCE(wi->offset);

	/* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
	mxbuf->cqe = cqe;
	xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
	xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
	net_prefetch(mxbuf->xdp.data);

	prog = rcu_dereference(rq->xdp_prog);
	if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf)))
		return NULL; /* page/packet was consumed by XDP */

	/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
	 * will be handled by mlx5e_free_rx_wqe.
	 * On SKB allocation failure, NULL is returned.
	 */
	return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp);
}