// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * AF_XDP user-space access library.
 *
 * Copyright(c) 2018 - 2019 Intel Corporation.
 *
 * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
 */

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <asm/barrier.h>
#include <linux/compiler.h>
#include <linux/ethtool.h>
#include <linux/filter.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_xdp.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <linux/if_link.h>

#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "xsk.h"
#include "bpf_util.h"

#ifndef SOL_XDP
#define SOL_XDP 283
#endif

#ifndef AF_XDP
#define AF_XDP 44
#endif

#ifndef PF_XDP
#define PF_XDP AF_XDP
#endif

#define pr_warn(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)

/* Single-entry XSKMAP: the socket is always installed at index 0. */
#define XSKMAP_SIZE 1

struct xsk_umem {
	struct xsk_ring_prod *fill_save;
	struct xsk_ring_cons *comp_save;
	char *umem_area;
	struct xsk_umem_config config;
	int fd;
	int refcount;
	struct list_head ctx_list;
	bool rx_ring_setup_done;
	bool tx_ring_setup_done;
};

struct xsk_ctx {
	struct xsk_ring_prod *fill;
	struct xsk_ring_cons *comp;
	__u32 queue_id;
	struct xsk_umem *umem;
	int refcount;
	int ifindex;
	struct list_head list;
};

struct xsk_socket {
	struct xsk_ring_cons *rx;
	struct xsk_ring_prod *tx;
	struct xsk_ctx *ctx;
	struct xsk_socket_config config;
	int fd;
};

int xsk_umem__fd(const struct xsk_umem *umem)
{
	return umem ? umem->fd : -EINVAL;
}

int xsk_socket__fd(const struct xsk_socket *xsk)
{
	return xsk ? xsk->fd : -EINVAL;
}

static bool xsk_page_aligned(void *buffer)
{
	unsigned long addr = (unsigned long)buffer;

	return !(addr & (getpagesize() - 1));
}

static void xsk_set_umem_config(struct xsk_umem_config *cfg,
				const struct xsk_umem_config *usr_cfg)
{
	if (!usr_cfg) {
		cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
		cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
		cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
		cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
		cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
		return;
	}

	cfg->fill_size = usr_cfg->fill_size;
	cfg->comp_size = usr_cfg->comp_size;
	cfg->frame_size = usr_cfg->frame_size;
	cfg->frame_headroom = usr_cfg->frame_headroom;
	cfg->flags = usr_cfg->flags;
}
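
/*
 * A minimal sketch of a caller-supplied umem config that overrides the
 * defaults above; the field values are illustrative, not recommendations:
 *
 *	struct xsk_umem_config cfg = {
 *		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2,
 *		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
 *		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
 *		.frame_headroom = 0,
 *		.flags = 0,
 *	};
 *
 * Passing NULL instead selects every default in one step.
 */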

static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
				     const struct xsk_socket_config *usr_cfg)
{
	if (!usr_cfg) {
		cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
		cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
		cfg->bind_flags = 0;
		return 0;
	}

	cfg->rx_size = usr_cfg->rx_size;
	cfg->tx_size = usr_cfg->tx_size;
	cfg->bind_flags = usr_cfg->bind_flags;

	return 0;
}

static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
{
	socklen_t optlen;
	int err;

	optlen = sizeof(*off);
	err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
	if (err)
		return err;

	if (optlen == sizeof(*off))
		return 0;

	return -EINVAL;
}

static int xsk_create_umem_rings(struct xsk_umem *umem, int fd,
				 struct xsk_ring_prod *fill,
				 struct xsk_ring_cons *comp)
{
	struct xdp_mmap_offsets off;
	void *map;
	int err;

	err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
			 &umem->config.fill_size,
			 sizeof(umem->config.fill_size));
	if (err)
		return -errno;

	err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
			 &umem->config.comp_size,
			 sizeof(umem->config.comp_size));
	if (err)
		return -errno;

	err = xsk_get_mmap_offsets(fd, &off);
	if (err)
		return -errno;

	map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
		   XDP_UMEM_PGOFF_FILL_RING);
	if (map == MAP_FAILED)
		return -errno;

	fill->mask = umem->config.fill_size - 1;
	fill->size = umem->config.fill_size;
	fill->producer = map + off.fr.producer;
	fill->consumer = map + off.fr.consumer;
	fill->flags = map + off.fr.flags;
	fill->ring = map + off.fr.desc;
	fill->cached_cons = umem->config.fill_size;

	map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
		   XDP_UMEM_PGOFF_COMPLETION_RING);
	if (map == MAP_FAILED) {
		err = -errno;
		goto out_mmap;
	}

	comp->mask = umem->config.comp_size - 1;
	comp->size = umem->config.comp_size;
	comp->producer = map + off.cr.producer;
	comp->consumer = map + off.cr.consumer;
	comp->flags = map + off.cr.flags;
	comp->ring = map + off.cr.desc;

	return 0;

out_mmap:
	/* "map" holds the failed completion-ring mapping here, so recover
	 * the fill ring's base address from fill->ring instead.
	 */
	munmap(fill->ring - off.fr.desc,
	       off.fr.desc + umem->config.fill_size * sizeof(__u64));
	return err;
}
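
/*
 * Layout note: each ring is a single shared mapping, and off.<ring>.{producer,
 * consumer,flags,desc} are byte offsets into it. A fill-ring slot is then
 * addressed as, for example,
 *
 *	((__u64 *)fill->ring)[idx & fill->mask]
 *
 * which is a sketch of the access pattern the xsk_ring_* helpers in xsk.h
 * build on.
 */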

int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area,
		     __u64 size, struct xsk_ring_prod *fill,
		     struct xsk_ring_cons *comp,
		     const struct xsk_umem_config *usr_config)
{
	struct xdp_umem_reg mr;
	struct xsk_umem *umem;
	int err;

	if (!umem_area || !umem_ptr || !fill || !comp)
		return -EFAULT;
	if (!size && !xsk_page_aligned(umem_area))
		return -EINVAL;

	umem = calloc(1, sizeof(*umem));
	if (!umem)
		return -ENOMEM;

	umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
	if (umem->fd < 0) {
		err = -errno;
		goto out_umem_alloc;
	}

	umem->umem_area = umem_area;
	INIT_LIST_HEAD(&umem->ctx_list);
	xsk_set_umem_config(&umem->config, usr_config);

	memset(&mr, 0, sizeof(mr));
	mr.addr = (uintptr_t)umem_area;
	mr.len = size;
	mr.chunk_size = umem->config.frame_size;
	mr.headroom = umem->config.frame_headroom;
	mr.flags = umem->config.flags;

	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
	if (err) {
		err = -errno;
		goto out_socket;
	}

	err = xsk_create_umem_rings(umem, umem->fd, fill, comp);
	if (err)
		goto out_socket;

	umem->fill_save = fill;
	umem->comp_save = comp;
	*umem_ptr = umem;
	return 0;

out_socket:
	close(umem->fd);
out_umem_alloc:
	free(umem);
	return err;
}
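
/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 * the buffer must be page aligned, e.g. from mmap(), and NUM_FRAMES is a
 * caller-chosen frame count.
 *
 *	struct xsk_ring_prod fill;
 *	struct xsk_ring_cons comp;
 *	struct xsk_umem *umem;
 *	size_t len = NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE;
 *	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	int err = xsk_umem__create(&umem, buf, len, &fill, &comp, NULL);
 *
 * On failure, err is a negative errno value and *umem is untouched.
 */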

bool xsk_is_in_mode(u32 ifindex, int mode)
{
	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
	int ret;

	ret = bpf_xdp_query(ifindex, mode, &opts);
	if (ret) {
		pr_warn("XDP mode query returned error %s\n", strerror(errno));
		return false;
	}

	if (mode == XDP_FLAGS_DRV_MODE)
		return opts.attach_mode == XDP_ATTACHED_DRV;
	else if (mode == XDP_FLAGS_SKB_MODE)
		return opts.attach_mode == XDP_ATTACHED_SKB;

	return false;
}
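
/*
 * Example (hypothetical caller): verify that a driver-mode program is
 * actually attached before exercising a native-XDP path.
 *
 *	if (!xsk_is_in_mode(ifindex, XDP_FLAGS_DRV_MODE))
 *		pr_warn("interface is not in native XDP mode\n");
 */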

int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags)
{
	int prog_fd;

	prog_fd = bpf_program__fd(prog);
	return bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL);
}

void xsk_detach_xdp_program(int ifindex, u32 xdp_flags)
{
	bpf_xdp_detach(ifindex, xdp_flags, NULL);
}

void xsk_clear_xskmap(struct bpf_map *map)
{
	u32 index = 0;
	int map_fd;

	map_fd = bpf_map__fd(map);
	bpf_map_delete_elem(map_fd, &index);
}

int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk)
{
	int map_fd, sock_fd;
	u32 index = 0;

	map_fd = bpf_map__fd(map);
	sock_fd = xsk_socket__fd(xsk);

	return bpf_map_update_elem(map_fd, &index, &sock_fd, 0);
}
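
/*
 * Typical flow (a sketch; "skel" and its program/map names are hypothetical
 * skeleton members): attach the XDP program, then publish the socket in the
 * single-entry XSKMAP (XSKMAP_SIZE == 1, so index 0 is the only slot).
 *
 *	err = xsk_attach_xdp_program(skel->progs.rx_prog, ifindex,
 *				     XDP_FLAGS_DRV_MODE);
 *	if (!err)
 *		err = xsk_update_xskmap(skel->maps.xsk_map, xsk);
 */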

static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
				   __u32 queue_id)
{
	struct xsk_ctx *ctx;

	if (list_empty(&umem->ctx_list))
		return NULL;

	list_for_each_entry(ctx, &umem->ctx_list, list) {
		if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) {
			ctx->refcount++;
			return ctx;
		}
	}

	return NULL;
}

static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
{
	struct xsk_umem *umem = ctx->umem;
	struct xdp_mmap_offsets off;
	int err;

	if (--ctx->refcount)
		return;

	if (!unmap)
		goto out_free;

	err = xsk_get_mmap_offsets(umem->fd, &off);
	if (err)
		goto out_free;

	munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size *
	       sizeof(__u64));
	munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size *
	       sizeof(__u64));

out_free:
	list_del(&ctx->list);
	free(ctx);
}

static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
				      struct xsk_umem *umem, int ifindex,
				      __u32 queue_id,
				      struct xsk_ring_prod *fill,
				      struct xsk_ring_cons *comp)
{
	struct xsk_ctx *ctx;
	int err;

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx)
		return NULL;

	if (!umem->fill_save) {
		err = xsk_create_umem_rings(umem, xsk->fd, fill, comp);
		if (err) {
			free(ctx);
			return NULL;
		}
	} else if (umem->fill_save != fill || umem->comp_save != comp) {
		/* Copy over rings to new structs. */
		memcpy(fill, umem->fill_save, sizeof(*fill));
		memcpy(comp, umem->comp_save, sizeof(*comp));
	}

	ctx->ifindex = ifindex;
	ctx->refcount = 1;
	ctx->umem = umem;
	ctx->queue_id = queue_id;

	ctx->fill = fill;
	ctx->comp = comp;
	list_add(&ctx->list, &umem->ctx_list);
	return ctx;
}

int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
			      int ifindex,
			      __u32 queue_id, struct xsk_umem *umem,
			      struct xsk_ring_cons *rx,
			      struct xsk_ring_prod *tx,
			      struct xsk_ring_prod *fill,
			      struct xsk_ring_cons *comp,
			      const struct xsk_socket_config *usr_config)
{
	bool unmap, rx_setup_done = false, tx_setup_done = false;
	void *rx_map = NULL, *tx_map = NULL;
	struct sockaddr_xdp sxdp = {};
	struct xdp_mmap_offsets off;
	struct xsk_socket *xsk;
	struct xsk_ctx *ctx;
	int err;

	if (!umem || !xsk_ptr || !(rx || tx))
		return -EFAULT;

	unmap = umem->fill_save != fill;

	xsk = calloc(1, sizeof(*xsk));
	if (!xsk)
		return -ENOMEM;

	err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
	if (err)
		goto out_xsk_alloc;

	if (umem->refcount++ > 0) {
		xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
		if (xsk->fd < 0) {
			err = -errno;
			goto out_xsk_alloc;
		}
	} else {
		xsk->fd = umem->fd;
		rx_setup_done = umem->rx_ring_setup_done;
		tx_setup_done = umem->tx_ring_setup_done;
	}

	ctx = xsk_get_ctx(umem, ifindex, queue_id);
	if (!ctx) {
		if (!fill || !comp) {
			err = -EFAULT;
			goto out_socket;
		}

		ctx = xsk_create_ctx(xsk, umem, ifindex, queue_id, fill, comp);
		if (!ctx) {
			err = -ENOMEM;
			goto out_socket;
		}
	}
	xsk->ctx = ctx;

	if (rx && !rx_setup_done) {
		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
				 &xsk->config.rx_size,
				 sizeof(xsk->config.rx_size));
		if (err) {
			err = -errno;
			goto out_put_ctx;
		}
		if (xsk->fd == umem->fd)
			umem->rx_ring_setup_done = true;
	}
	if (tx && !tx_setup_done) {
		err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
				 &xsk->config.tx_size,
				 sizeof(xsk->config.tx_size));
		if (err) {
			err = -errno;
			goto out_put_ctx;
		}
		if (xsk->fd == umem->fd)
			umem->tx_ring_setup_done = true;
	}

	err = xsk_get_mmap_offsets(xsk->fd, &off);
	if (err) {
		err = -errno;
		goto out_put_ctx;
	}

	if (rx) {
		rx_map = mmap(NULL, off.rx.desc +
			      xsk->config.rx_size * sizeof(struct xdp_desc),
			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			      xsk->fd, XDP_PGOFF_RX_RING);
		if (rx_map == MAP_FAILED) {
			err = -errno;
			goto out_put_ctx;
		}

		rx->mask = xsk->config.rx_size - 1;
		rx->size = xsk->config.rx_size;
		rx->producer = rx_map + off.rx.producer;
		rx->consumer = rx_map + off.rx.consumer;
		rx->flags = rx_map + off.rx.flags;
		rx->ring = rx_map + off.rx.desc;
		rx->cached_prod = *rx->producer;
		rx->cached_cons = *rx->consumer;
	}
	xsk->rx = rx;

	if (tx) {
		tx_map = mmap(NULL, off.tx.desc +
			      xsk->config.tx_size * sizeof(struct xdp_desc),
			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			      xsk->fd, XDP_PGOFF_TX_RING);
		if (tx_map == MAP_FAILED) {
			err = -errno;
			goto out_mmap_rx;
		}

		tx->mask = xsk->config.tx_size - 1;
		tx->size = xsk->config.tx_size;
		tx->producer = tx_map + off.tx.producer;
		tx->consumer = tx_map + off.tx.consumer;
		tx->flags = tx_map + off.tx.flags;
		tx->ring = tx_map + off.tx.desc;
		tx->cached_prod = *tx->producer;
		/* cached_cons is r->size bigger than the real consumer pointer;
		 * see xsk_prod_nb_free().
		 */
		tx->cached_cons = *tx->consumer + xsk->config.tx_size;
	}
	xsk->tx = tx;

	sxdp.sxdp_family = PF_XDP;
	sxdp.sxdp_ifindex = ctx->ifindex;
	sxdp.sxdp_queue_id = ctx->queue_id;
	if (umem->refcount > 1) {
		sxdp.sxdp_flags |= XDP_SHARED_UMEM;
		sxdp.sxdp_shared_umem_fd = umem->fd;
	} else {
		sxdp.sxdp_flags = xsk->config.bind_flags;
	}

	err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
	if (err) {
		err = -errno;
		goto out_mmap_tx;
	}

	*xsk_ptr = xsk;
	umem->fill_save = NULL;
	umem->comp_save = NULL;
	return 0;

out_mmap_tx:
	if (tx)
		munmap(tx_map, off.tx.desc +
		       xsk->config.tx_size * sizeof(struct xdp_desc));
out_mmap_rx:
	if (rx)
		munmap(rx_map, off.rx.desc +
		       xsk->config.rx_size * sizeof(struct xdp_desc));
out_put_ctx:
	xsk_put_ctx(ctx, unmap);
out_socket:
	if (--umem->refcount)
		close(xsk->fd);
out_xsk_alloc:
	free(xsk);
	return err;
}
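
/*
 * Sharing one umem between two sockets (a sketch; xsk1/xsk2, rx1/tx1,
 * rx2/tx2 and fill2/comp2 are hypothetical caller-owned variables): the
 * first socket may reuse the fill and completion rings saved by
 * xsk_umem__create(), while each further ifindex/queue pair must bring
 * its own.
 *
 *	err = xsk_socket__create_shared(&xsk1, ifindex, 0, umem, &rx1, &tx1,
 *					&fill, &comp, NULL);
 *	if (!err)
 *		err = xsk_socket__create_shared(&xsk2, ifindex, 1, umem,
 *						&rx2, &tx2, &fill2, &comp2,
 *						NULL);
 */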

int xsk_socket__create(struct xsk_socket **xsk_ptr, int ifindex,
		       __u32 queue_id, struct xsk_umem *umem,
		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
		       const struct xsk_socket_config *usr_config)
{
	if (!umem)
		return -EFAULT;

	return xsk_socket__create_shared(xsk_ptr, ifindex, queue_id, umem,
					 rx, tx, umem->fill_save,
					 umem->comp_save, usr_config);
}
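
/*
 * Single-socket setup (a sketch; "umem" comes from the xsk_umem__create()
 * example above):
 *
 *	struct xsk_socket *xsk;
 *	struct xsk_ring_cons rx;
 *	struct xsk_ring_prod tx;
 *
 *	err = xsk_socket__create(&xsk, ifindex, 0, umem, &rx, &tx, NULL);
 *
 * NULL for usr_config selects the defaults from xsk_set_xdp_socket_config().
 * Either rx or tx may be NULL for a one-direction socket, but not both.
 */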

int xsk_umem__delete(struct xsk_umem *umem)
{
	struct xdp_mmap_offsets off;
	int err;

	if (!umem)
		return 0;

	if (umem->refcount)
		return -EBUSY;

	err = xsk_get_mmap_offsets(umem->fd, &off);
	if (!err && umem->fill_save && umem->comp_save) {
		munmap(umem->fill_save->ring - off.fr.desc,
		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
		munmap(umem->comp_save->ring - off.cr.desc,
		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
	}

	close(umem->fd);
	free(umem);

	return 0;
}

void xsk_socket__delete(struct xsk_socket *xsk)
{
	size_t desc_sz = sizeof(struct xdp_desc);
	struct xdp_mmap_offsets off;
	struct xsk_umem *umem;
	struct xsk_ctx *ctx;
	int err;

	if (!xsk)
		return;

	ctx = xsk->ctx;
	umem = ctx->umem;

	xsk_put_ctx(ctx, true);

	err = xsk_get_mmap_offsets(xsk->fd, &off);
	if (!err) {
		if (xsk->rx) {
			munmap(xsk->rx->ring - off.rx.desc,
			       off.rx.desc + xsk->config.rx_size * desc_sz);
		}
		if (xsk->tx) {
			munmap(xsk->tx->ring - off.tx.desc,
			       off.tx.desc + xsk->config.tx_size * desc_sz);
		}
	}

	umem->refcount--;
	/* Do not close an fd that also has an associated umem connected
	 * to it.
	 */
	if (xsk->fd != umem->fd)
		close(xsk->fd);
	free(xsk);
}

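/*
 * Teardown order (a sketch): delete all sockets before the umem, since
 * xsk_umem__delete() returns -EBUSY while any socket still holds a
 * reference.
 *
 *	xsk_socket__delete(xsk);
 *	xsk_umem__delete(umem);
 */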