1 /* Evaluate MSG_ZEROCOPY
2 *
3 * Send traffic between two processes over one of the supported
4 * protocols and modes:
5 *
6 * PF_INET/PF_INET6
7 * - SOCK_STREAM
8 * - SOCK_DGRAM
9 * - SOCK_DGRAM with UDP_CORK
10 * - SOCK_RAW
11 * - SOCK_RAW with IP_HDRINCL
12 *
13 * PF_PACKET
14 * - SOCK_DGRAM
15 * - SOCK_RAW
16 *
17 * PF_RDS
18 * - SOCK_SEQPACKET
19 *
20 * Start this program on two connected hosts, one in send mode and
21 * the other with option '-r' to put it in receiver mode.
22 *
23 * If zerocopy mode ('-z') is enabled, the sender will verify that
24 * the kernel queues completions on the error queue for all zerocopy
25 * transfers.
26 */
27
28 #define _GNU_SOURCE
29
30 #include <arpa/inet.h>
31 #include <error.h>
32 #include <errno.h>
33 #include <limits.h>
34 #include <linux/errqueue.h>
35 #include <linux/if_packet.h>
36 #include <linux/ipv6.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <net/ethernet.h>
40 #include <net/if.h>
41 #include <netinet/ip.h>
42 #include <netinet/ip6.h>
43 #include <netinet/tcp.h>
44 #include <netinet/udp.h>
45 #include <poll.h>
46 #include <sched.h>
47 #include <stdbool.h>
48 #include <stdio.h>
49 #include <stdint.h>
50 #include <stdlib.h>
51 #include <string.h>
52 #include <sys/ioctl.h>
53 #include <sys/socket.h>
54 #include <sys/stat.h>
55 #include <sys/time.h>
56 #include <sys/types.h>
57 #include <sys/wait.h>
58 #include <unistd.h>
59 #include <linux/rds.h>
60
/*
 * Fallback values for the zerocopy constants, used only when the headers
 * included above do not already define them.
 */
#if !defined(SO_EE_ORIGIN_ZEROCOPY)
#define SO_EE_ORIGIN_ZEROCOPY		5
#endif

#if !defined(SO_ZEROCOPY)
#define SO_ZEROCOPY			60
#endif

#if !defined(SO_EE_CODE_ZEROCOPY_COPIED)
#define SO_EE_CODE_ZEROCOPY_COPIED	1
#endif

#if !defined(MSG_ZEROCOPY)
#define MSG_ZEROCOPY			0x4000000
#endif
76
/* Configuration, filled in by parse_opts() from the command line */
static int cfg_cork;			/* -c: number of sends per corked datagram */
static bool cfg_cork_mixed;		/* -m: alternate copy and zerocopy frags */
static int cfg_cpu		= -1;	/* -C: default: pin to last cpu */
static int cfg_expect_zerocopy	= -1;	/* -Z: -1 means "do not check" */
static int cfg_family		= PF_UNSPEC;	/* -4 / -6 */
static int cfg_ifindex		= 1;	/* -i: interface for PF_PACKET */
static int cfg_payload_len;		/* -s */
static int cfg_port		= 8000;	/* -p */
static bool cfg_rx;			/* -r: run as receiver */
static int cfg_runtime_ms	= 4200;	/* -t */
static int cfg_verbose;			/* -v, may be repeated */
static int cfg_waittime_ms	= 500;	/* poll() timeout */
static int cfg_notification_limit = 32;	/* -l: sends between errqueue reads */
static bool cfg_zerocopy;		/* -z */

/* Addresses built by setup_sockaddr(); cfg_alen is their length */
static socklen_t cfg_alen;
static struct sockaddr_storage cfg_dst_addr;
static struct sockaddr_storage cfg_src_addr;

/* Run-time state and counters */
static int exitcode;
static char payload[IP_MAXPACKET];
static long packets;
static long bytes;
static long completions;
static long expected_completions;
static uint32_t next_completion;
static uint32_t sends_since_notify;
101
/* Return the current gettimeofday() time, expressed in milliseconds. */
static unsigned long gettimeofday_ms(void)
{
	struct timeval now;
	unsigned long ms;

	gettimeofday(&now, NULL);

	ms = (now.tv_sec * 1000) + (now.tv_usec / 1000);
	return ms;
}
109
/*
 * Compute the ones' complement checksum over @num_words 16-bit words
 * starting at @start. The words are summed as stored, without any
 * byte-order conversion.
 */
static uint16_t get_ip_csum(const uint16_t *start, int num_words)
{
	const uint16_t *word = start;
	unsigned long acc = 0;
	int left;

	for (left = num_words; left > 0; left--)
		acc += *word++;

	/* fold the carries back in until the sum fits in 16 bits */
	while (acc > 0xFFFF)
		acc = (acc >> 16) + (acc & 0xFFFF);

	return ~acc;
}
123
do_setcpu(int cpu)124 static int do_setcpu(int cpu)
125 {
126 cpu_set_t mask;
127
128 CPU_ZERO(&mask);
129 CPU_SET(cpu, &mask);
130 if (sched_setaffinity(0, sizeof(mask), &mask))
131 fprintf(stderr, "cpu: unable to pin, may increase variance.\n");
132 else if (cfg_verbose)
133 fprintf(stderr, "cpu: %u\n", cpu);
134
135 return 0;
136 }
137
/* Set an integer socket option; exit with an error message on failure. */
static void do_setsockopt(int fd, int level, int optname, int val)
{
	int rc = setsockopt(fd, level, optname, &val, sizeof(val));

	if (rc != 0)
		error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
}
143
do_poll(int fd,int events)144 static int do_poll(int fd, int events)
145 {
146 struct pollfd pfd;
147 int ret;
148
149 pfd.events = events;
150 pfd.revents = 0;
151 pfd.fd = fd;
152
153 ret = poll(&pfd, 1, cfg_waittime_ms);
154 if (ret == -1)
155 error(1, errno, "poll");
156
157 return ret && (pfd.revents & events);
158 }
159
/*
 * Accept one connection on listening socket @fd, then close @fd.
 * Returns the connected socket; any failure terminates the program.
 */
static int do_accept(int fd)
{
	int conn;

	conn = accept(fd, NULL, NULL);
	if (conn == -1)
		error(1, errno, "accept");

	if (close(fd) != 0)
		error(1, errno, "close listen sock");

	return conn;
}
172
/*
 * Write an RDS zerocopy cookie control message into @msg.
 *
 * @msg:    message whose msg_control already points at a buffer large
 *          enough for one cmsg carrying a uint32_t
 * @cookie: value stored as the cmsg payload
 *
 * Terminates the program if no control buffer is attached.
 */
static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
{
	struct cmsghdr *cm;

	/* a missing buffer is a caller bug, not a failed call: no errno */
	if (!msg->msg_control)
		error(1, 0, "NULL cookie");

	cm = (void *)msg->msg_control;
	cm->cmsg_len = CMSG_LEN(sizeof(cookie));
	cm->cmsg_level = SOL_RDS;
	cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
	memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
}
185
do_sendmsg(int fd,struct msghdr * msg,bool do_zerocopy,int domain)186 static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
187 {
188 int ret, len, i, flags;
189 static uint32_t cookie;
190 char ckbuf[CMSG_SPACE(sizeof(cookie))];
191
192 len = 0;
193 for (i = 0; i < msg->msg_iovlen; i++)
194 len += msg->msg_iov[i].iov_len;
195
196 flags = MSG_DONTWAIT;
197 if (do_zerocopy) {
198 flags |= MSG_ZEROCOPY;
199 if (domain == PF_RDS) {
200 memset(&msg->msg_control, 0, sizeof(msg->msg_control));
201 msg->msg_controllen = CMSG_SPACE(sizeof(cookie));
202 msg->msg_control = (struct cmsghdr *)ckbuf;
203 add_zcopy_cookie(msg, ++cookie);
204 }
205 }
206
207 ret = sendmsg(fd, msg, flags);
208 if (ret == -1 && errno == EAGAIN)
209 return false;
210 if (ret == -1)
211 error(1, errno, "send");
212 if (cfg_verbose && ret != len)
213 fprintf(stderr, "send: ret=%u != %u\n", ret, len);
214 sends_since_notify++;
215
216 if (len) {
217 packets++;
218 bytes += ret;
219 if (do_zerocopy && ret)
220 expected_completions++;
221 }
222 if (do_zerocopy && domain == PF_RDS) {
223 msg->msg_control = NULL;
224 msg->msg_controllen = 0;
225 }
226
227 return true;
228 }
229
do_sendmsg_corked(int fd,struct msghdr * msg)230 static void do_sendmsg_corked(int fd, struct msghdr *msg)
231 {
232 bool do_zerocopy = cfg_zerocopy;
233 int i, payload_len, extra_len;
234
235 /* split up the packet. for non-multiple, make first buffer longer */
236 payload_len = cfg_payload_len / cfg_cork;
237 extra_len = cfg_payload_len - (cfg_cork * payload_len);
238
239 do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
240
241 for (i = 0; i < cfg_cork; i++) {
242
243 /* in mixed-frags mode, alternate zerocopy and copy frags
244 * start with non-zerocopy, to ensure attach later works
245 */
246 if (cfg_cork_mixed)
247 do_zerocopy = (i & 1);
248
249 msg->msg_iov[0].iov_len = payload_len + extra_len;
250 extra_len = 0;
251
252 do_sendmsg(fd, msg, do_zerocopy,
253 (cfg_dst_addr.ss_family == AF_INET ?
254 PF_INET : PF_INET6));
255 }
256
257 do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
258 }
259
setup_iph(struct iphdr * iph,uint16_t payload_len)260 static int setup_iph(struct iphdr *iph, uint16_t payload_len)
261 {
262 struct sockaddr_in *daddr = (void *) &cfg_dst_addr;
263 struct sockaddr_in *saddr = (void *) &cfg_src_addr;
264
265 memset(iph, 0, sizeof(*iph));
266
267 iph->version = 4;
268 iph->tos = 0;
269 iph->ihl = 5;
270 iph->ttl = 2;
271 iph->saddr = saddr->sin_addr.s_addr;
272 iph->daddr = daddr->sin_addr.s_addr;
273 iph->protocol = IPPROTO_EGP;
274 iph->tot_len = htons(sizeof(*iph) + payload_len);
275 iph->check = get_ip_csum((void *) iph, iph->ihl << 1);
276
277 return sizeof(*iph);
278 }
279
setup_ip6h(struct ipv6hdr * ip6h,uint16_t payload_len)280 static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len)
281 {
282 struct sockaddr_in6 *daddr = (void *) &cfg_dst_addr;
283 struct sockaddr_in6 *saddr = (void *) &cfg_src_addr;
284
285 memset(ip6h, 0, sizeof(*ip6h));
286
287 ip6h->version = 6;
288 ip6h->payload_len = htons(payload_len);
289 ip6h->nexthdr = IPPROTO_EGP;
290 ip6h->hop_limit = 2;
291 ip6h->saddr = saddr->sin6_addr;
292 ip6h->daddr = daddr->sin6_addr;
293
294 return sizeof(*ip6h);
295 }
296
297
setup_sockaddr(int domain,const char * str_addr,struct sockaddr_storage * sockaddr)298 static void setup_sockaddr(int domain, const char *str_addr,
299 struct sockaddr_storage *sockaddr)
300 {
301 struct sockaddr_in6 *addr6 = (void *) sockaddr;
302 struct sockaddr_in *addr4 = (void *) sockaddr;
303
304 switch (domain) {
305 case PF_INET:
306 memset(addr4, 0, sizeof(*addr4));
307 addr4->sin_family = AF_INET;
308 addr4->sin_port = htons(cfg_port);
309 if (str_addr &&
310 inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
311 error(1, 0, "ipv4 parse error: %s", str_addr);
312 break;
313 case PF_INET6:
314 memset(addr6, 0, sizeof(*addr6));
315 addr6->sin6_family = AF_INET6;
316 addr6->sin6_port = htons(cfg_port);
317 if (str_addr &&
318 inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
319 error(1, 0, "ipv6 parse error: %s", str_addr);
320 break;
321 default:
322 error(1, 0, "illegal domain");
323 }
324 }
325
do_setup_tx(int domain,int type,int protocol)326 static int do_setup_tx(int domain, int type, int protocol)
327 {
328 int fd;
329
330 fd = socket(domain, type, protocol);
331 if (fd == -1)
332 error(1, errno, "socket t");
333
334 do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21);
335 if (cfg_zerocopy)
336 do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
337
338 if (domain != PF_PACKET && domain != PF_RDS)
339 if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
340 error(1, errno, "connect");
341
342 if (domain == PF_RDS) {
343 if (bind(fd, (void *) &cfg_src_addr, cfg_alen))
344 error(1, errno, "bind");
345 }
346
347 return fd;
348 }
349
do_process_zerocopy_cookies(struct rds_zcopy_cookies * ck)350 static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck)
351 {
352 int i;
353
354 if (ck->num > RDS_MAX_ZCOOKIES)
355 error(1, 0, "Returned %d cookies, max expected %d\n",
356 ck->num, RDS_MAX_ZCOOKIES);
357 for (i = 0; i < ck->num; i++)
358 if (cfg_verbose >= 2)
359 fprintf(stderr, "%d\n", ck->cookies[i]);
360 return ck->num;
361 }
362
do_recvmsg_completion(int fd)363 static bool do_recvmsg_completion(int fd)
364 {
365 char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
366 struct rds_zcopy_cookies *ck;
367 struct cmsghdr *cmsg;
368 struct msghdr msg;
369 bool ret = false;
370
371 memset(&msg, 0, sizeof(msg));
372 msg.msg_control = cmsgbuf;
373 msg.msg_controllen = sizeof(cmsgbuf);
374
375 if (recvmsg(fd, &msg, MSG_DONTWAIT))
376 return ret;
377
378 if (msg.msg_flags & MSG_CTRUNC)
379 error(1, errno, "recvmsg notification: truncated");
380
381 for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
382 if (cmsg->cmsg_level == SOL_RDS &&
383 cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
384
385 ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg);
386 completions += do_process_zerocopy_cookies(ck);
387 ret = true;
388 break;
389 }
390 error(0, 0, "ignoring cmsg at level %d type %d\n",
391 cmsg->cmsg_level, cmsg->cmsg_type);
392 }
393 return ret;
394 }
395
do_recv_completion(int fd,int domain)396 static bool do_recv_completion(int fd, int domain)
397 {
398 struct sock_extended_err *serr;
399 struct msghdr msg = {};
400 struct cmsghdr *cm;
401 uint32_t hi, lo, range;
402 int ret, zerocopy;
403 char control[100];
404
405 if (domain == PF_RDS)
406 return do_recvmsg_completion(fd);
407
408 msg.msg_control = control;
409 msg.msg_controllen = sizeof(control);
410
411 ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
412 if (ret == -1 && errno == EAGAIN)
413 return false;
414 if (ret == -1)
415 error(1, errno, "recvmsg notification");
416 if (msg.msg_flags & MSG_CTRUNC)
417 error(1, errno, "recvmsg notification: truncated");
418
419 cm = CMSG_FIRSTHDR(&msg);
420 if (!cm)
421 error(1, 0, "cmsg: no cmsg");
422 if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
423 (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
424 (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
425 error(1, 0, "serr: wrong type: %d.%d",
426 cm->cmsg_level, cm->cmsg_type);
427
428 serr = (void *) CMSG_DATA(cm);
429
430 if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
431 error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
432 if (serr->ee_errno != 0)
433 error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
434
435 hi = serr->ee_data;
436 lo = serr->ee_info;
437 range = hi - lo + 1;
438
439 /* Detect notification gaps. These should not happen often, if at all.
440 * Gaps can occur due to drops, reordering and retransmissions.
441 */
442 if (cfg_verbose && lo != next_completion)
443 fprintf(stderr, "gap: %u..%u does not append to %u\n",
444 lo, hi, next_completion);
445 next_completion = hi + 1;
446
447 zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
448 if (cfg_expect_zerocopy != -1 &&
449 cfg_expect_zerocopy != zerocopy) {
450 fprintf(stderr, "serr: ee_code: %u != expected %u\n",
451 zerocopy, cfg_expect_zerocopy);
452 exitcode = 1;
453 /* suppress repeated messages */
454 cfg_expect_zerocopy = zerocopy;
455 }
456
457 if (cfg_verbose >= 2)
458 fprintf(stderr, "completed: %u (h=%u l=%u)\n",
459 range, hi, lo);
460
461 completions += range;
462 return true;
463 }
464
465 /* Read all outstanding messages on the errqueue */
do_recv_completions(int fd,int domain)466 static void do_recv_completions(int fd, int domain)
467 {
468 while (do_recv_completion(fd, domain)) {}
469 sends_since_notify = 0;
470 }
471
472 /* Wait for all remaining completions on the errqueue */
do_recv_remaining_completions(int fd,int domain)473 static void do_recv_remaining_completions(int fd, int domain)
474 {
475 int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
476
477 while (completions < expected_completions &&
478 gettimeofday_ms() < tstop) {
479 if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR))
480 do_recv_completions(fd, domain);
481 }
482
483 if (completions < expected_completions)
484 fprintf(stderr, "missing notifications: %lu < %lu\n",
485 completions, expected_completions);
486 }
487
do_tx(int domain,int type,int protocol)488 static void do_tx(int domain, int type, int protocol)
489 {
490 struct iovec iov[3] = { {0} };
491 struct sockaddr_ll laddr;
492 struct msghdr msg = {0};
493 struct ethhdr eth;
494 union {
495 struct ipv6hdr ip6h;
496 struct iphdr iph;
497 } nh;
498 uint64_t tstop;
499 int fd;
500
501 fd = do_setup_tx(domain, type, protocol);
502
503 if (domain == PF_PACKET) {
504 uint16_t proto = cfg_family == PF_INET ? ETH_P_IP : ETH_P_IPV6;
505
506 /* sock_raw passes ll header as data */
507 if (type == SOCK_RAW) {
508 memset(eth.h_dest, 0x06, ETH_ALEN);
509 memset(eth.h_source, 0x02, ETH_ALEN);
510 eth.h_proto = htons(proto);
511 iov[0].iov_base = ð
512 iov[0].iov_len = sizeof(eth);
513 msg.msg_iovlen++;
514 }
515
516 /* both sock_raw and sock_dgram expect name */
517 memset(&laddr, 0, sizeof(laddr));
518 laddr.sll_family = AF_PACKET;
519 laddr.sll_ifindex = cfg_ifindex;
520 laddr.sll_protocol = htons(proto);
521 laddr.sll_halen = ETH_ALEN;
522
523 memset(laddr.sll_addr, 0x06, ETH_ALEN);
524
525 msg.msg_name = &laddr;
526 msg.msg_namelen = sizeof(laddr);
527 }
528
529 /* packet and raw sockets with hdrincl must pass network header */
530 if (domain == PF_PACKET || protocol == IPPROTO_RAW) {
531 if (cfg_family == PF_INET)
532 iov[1].iov_len = setup_iph(&nh.iph, cfg_payload_len);
533 else
534 iov[1].iov_len = setup_ip6h(&nh.ip6h, cfg_payload_len);
535
536 iov[1].iov_base = (void *) &nh;
537 msg.msg_iovlen++;
538 }
539
540 if (domain == PF_RDS) {
541 msg.msg_name = &cfg_dst_addr;
542 msg.msg_namelen = (cfg_dst_addr.ss_family == AF_INET ?
543 sizeof(struct sockaddr_in) :
544 sizeof(struct sockaddr_in6));
545 }
546
547 iov[2].iov_base = payload;
548 iov[2].iov_len = cfg_payload_len;
549 msg.msg_iovlen++;
550 msg.msg_iov = &iov[3 - msg.msg_iovlen];
551
552 tstop = gettimeofday_ms() + cfg_runtime_ms;
553 do {
554 if (cfg_cork)
555 do_sendmsg_corked(fd, &msg);
556 else
557 do_sendmsg(fd, &msg, cfg_zerocopy, domain);
558
559 if (cfg_zerocopy && sends_since_notify >= cfg_notification_limit)
560 do_recv_completions(fd, domain);
561
562 while (!do_poll(fd, POLLOUT)) {
563 if (cfg_zerocopy)
564 do_recv_completions(fd, domain);
565 }
566
567 } while (gettimeofday_ms() < tstop);
568
569 if (cfg_zerocopy)
570 do_recv_remaining_completions(fd, domain);
571
572 if (close(fd))
573 error(1, errno, "close");
574
575 fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
576 packets, bytes >> 20, completions,
577 cfg_zerocopy && cfg_expect_zerocopy == 1 ? 'y' : 'n');
578 }
579
do_setup_rx(int domain,int type,int protocol)580 static int do_setup_rx(int domain, int type, int protocol)
581 {
582 int fd;
583
584 /* If tx over PF_PACKET, rx over PF_INET(6)/SOCK_RAW,
585 * to recv the only copy of the packet, not a clone
586 */
587 if (domain == PF_PACKET)
588 error(1, 0, "Use PF_INET/SOCK_RAW to read");
589
590 if (type == SOCK_RAW && protocol == IPPROTO_RAW)
591 error(1, 0, "IPPROTO_RAW: not supported on Rx");
592
593 fd = socket(domain, type, protocol);
594 if (fd == -1)
595 error(1, errno, "socket r");
596
597 do_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, 1 << 21);
598 do_setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, 1 << 16);
599 do_setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
600
601 if (bind(fd, (void *) &cfg_dst_addr, cfg_alen))
602 error(1, errno, "bind");
603
604 if (type == SOCK_STREAM) {
605 if (listen(fd, 1))
606 error(1, errno, "listen");
607 fd = do_accept(fd);
608 }
609
610 return fd;
611 }
612
613 /* Flush all outstanding bytes for the tcp receive queue */
do_flush_tcp(int fd)614 static void do_flush_tcp(int fd)
615 {
616 int ret;
617
618 /* MSG_TRUNC flushes up to len bytes */
619 ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
620 if (ret == -1 && errno == EAGAIN)
621 return;
622 if (ret == -1)
623 error(1, errno, "flush");
624 if (!ret)
625 return;
626
627 packets++;
628 bytes += ret;
629 }
630
631 /* Flush all outstanding datagrams. Verify first few bytes of each. */
do_flush_datagram(int fd,int type)632 static void do_flush_datagram(int fd, int type)
633 {
634 int ret, off = 0;
635 char buf[64];
636
637 /* MSG_TRUNC will return full datagram length */
638 ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_TRUNC);
639 if (ret == -1 && errno == EAGAIN)
640 return;
641
642 /* raw ipv4 return with header, raw ipv6 without */
643 if (cfg_family == PF_INET && type == SOCK_RAW) {
644 off += sizeof(struct iphdr);
645 ret -= sizeof(struct iphdr);
646 }
647
648 if (ret == -1)
649 error(1, errno, "recv");
650 if (ret != cfg_payload_len)
651 error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
652 if (ret > sizeof(buf) - off)
653 ret = sizeof(buf) - off;
654 if (memcmp(buf + off, payload, ret))
655 error(1, 0, "recv: data mismatch");
656
657 packets++;
658 bytes += cfg_payload_len;
659 }
660
do_rx(int domain,int type,int protocol)661 static void do_rx(int domain, int type, int protocol)
662 {
663 const int cfg_receiver_wait_ms = 400;
664 uint64_t tstop;
665 int fd;
666
667 fd = do_setup_rx(domain, type, protocol);
668
669 tstop = gettimeofday_ms() + cfg_runtime_ms + cfg_receiver_wait_ms;
670 do {
671 if (type == SOCK_STREAM)
672 do_flush_tcp(fd);
673 else
674 do_flush_datagram(fd, type);
675
676 do_poll(fd, POLLIN);
677
678 } while (gettimeofday_ms() < tstop);
679
680 if (close(fd))
681 error(1, errno, "close");
682
683 fprintf(stderr, "rx=%lu (%lu MB)\n", packets, bytes >> 20);
684 }
685
do_test(int domain,int type,int protocol)686 static void do_test(int domain, int type, int protocol)
687 {
688 int i;
689
690 if (cfg_cork && (domain == PF_PACKET || type != SOCK_DGRAM))
691 error(1, 0, "can only cork udp sockets");
692
693 do_setcpu(cfg_cpu);
694
695 for (i = 0; i < IP_MAXPACKET; i++)
696 payload[i] = 'a' + (i % 26);
697
698 if (cfg_rx)
699 do_rx(domain, type, protocol);
700 else
701 do_tx(domain, type, protocol);
702 }
703
usage(const char * filepath)704 static void usage(const char *filepath)
705 {
706 error(1, 0, "Usage: %s [options] <test>", filepath);
707 }
708
parse_opts(int argc,char ** argv)709 static void parse_opts(int argc, char **argv)
710 {
711 const int max_payload_len = sizeof(payload) -
712 sizeof(struct ipv6hdr) -
713 sizeof(struct tcphdr) -
714 40 /* max tcp options */;
715 int c;
716 char *daddr = NULL, *saddr = NULL;
717 char *cfg_test;
718
719 cfg_payload_len = max_payload_len;
720
721 while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vzZ:")) != -1) {
722 switch (c) {
723 case '4':
724 if (cfg_family != PF_UNSPEC)
725 error(1, 0, "Pass one of -4 or -6");
726 cfg_family = PF_INET;
727 cfg_alen = sizeof(struct sockaddr_in);
728 break;
729 case '6':
730 if (cfg_family != PF_UNSPEC)
731 error(1, 0, "Pass one of -4 or -6");
732 cfg_family = PF_INET6;
733 cfg_alen = sizeof(struct sockaddr_in6);
734 break;
735 case 'c':
736 cfg_cork = strtol(optarg, NULL, 0);
737 break;
738 case 'C':
739 cfg_cpu = strtol(optarg, NULL, 0);
740 break;
741 case 'D':
742 daddr = optarg;
743 break;
744 case 'i':
745 cfg_ifindex = if_nametoindex(optarg);
746 if (cfg_ifindex == 0)
747 error(1, errno, "invalid iface: %s", optarg);
748 break;
749 case 'l':
750 cfg_notification_limit = strtoul(optarg, NULL, 0);
751 break;
752 case 'm':
753 cfg_cork_mixed = true;
754 break;
755 case 'p':
756 cfg_port = strtoul(optarg, NULL, 0);
757 break;
758 case 'r':
759 cfg_rx = true;
760 break;
761 case 's':
762 cfg_payload_len = strtoul(optarg, NULL, 0);
763 break;
764 case 'S':
765 saddr = optarg;
766 break;
767 case 't':
768 cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
769 break;
770 case 'v':
771 cfg_verbose++;
772 break;
773 case 'z':
774 cfg_zerocopy = true;
775 break;
776 case 'Z':
777 cfg_expect_zerocopy = !!atoi(optarg);
778 break;
779 }
780 }
781
782 cfg_test = argv[argc - 1];
783 if (strcmp(cfg_test, "rds") == 0) {
784 if (!daddr)
785 error(1, 0, "-D <server addr> required for PF_RDS\n");
786 if (!cfg_rx && !saddr)
787 error(1, 0, "-S <client addr> required for PF_RDS\n");
788 }
789 setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);
790 setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
791
792 if (cfg_payload_len > max_payload_len)
793 error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
794 if (cfg_cork_mixed && (!cfg_zerocopy || !cfg_cork))
795 error(1, 0, "-m: cork_mixed requires corking and zerocopy");
796
797 if (optind != argc - 1)
798 usage(argv[0]);
799 }
800
main(int argc,char ** argv)801 int main(int argc, char **argv)
802 {
803 const char *cfg_test;
804
805 parse_opts(argc, argv);
806
807 cfg_test = argv[argc - 1];
808
809 if (!strcmp(cfg_test, "packet"))
810 do_test(PF_PACKET, SOCK_RAW, 0);
811 else if (!strcmp(cfg_test, "packet_dgram"))
812 do_test(PF_PACKET, SOCK_DGRAM, 0);
813 else if (!strcmp(cfg_test, "raw"))
814 do_test(cfg_family, SOCK_RAW, IPPROTO_EGP);
815 else if (!strcmp(cfg_test, "raw_hdrincl"))
816 do_test(cfg_family, SOCK_RAW, IPPROTO_RAW);
817 else if (!strcmp(cfg_test, "tcp"))
818 do_test(cfg_family, SOCK_STREAM, 0);
819 else if (!strcmp(cfg_test, "udp"))
820 do_test(cfg_family, SOCK_DGRAM, 0);
821 else if (!strcmp(cfg_test, "rds"))
822 do_test(PF_RDS, SOCK_SEQPACKET, 0);
823 else
824 error(1, 0, "unknown cfg_test %s", cfg_test);
825
826 return exitcode;
827 }
828