1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2024 Meta
3
4 #include <poll.h>
5 #include <test_progs.h>
6 #include "network_helpers.h"
7 #include "sock_iter_batch.skel.h"
8
9 #define TEST_NS "sock_iter_batch_netns"
10 #define TEST_CHILD_NS "sock_iter_batch_child_netns"
11
/* Socket count used by the "force a realloc" test cases; presumably mirrors
 * the kernel iterator's initial batch capacity -- TODO confirm against the
 * kernel side.
 */
static const int init_batch_size = 16;
/* Number of SO_REUSEPORT sockets created per port/bucket by most tests. */
static const int nr_soreuse = 4;
14
/* One record as read back from the iterator fd; every read() in this file is
 * sized in units of sizeof(struct iter_out). Presumably mirrors what the BPF
 * program emits per socket -- confirm against the BPF side.
 */
struct iter_out {
	int idx;	/* compared against the fds[] index in do_test() */
	__u64 cookie;	/* compared against SO_COOKIE of the test sockets */
} __packed;
19
/* Per-socket tally of how many times a cookie was returned by the iterator.
 * A zero cookie marks an unused entry (see insert()).
 */
struct sock_count {
	__u64 cookie;	/* socket cookie, 0 if the entry is unused */
	int count;	/* number of times the cookie was read back */
};
24
/*
 * Record one sighting of @cookie in @counts, an array of @counts_len entries
 * in which a zero cookie marks an unused entry.
 *
 * An entry that already holds @cookie is reused and ends the scan; otherwise
 * the last unused entry encountered during the scan is claimed.
 *
 * Returns the updated sighting count for @cookie, or -1 when there is neither
 * a matching entry nor an unused one.
 */
static int insert(__u64 cookie, struct sock_count counts[], int counts_len)
{
	struct sock_count *slot = NULL;
	int idx;

	for (idx = 0; idx < counts_len; idx++) {
		struct sock_count *cur = &counts[idx];

		if (!cur->cookie) {
			/* Remember the free entry but keep looking for a match. */
			slot = cur;
			continue;
		}
		if (cur->cookie == cookie) {
			slot = cur;
			break;
		}
	}

	if (!slot)
		return -1;

	slot->cookie = cookie;
	slot->count += 1;

	return slot->count;
}
46
read_n(int iter_fd,int n,struct sock_count counts[],int counts_len)47 static int read_n(int iter_fd, int n, struct sock_count counts[],
48 int counts_len)
49 {
50 struct iter_out out;
51 int nread = 1;
52 int i = 0;
53
54 for (; nread > 0 && (n < 0 || i < n); i++) {
55 nread = read(iter_fd, &out, sizeof(out));
56 if (!nread || !ASSERT_EQ(nread, sizeof(out), "nread"))
57 break;
58 ASSERT_GE(insert(out.cookie, counts, counts_len), 0, "insert");
59 }
60
61 ASSERT_TRUE(n < 0 || i == n, "n < 0 || i == n");
62
63 return i;
64 }
65
socket_cookie(int fd)66 static __u64 socket_cookie(int fd)
67 {
68 __u64 cookie;
69 socklen_t cookie_len = sizeof(cookie);
70
71 if (!ASSERT_OK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie,
72 &cookie_len), "getsockopt(SO_COOKIE)"))
73 return 0;
74 return cookie;
75 }
76
was_seen(int fd,struct sock_count counts[],int counts_len)77 static bool was_seen(int fd, struct sock_count counts[], int counts_len)
78 {
79 __u64 cookie = socket_cookie(fd);
80 int i = 0;
81
82 for (; cookie && i < counts_len; i++)
83 if (cookie == counts[i].cookie)
84 return true;
85
86 return false;
87 }
88
/*
 * Find the first socket in @fds whose cookie is already recorded in @counts.
 *
 * NOTE: @n is used as the length of both @fds and @counts, so callers must
 * pass arrays of at least @n entries each.
 *
 * Returns the index into @fds, or -1 if none of the sockets was seen.
 */
static int get_seen_socket(int *fds, struct sock_count counts[], int n)
{
	int idx;

	for (idx = 0; idx < n; idx++) {
		if (!was_seen(fds[idx], counts, n))
			continue;
		return idx;
	}

	return -1;
}
98
get_nth_socket(int * fds,int fds_len,struct bpf_link * link,int n)99 static int get_nth_socket(int *fds, int fds_len, struct bpf_link *link, int n)
100 {
101 int i, nread, iter_fd;
102 int nth_sock_idx = -1;
103 struct iter_out out;
104
105 iter_fd = bpf_iter_create(bpf_link__fd(link));
106 if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
107 return -1;
108
109 for (; n >= 0; n--) {
110 nread = read(iter_fd, &out, sizeof(out));
111 if (!nread || !ASSERT_GE(nread, 1, "nread"))
112 goto done;
113 }
114
115 for (i = 0; i < fds_len && nth_sock_idx < 0; i++)
116 if (fds[i] >= 0 && socket_cookie(fds[i]) == out.cookie)
117 nth_sock_idx = i;
118 done:
119 close(iter_fd);
120 return nth_sock_idx;
121 }
122
destroy(int fd)123 static void destroy(int fd)
124 {
125 struct sock_iter_batch *skel = NULL;
126 __u64 cookie = socket_cookie(fd);
127 struct bpf_link *link = NULL;
128 int iter_fd = -1;
129 int nread;
130 __u64 out;
131
132 skel = sock_iter_batch__open();
133 if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
134 goto done;
135
136 skel->rodata->destroy_cookie = cookie;
137
138 if (!ASSERT_OK(sock_iter_batch__load(skel), "sock_iter_batch__load"))
139 goto done;
140
141 link = bpf_program__attach_iter(skel->progs.iter_tcp_destroy, NULL);
142 if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
143 goto done;
144
145 iter_fd = bpf_iter_create(bpf_link__fd(link));
146 if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
147 goto done;
148
149 /* Delete matching socket. */
150 nread = read(iter_fd, &out, sizeof(out));
151 ASSERT_GE(nread, 0, "nread");
152 if (nread)
153 ASSERT_EQ(out, cookie, "cookie matches");
154 done:
155 if (iter_fd >= 0)
156 close(iter_fd);
157 bpf_link__destroy(link);
158 sock_iter_batch__destroy(skel);
159 close(fd);
160 }
161
get_seen_count(int fd,struct sock_count counts[],int n)162 static int get_seen_count(int fd, struct sock_count counts[], int n)
163 {
164 __u64 cookie = socket_cookie(fd);
165 int count = 0;
166 int i = 0;
167
168 for (; cookie && !count && i < n; i++)
169 if (cookie == counts[i].cookie)
170 count = counts[i].count;
171
172 return count;
173 }
174
/*
 * Assert that exactly @n of the open sockets in @fds were returned by the
 * iterator, and that none of them was returned more than once.
 *
 * Closed sockets (negative fd) and sockets that were never seen are skipped.
 */
static void check_n_were_seen_once(int *fds, int fds_len, int n,
				   struct sock_count counts[], int counts_len)
{
	int nr_seen_once = 0;
	int idx;

	for (idx = 0; idx < fds_len; idx++) {
		int times;

		if (fds[idx] < 0)
			continue;

		times = get_seen_count(fds[idx], counts, counts_len);
		if (!times)
			continue;

		/* A socket seen more than once fails here and is not counted. */
		if (ASSERT_EQ(times, 1, "seen_cnt"))
			nr_seen_once++;
	}

	ASSERT_EQ(nr_seen_once, n, "seen_once");
}
195
/*
 * Wait (up to 5 seconds) for exactly one of the listening sockets in
 * @server_poll_fds to become readable and accept a connection from it.
 *
 * Returns the accepted socket fd, or -1 if poll() did not report exactly one
 * ready descriptor or no descriptor had POLLIN set.
 */
static int accept_from_one(struct pollfd *server_poll_fds,
			   int server_poll_fds_len)
{
	static const int poll_timeout_ms = 5000; /* 5s */
	struct pollfd *end = server_poll_fds + server_poll_fds_len;
	struct pollfd *pfd;
	int nr_ready;

	nr_ready = poll(server_poll_fds, server_poll_fds_len, poll_timeout_ms);
	if (!ASSERT_EQ(nr_ready, 1, "poll"))
		return -1;

	for (pfd = server_poll_fds; pfd < end; pfd++) {
		if (pfd->revents & POLLIN)
			return accept(pfd->fd, NULL, NULL);
	}

	return -1;
}
213
/*
 * Establish @nr_connects connections to @addr:@port, accepting each one on
 * whichever of the @server_fds_len listening sockets in @server_fds it
 * lands on.
 *
 * Returns a malloc()ed array of 2 * @nr_connects fds laid out as
 * (connected, accepted) pairs, or NULL on failure, in which case every
 * socket opened so far is released via free_fds(). On success the caller
 * owns the array.
 */
static int *connect_to_server(int family, int sock_type, const char *addr,
			      __u16 port, int nr_connects, int *server_fds,
			      int server_fds_len)
{
	struct pollfd *pfds;
	int *socks = NULL;
	int nr_socks = 0;
	int idx;

	pfds = calloc(server_fds_len, sizeof(*pfds));
	if (!ASSERT_OK_PTR(pfds, "server_poll_fds"))
		return NULL;

	for (idx = 0; idx < server_fds_len; idx++) {
		pfds[idx].fd = server_fds[idx];
		pfds[idx].events = POLLIN;
	}

	socks = malloc(sizeof(*socks) * nr_connects * 2);
	if (!ASSERT_OK_PTR(socks, "established_socks"))
		goto error;

	for (; nr_connects; nr_connects--) {
		int fd;

		/* Client side of the connection. */
		fd = connect_to_addr_str(family, sock_type, addr, port, NULL);
		socks[nr_socks] = fd;
		if (!ASSERT_OK_FD(fd, "connect_to_addr_str"))
			goto error;
		nr_socks++;

		/* Matching server side of the connection. */
		fd = accept_from_one(pfds, server_fds_len);
		socks[nr_socks] = fd;
		if (!ASSERT_OK_FD(fd, "accept_from_one"))
			goto error;
		nr_socks++;
	}

	free(pfds);
	return socks;

error:
	/* Only the first nr_socks entries hold valid descriptors. */
	free_fds(socks, nr_socks);
	free(pfds);
	return NULL;
}
257
/*
 * Scenario: close a socket the iterator has already returned, then resume.
 *
 * All but one socket are read, one of the already-seen sockets is closed,
 * and the rest of the iteration must neither skip the last socket nor
 * repeat any socket.
 */
static void remove_seen(int family, int sock_type, const char *addr, __u16 port,
			int *socks, int socks_len, int *established_socks,
			int established_socks_len, struct sock_count *counts,
			int counts_len, struct bpf_link *link, int iter_fd)
{
	const int all_but_one = socks_len - 1;
	int victim;

	/* Consume everything except the last socket and verify the tally. */
	read_n(iter_fd, all_but_one, counts, counts_len);
	check_n_were_seen_once(socks, socks_len, all_but_one, counts,
			       counts_len);

	/* Drop an already-visited socket from the bucket. */
	victim = get_seen_socket(socks, counts, counts_len);
	if (!ASSERT_GE(victim, 0, "close_idx"))
		return;
	close(socks[victim]);
	socks[victim] = -1;

	/* Drain the iterator. */
	read_n(iter_fd, -1, counts, counts_len);

	/* The closed socket is skipped by the check, so one fewer than
	 * socks_len sockets must have been seen exactly once.
	 */
	check_n_were_seen_once(socks, socks_len, all_but_one, counts,
			       counts_len);
}
288
/*
 * Scenario (established sockets): destroy a socket the iterator has already
 * returned, then resume.
 *
 * All listening sockets and all but one established socket are read, the
 * socket at iteration position listen_socks_len + 1 is destroyed, and the
 * remaining iteration must not skip or repeat any socket.
 */
static void remove_seen_established(int family, int sock_type, const char *addr,
				    __u16 port, int *listen_socks,
				    int listen_socks_len, int *established_socks,
				    int established_socks_len,
				    struct sock_count *counts, int counts_len,
				    struct bpf_link *link, int iter_fd)
{
	const int all_but_one = established_socks_len - 1;
	int victim;

	/* The listening sockets come first; each must show up exactly once. */
	read_n(iter_fd, listen_socks_len, counts, counts_len);
	check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
			       counts, counts_len);

	/* Consume the established sockets, leaving one unread. */
	read_n(iter_fd, all_but_one, counts, counts_len);

	/* Remove a socket that was already visited from the bucket. */
	victim = get_nth_socket(established_socks, established_socks_len,
				link, listen_socks_len + 1);
	if (!ASSERT_GE(victim, 0, "close_idx"))
		return;
	destroy(established_socks[victim]);
	established_socks[victim] = -1;

	/* Drain the iterator. */
	read_n(iter_fd, -1, counts, counts_len);

	/* The last socket must not have been skipped and nothing may have
	 * been returned twice.
	 */
	check_n_were_seen_once(established_socks, established_socks_len,
			       all_but_one, counts, counts_len);
}
325
/*
 * Scenario: close the socket that would be returned next, then resume.
 *
 * One socket is read, the socket at iteration position 1 is closed, and the
 * remaining iteration must return every surviving socket exactly once.
 */
static void remove_unseen(int family, int sock_type, const char *addr,
			  __u16 port, int *socks, int socks_len,
			  int *established_socks, int established_socks_len,
			  struct sock_count *counts, int counts_len,
			  struct bpf_link *link, int iter_fd)
{
	int victim;

	/* Consume a single socket and make sure it is one of ours. */
	read_n(iter_fd, 1, counts, counts_len);
	check_n_were_seen_once(socks, socks_len, 1, counts, counts_len);

	/* Closing the next socket in the bucket forces the iterator to skip
	 * past the first cookie it remembered when resuming.
	 */
	victim = get_nth_socket(socks, socks_len, link, 1);
	if (!ASSERT_GE(victim, 0, "close_idx"))
		return;
	close(socks[victim]);
	socks[victim] = -1;

	/* Drain the iterator. */
	read_n(iter_fd, -1, counts, counts_len);

	/* Every remaining socket must be seen exactly once, and the socket
	 * read first must not be repeated.
	 */
	check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
			       counts_len);
}
358
/*
 * Scenario (established sockets): destroy the socket that would be returned
 * next, then resume.
 *
 * All listening sockets plus one established socket are read, the socket at
 * iteration position listen_socks_len + 1 is destroyed, and the remaining
 * iteration must return every surviving established socket exactly once.
 */
static void remove_unseen_established(int family, int sock_type,
				      const char *addr, __u16 port,
				      int *listen_socks, int listen_socks_len,
				      int *established_socks,
				      int established_socks_len,
				      struct sock_count *counts, int counts_len,
				      struct bpf_link *link, int iter_fd)
{
	int victim;

	/* The listening sockets come first; each must show up exactly once. */
	read_n(iter_fd, listen_socks_len, counts, counts_len);
	check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
			       counts, counts_len);

	/* Consume one established socket and verify it was recorded. */
	read_n(iter_fd, 1, counts, counts_len);
	check_n_were_seen_once(established_socks, established_socks_len, 1,
			       counts, counts_len);

	/* Destroying the next socket in the bucket forces the iterator to
	 * skip past the first cookie it remembered when resuming.
	 */
	victim = get_nth_socket(established_socks, established_socks_len,
				link, listen_socks_len + 1);
	if (!ASSERT_GE(victim, 0, "close_idx"))
		return;
	destroy(established_socks[victim]);
	established_socks[victim] = -1;

	/* Drain the iterator. */
	read_n(iter_fd, -1, counts, counts_len);

	/* Every remaining socket must be seen exactly once, and the socket
	 * read first must not be repeated.
	 */
	check_n_were_seen_once(established_socks, established_socks_len,
			       established_socks_len - 1, counts, counts_len);
}
403
/*
 * Scenario: close every socket the iterator has not returned yet.
 *
 * One socket is read, then the socket at iteration position 1 is closed
 * socks_len - 1 times in a row. The resumed iteration must return nothing.
 */
static void remove_all(int family, int sock_type, const char *addr,
		       __u16 port, int *socks, int socks_len,
		       int *established_socks, int established_socks_len,
		       struct sock_count *counts, int counts_len,
		       struct bpf_link *link, int iter_fd)
{
	int remaining;
	int victim;

	/* Consume a single socket and make sure it is one of ours. */
	read_n(iter_fd, 1, counts, counts_len);
	check_n_were_seen_once(socks, socks_len, 1, counts, counts_len);

	/* Exhaust the list of saved cookies so that the next read ends
	 * without putting any socket into the batch.
	 */
	for (remaining = socks_len - 1; remaining > 0; remaining--) {
		victim = get_nth_socket(socks, socks_len, link, 1);
		if (!ASSERT_GE(victim, 0, "close_idx"))
			return;
		close(socks[victim]);
		socks[victim] = -1;
	}

	/* Nothing may be left to iterate over. */
	ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
}
432
/*
 * Scenario (established sockets): destroy sockets so that nothing is left
 * for the iterator to return when it resumes.
 *
 * All listening sockets plus one established socket are read. Then the
 * sockets at iteration positions listen_socks_len + i, for
 * i in [0, established_socks_len - 1), are looked up first and destroyed
 * afterwards, and the resumed iteration must return no more sockets.
 *
 * The temporary index array is released on every exit path.
 */
static void remove_all_established(int family, int sock_type, const char *addr,
				   __u16 port, int *listen_socks,
				   int listen_socks_len, int *established_socks,
				   int established_socks_len,
				   struct sock_count *counts, int counts_len,
				   struct bpf_link *link, int iter_fd)
{
	int *close_idx = NULL;
	int i;

	/* Iterate through all listening sockets. */
	read_n(iter_fd, listen_socks_len, counts, counts_len);

	/* Make sure we saw all listening sockets exactly once. */
	check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
			       counts, counts_len);

	/* Iterate through the first established socket. */
	read_n(iter_fd, 1, counts, counts_len);

	/* Make sure we saw one established socket. */
	check_n_were_seen_once(established_socks, established_socks_len, 1,
			       counts, counts_len);

	/* Close all remaining sockets to exhaust the list of saved cookies and
	 * exit without putting any sockets into the batch on the next read.
	 *
	 * All indices are resolved before anything is destroyed so that the
	 * iteration positions used for the lookups stay stable.
	 */
	close_idx = malloc(sizeof(*close_idx) * (established_socks_len - 1));
	if (!ASSERT_OK_PTR(close_idx, "close_idx malloc"))
		return;
	for (i = 0; i < established_socks_len - 1; i++) {
		close_idx[i] = get_nth_socket(established_socks,
					      established_socks_len, link,
					      listen_socks_len + i);
		/* Bail out through the common exit so close_idx is freed. */
		if (!ASSERT_GE(close_idx[i], 0, "close_idx"))
			goto out;
	}

	for (i = 0; i < established_socks_len - 1; i++) {
		destroy(established_socks[close_idx[i]]);
		established_socks[close_idx[i]] = -1;
	}

	/* Make sure there are no more sockets returned */
	ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
out:
	free(close_idx);
}
480
/*
 * Scenario: add sockets to the bucket in the middle of an iteration.
 *
 * All but one socket are read, socks_len additional reuseport sockets are
 * bound to the same address and port, and the resumed iteration must still
 * return each original socket exactly once. The extra sockets are released
 * before returning.
 */
static void add_some(int family, int sock_type, const char *addr, __u16 port,
		     int *socks, int socks_len, int *established_socks,
		     int established_socks_len, struct sock_count *counts,
		     int counts_len, struct bpf_link *link, int iter_fd)
{
	const int all_but_one = socks_len - 1;
	int *extra_socks;

	/* Consume everything except the last socket and verify the tally. */
	read_n(iter_fd, all_but_one, counts, counts_len);
	check_n_were_seen_once(socks, socks_len, all_but_one, counts,
			       counts_len);

	/* Double the number of sockets in the bucket. */
	extra_socks = start_reuseport_server(family, sock_type, addr, port, 0,
					     socks_len);
	if (ASSERT_OK_PTR(extra_socks, "start_reuseport_server")) {
		/* Drain the iterator, then check that every original socket
		 * was returned exactly once.
		 */
		read_n(iter_fd, -1, counts, counts_len);
		check_n_were_seen_once(socks, socks_len, socks_len, counts,
				       counts_len);
	}

	free_fds(extra_socks, socks_len);
}
510
/*
 * Scenario (established sockets): add connections in the middle of an
 * iteration.
 *
 * All listening sockets and all but one established socket are read, then
 * established_socks_len / 2 new connections are made (each adding a connected
 * and an accepted socket). The resumed iteration must still return every
 * original listening and established socket exactly once. The new sockets
 * are released before returning.
 */
static void add_some_established(int family, int sock_type, const char *addr,
				 __u16 port, int *listen_socks,
				 int listen_socks_len, int *established_socks,
				 int established_socks_len,
				 struct sock_count *counts,
				 int counts_len, struct bpf_link *link,
				 int iter_fd)
{
	const int all_but_one = established_socks_len - 1;
	int *extra_socks;

	/* The listening sockets come first; each must show up exactly once. */
	read_n(iter_fd, listen_socks_len, counts, counts_len);
	check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
			       counts, counts_len);

	/* Consume all but the last established socket and verify the tally. */
	read_n(iter_fd, all_but_one, counts, counts_len);
	check_n_were_seen_once(established_socks, established_socks_len,
			       all_but_one, counts, counts_len);

	/* Double the number of established sockets in the bucket. */
	extra_socks = connect_to_server(family, sock_type, addr, port,
					established_socks_len / 2, listen_socks,
					listen_socks_len);
	if (ASSERT_OK_PTR(extra_socks, "connect_to_server")) {
		/* Drain the iterator. */
		read_n(iter_fd, -1, counts, counts_len);

		/* Every original socket must have been seen exactly once. */
		check_n_were_seen_once(listen_socks, listen_socks_len,
				       listen_socks_len, counts, counts_len);
		check_n_were_seen_once(established_socks,
				       established_socks_len,
				       established_socks_len, counts,
				       counts_len);
	}

	free_fds(extra_socks, established_socks_len);
}
553
/*
 * Scenario: grow the bucket after the first read so that the next read is
 * expected to need a larger batch.
 *
 * One socket is read, socks_len additional reuseport sockets are bound to
 * the same address and port, and the resumed iteration must return each
 * original socket exactly once. The extra sockets are released before
 * returning.
 */
static void force_realloc(int family, int sock_type, const char *addr,
			  __u16 port, int *socks, int socks_len,
			  int *established_socks, int established_socks_len,
			  struct sock_count *counts, int counts_len,
			  struct bpf_link *link, int iter_fd)
{
	int *extra_socks;

	/* A single read is enough to initialize the batch. */
	read_n(iter_fd, 1, counts, counts_len);

	/* Double the number of sockets in the bucket to force a realloc on
	 * the next read.
	 */
	extra_socks = start_reuseport_server(family, sock_type, addr, port, 0,
					     socks_len);
	if (ASSERT_OK_PTR(extra_socks, "start_reuseport_server")) {
		/* Drain the iterator, then check that each socket from the
		 * first set was returned exactly once.
		 */
		read_n(iter_fd, -1, counts, counts_len);
		check_n_were_seen_once(socks, socks_len, socks_len, counts,
				       counts_len);
	}

	free_fds(extra_socks, socks_len);
}
582
/*
 * Scenario (established sockets): read the whole iterator in one go and
 * check that every listening and every established socket was returned
 * exactly once. The test case is sized so that this is expected to trigger
 * a batch realloc.
 */
static void force_realloc_established(int family, int sock_type,
				      const char *addr, __u16 port,
				      int *listen_socks, int listen_socks_len,
				      int *established_socks,
				      int established_socks_len,
				      struct sock_count *counts, int counts_len,
				      struct bpf_link *link, int iter_fd)
{
	const int read_everything = -1;

	/* Drain the iterator completely. */
	read_n(iter_fd, read_everything, counts, counts_len);

	/* Listening sockets first, then the established ones. */
	check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
			       counts, counts_len);
	check_n_were_seen_once(established_socks, established_socks_len,
			       established_socks_len, counts, counts_len);
}
600
/* Description of one iterator "resume" scenario; see do_resume_test() for how
 * each field is consumed.
 */
struct test_case {
	/* Scenario body. Receives the listening/reuseport sockets, the
	 * established sockets (if any), the zeroed tally array and an
	 * iterator fd created from @link.
	 */
	void (*test)(int family, int sock_type, const char *addr, __u16 port,
		     int *socks, int socks_len, int *established_socks,
		     int established_socks_len, struct sock_count *counts,
		     int counts_len, struct bpf_link *link, int iter_fd);
	/* Subtest name passed to test__start_subtest(). */
	const char *description;
	/* If non-zero, written to net.ipv4.tcp_child_ehash_entries before a
	 * child netns is created and entered for the test.
	 */
	int ehash_buckets;
	/* Number of connections to establish; each one contributes two
	 * sockets (connected + accepted) to established_socks.
	 */
	int connections;
	/* Number of reuseport server sockets created up front. */
	int init_socks;
	/* Number of entries allocated for the tally array. */
	int max_socks;
	/* SOCK_STREAM or SOCK_DGRAM; selects the TCP or UDP iterator. */
	int sock_type;
	/* AF_INET or AF_INET6; selects "127.0.0.1" or "::1". */
	int family;
};
614
/* Resume scenarios, run one subtest per entry by do_resume_tests(). The first
 * five entries use UDP sockets, the next five use TCP listening sockets, and
 * the last five additionally establish TCP connections inside a child netns
 * whose ehash is limited to a single bucket.
 */
static struct test_case resume_tests[] = {
	{
		.description = "udp: resume after removing a seen socket",
		.init_socks = nr_soreuse,
		.max_socks = nr_soreuse,
		.sock_type = SOCK_DGRAM,
		.family = AF_INET6,
		.test = remove_seen,
	},
	{
		.description = "udp: resume after removing one unseen socket",
		.init_socks = nr_soreuse,
		.max_socks = nr_soreuse,
		.sock_type = SOCK_DGRAM,
		.family = AF_INET6,
		.test = remove_unseen,
	},
	{
		.description = "udp: resume after removing all unseen sockets",
		.init_socks = nr_soreuse,
		.max_socks = nr_soreuse,
		.sock_type = SOCK_DGRAM,
		.family = AF_INET6,
		.test = remove_all,
	},
	{
		.description = "udp: resume after adding a few sockets",
		.init_socks = nr_soreuse,
		.max_socks = nr_soreuse,
		.sock_type = SOCK_DGRAM,
		/* Use AF_INET so that new sockets are added to the head of the
		 * bucket's list.
		 */
		.family = AF_INET,
		.test = add_some,
	},
	{
		.description = "udp: force a realloc to occur",
		.init_socks = init_batch_size,
		.max_socks = init_batch_size * 2,
		.sock_type = SOCK_DGRAM,
		/* Use AF_INET6 so that new sockets are added to the tail of the
		 * bucket's list, needing to be added to the next batch to force
		 * a realloc.
		 */
		.family = AF_INET6,
		.test = force_realloc,
	},
	{
		.description = "tcp: resume after removing a seen socket (listening)",
		.init_socks = nr_soreuse,
		.max_socks = nr_soreuse,
		.sock_type = SOCK_STREAM,
		.family = AF_INET6,
		.test = remove_seen,
	},
	{
		.description = "tcp: resume after removing one unseen socket (listening)",
		.init_socks = nr_soreuse,
		.max_socks = nr_soreuse,
		.sock_type = SOCK_STREAM,
		.family = AF_INET6,
		.test = remove_unseen,
	},
	{
		.description = "tcp: resume after removing all unseen sockets (listening)",
		.init_socks = nr_soreuse,
		.max_socks = nr_soreuse,
		.sock_type = SOCK_STREAM,
		.family = AF_INET6,
		.test = remove_all,
	},
	{
		.description = "tcp: resume after adding a few sockets (listening)",
		.init_socks = nr_soreuse,
		.max_socks = nr_soreuse,
		.sock_type = SOCK_STREAM,
		/* Use AF_INET so that new sockets are added to the head of the
		 * bucket's list.
		 */
		.family = AF_INET,
		.test = add_some,
	},
	{
		.description = "tcp: force a realloc to occur (listening)",
		.init_socks = init_batch_size,
		.max_socks = init_batch_size * 2,
		.sock_type = SOCK_STREAM,
		/* Use AF_INET6 so that new sockets are added to the tail of the
		 * bucket's list, needing to be added to the next batch to force
		 * a realloc.
		 */
		.family = AF_INET6,
		.test = force_realloc,
	},
	{
		.description = "tcp: resume after removing a seen socket (established)",
		/* Force all established sockets into one bucket */
		.ehash_buckets = 1,
		.connections = nr_soreuse,
		.init_socks = nr_soreuse,
		/* Room for connect()ed and accept()ed sockets */
		.max_socks = nr_soreuse * 3,
		.sock_type = SOCK_STREAM,
		.family = AF_INET6,
		.test = remove_seen_established,
	},
	{
		.description = "tcp: resume after removing one unseen socket (established)",
		/* Force all established sockets into one bucket */
		.ehash_buckets = 1,
		.connections = nr_soreuse,
		.init_socks = nr_soreuse,
		/* Room for connect()ed and accept()ed sockets */
		.max_socks = nr_soreuse * 3,
		.sock_type = SOCK_STREAM,
		.family = AF_INET6,
		.test = remove_unseen_established,
	},
	{
		.description = "tcp: resume after removing all unseen sockets (established)",
		/* Force all established sockets into one bucket */
		.ehash_buckets = 1,
		.connections = nr_soreuse,
		.init_socks = nr_soreuse,
		/* Room for connect()ed and accept()ed sockets */
		.max_socks = nr_soreuse * 3,
		.sock_type = SOCK_STREAM,
		.family = AF_INET6,
		.test = remove_all_established,
	},
	{
		.description = "tcp: resume after adding a few sockets (established)",
		/* Force all established sockets into one bucket */
		.ehash_buckets = 1,
		.connections = nr_soreuse,
		.init_socks = nr_soreuse,
		/* Room for connect()ed and accept()ed sockets */
		.max_socks = nr_soreuse * 3,
		.sock_type = SOCK_STREAM,
		.family = AF_INET6,
		.test = add_some_established,
	},
	{
		.description = "tcp: force a realloc to occur (established)",
		/* Force all established sockets into one bucket */
		.ehash_buckets = 1,
		/* Bucket size will need to double when going from listening to
		 * established sockets.
		 */
		.connections = init_batch_size,
		.init_socks = nr_soreuse,
		/* Room for connect()ed and accept()ed sockets */
		.max_socks = nr_soreuse + (init_batch_size * 2),
		.sock_type = SOCK_STREAM,
		.family = AF_INET6,
		.test = force_realloc_established,
	},
};
774
do_resume_test(struct test_case * tc)775 static void do_resume_test(struct test_case *tc)
776 {
777 struct sock_iter_batch *skel = NULL;
778 struct sock_count *counts = NULL;
779 static const __u16 port = 10001;
780 struct nstoken *nstoken = NULL;
781 struct bpf_link *link = NULL;
782 int *established_fds = NULL;
783 int err, iter_fd = -1;
784 const char *addr;
785 int *fds = NULL;
786
787 if (tc->ehash_buckets) {
788 SYS_NOFAIL("ip netns del " TEST_CHILD_NS);
789 SYS(done, "sysctl -wq net.ipv4.tcp_child_ehash_entries=%d",
790 tc->ehash_buckets);
791 SYS(done, "ip netns add %s", TEST_CHILD_NS);
792 SYS(done, "ip -net %s link set dev lo up", TEST_CHILD_NS);
793 nstoken = open_netns(TEST_CHILD_NS);
794 if (!ASSERT_OK_PTR(nstoken, "open_child_netns"))
795 goto done;
796 }
797
798 counts = calloc(tc->max_socks, sizeof(*counts));
799 if (!ASSERT_OK_PTR(counts, "counts"))
800 goto done;
801 skel = sock_iter_batch__open();
802 if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
803 goto done;
804
805 /* Prepare a bucket of sockets in the kernel hashtable */
806 addr = tc->family == AF_INET6 ? "::1" : "127.0.0.1";
807 fds = start_reuseport_server(tc->family, tc->sock_type, addr, port, 0,
808 tc->init_socks);
809 if (!ASSERT_OK_PTR(fds, "start_reuseport_server"))
810 goto done;
811 if (tc->connections) {
812 established_fds = connect_to_server(tc->family, tc->sock_type,
813 addr, port,
814 tc->connections, fds,
815 tc->init_socks);
816 if (!ASSERT_OK_PTR(established_fds, "connect_to_server"))
817 goto done;
818 }
819 skel->rodata->ports[0] = 0;
820 skel->rodata->ports[1] = 0;
821 skel->rodata->sf = tc->family;
822 skel->rodata->ss = 0;
823
824 err = sock_iter_batch__load(skel);
825 if (!ASSERT_OK(err, "sock_iter_batch__load"))
826 goto done;
827
828 link = bpf_program__attach_iter(tc->sock_type == SOCK_STREAM ?
829 skel->progs.iter_tcp_soreuse :
830 skel->progs.iter_udp_soreuse,
831 NULL);
832 if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
833 goto done;
834
835 iter_fd = bpf_iter_create(bpf_link__fd(link));
836 if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
837 goto done;
838
839 tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks,
840 established_fds, tc->connections*2, counts, tc->max_socks,
841 link, iter_fd);
842 done:
843 close_netns(nstoken);
844 SYS_NOFAIL("ip netns del " TEST_CHILD_NS);
845 SYS_NOFAIL("sysctl -w net.ipv4.tcp_child_ehash_entries=0");
846 free(counts);
847 free_fds(fds, tc->init_socks);
848 free_fds(established_fds, tc->connections*2);
849 if (iter_fd >= 0)
850 close(iter_fd);
851 bpf_link__destroy(link);
852 sock_iter_batch__destroy(skel);
853 }
854
do_resume_tests(void)855 static void do_resume_tests(void)
856 {
857 int i;
858
859 for (i = 0; i < ARRAY_SIZE(resume_tests); i++) {
860 if (test__start_subtest(resume_tests[i].description)) {
861 do_resume_test(&resume_tests[i]);
862 }
863 }
864 }
865
do_test(int sock_type,bool onebyone)866 static void do_test(int sock_type, bool onebyone)
867 {
868 int err, i, nread, to_read, total_read, iter_fd = -1;
869 struct iter_out outputs[nr_soreuse];
870 struct bpf_link *link = NULL;
871 struct sock_iter_batch *skel;
872 int first_idx, second_idx;
873 int *fds[2] = {};
874
875 skel = sock_iter_batch__open();
876 if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
877 return;
878
879 /* Prepare 2 buckets of sockets in the kernel hashtable */
880 for (i = 0; i < ARRAY_SIZE(fds); i++) {
881 int local_port;
882
883 fds[i] = start_reuseport_server(AF_INET6, sock_type, "::1", 0, 0,
884 nr_soreuse);
885 if (!ASSERT_OK_PTR(fds[i], "start_reuseport_server"))
886 goto done;
887 local_port = get_socket_local_port(*fds[i]);
888 if (!ASSERT_GE(local_port, 0, "get_socket_local_port"))
889 goto done;
890 skel->rodata->ports[i] = ntohs(local_port);
891 }
892 skel->rodata->sf = AF_INET6;
893 if (sock_type == SOCK_STREAM)
894 skel->rodata->ss = TCP_LISTEN;
895
896 err = sock_iter_batch__load(skel);
897 if (!ASSERT_OK(err, "sock_iter_batch__load"))
898 goto done;
899
900 link = bpf_program__attach_iter(sock_type == SOCK_STREAM ?
901 skel->progs.iter_tcp_soreuse :
902 skel->progs.iter_udp_soreuse,
903 NULL);
904 if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
905 goto done;
906
907 iter_fd = bpf_iter_create(bpf_link__fd(link));
908 if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create"))
909 goto done;
910
911 /* Test reading a bucket (either from fds[0] or fds[1]).
912 * Only read "nr_soreuse - 1" number of sockets
913 * from a bucket and leave one socket out from
914 * that bucket on purpose.
915 */
916 to_read = (nr_soreuse - 1) * sizeof(*outputs);
917 total_read = 0;
918 first_idx = -1;
919 do {
920 nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read);
921 if (nread <= 0 || nread % sizeof(*outputs))
922 break;
923 total_read += nread;
924
925 if (first_idx == -1)
926 first_idx = outputs[0].idx;
927 for (i = 0; i < nread / sizeof(*outputs); i++)
928 ASSERT_EQ(outputs[i].idx, first_idx, "first_idx");
929 } while (total_read < to_read);
930 ASSERT_EQ(nread, onebyone ? sizeof(*outputs) : to_read, "nread");
931 ASSERT_EQ(total_read, to_read, "total_read");
932
933 free_fds(fds[first_idx], nr_soreuse);
934 fds[first_idx] = NULL;
935
936 /* Read the "whole" second bucket */
937 to_read = nr_soreuse * sizeof(*outputs);
938 total_read = 0;
939 second_idx = !first_idx;
940 do {
941 nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read);
942 if (nread <= 0 || nread % sizeof(*outputs))
943 break;
944 total_read += nread;
945
946 for (i = 0; i < nread / sizeof(*outputs); i++)
947 ASSERT_EQ(outputs[i].idx, second_idx, "second_idx");
948 } while (total_read <= to_read);
949 ASSERT_EQ(nread, 0, "nread");
950 /* Both so_reuseport ports should be in different buckets, so
951 * total_read must equal to the expected to_read.
952 *
953 * For a very unlikely case, both ports collide at the same bucket,
954 * the bucket offset (i.e. 3) will be skipped and it cannot
955 * expect the to_read number of bytes.
956 */
957 if (skel->bss->bucket[0] != skel->bss->bucket[1])
958 ASSERT_EQ(total_read, to_read, "total_read");
959
960 done:
961 for (i = 0; i < ARRAY_SIZE(fds); i++)
962 free_fds(fds[i], nr_soreuse);
963 if (iter_fd < 0)
964 close(iter_fd);
965 bpf_link__destroy(link);
966 sock_iter_batch__destroy(skel);
967 }
968
test_sock_iter_batch(void)969 void test_sock_iter_batch(void)
970 {
971 struct nstoken *nstoken = NULL;
972
973 SYS_NOFAIL("ip netns del " TEST_NS);
974 SYS(done, "ip netns add %s", TEST_NS);
975 SYS(done, "ip -net %s link set dev lo up", TEST_NS);
976
977 nstoken = open_netns(TEST_NS);
978 if (!ASSERT_OK_PTR(nstoken, "open_netns"))
979 goto done;
980
981 if (test__start_subtest("tcp")) {
982 do_test(SOCK_STREAM, true);
983 do_test(SOCK_STREAM, false);
984 }
985 if (test__start_subtest("udp")) {
986 do_test(SOCK_DGRAM, true);
987 do_test(SOCK_DGRAM, false);
988 }
989 do_resume_tests();
990 close_netns(nstoken);
991
992 done:
993 SYS_NOFAIL("ip netns del " TEST_NS);
994 }
995