1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4source lib.sh
5
# Both tools are required by the tests below.  checktool is not defined in
# this file (presumably provided by lib.sh); it is handed a probe command and
# the message to show when the probe fails.
checktool "conntrack --version" "run test without conntrack"
checktool "nft --version" "run test without nft tool"

# Script-wide state shared by the helpers and tests.
ret=0			# exit status of the script, set to 1 by failing checks
have_socat=0		# becomes 1 when socat is usable (needed by ct_udpflood)
init_net_max=0		# nf_conntrack_max at startup, restored on exit
ct_buckets=0		# nf_conntrack_buckets at startup, restored on exit
tmpfile=""		# scratch files, created right before the tests run
tmpfile_proc=""
tmpfile_uniq=""
insert_count=2000	# number of entries to insert via ctnetlink

if socat -h > /dev/null; then
	have_socat=1
fi

# Slow machines get a smaller workload.
case "$KSFT_MACHINE_SLOW" in
yes)
	insert_count=400
	;;
esac

modprobe -q nf_conntrack
sysctl -q net.netfilter.nf_conntrack_max >/dev/null || {
	echo "SKIP: conntrack sysctls not available"
	exit $KSFT_SKIP
}

# Remember the current values; give up if they cannot be read.
init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) || exit 1
ct_buckets=$(sysctl -n net.netfilter.nf_conntrack_buckets) || exit 1
30
# EXIT handler: tear down the namespaces, delete the scratch files and put
# the two sysctls changed by this script back to the values saved at startup.
cleanup() {
	local max_was="$init_net_max"
	local buckets_was="$ct_buckets"

	cleanup_all_ns
	rm -f "$tmpfile" "$tmpfile_proc" "$tmpfile_uniq"

	# restore original sysctl setting
	sysctl -q "net.netfilter.nf_conntrack_max=$max_was"
	sysctl -q "net.netfilter.nf_conntrack_buckets=$buckets_was"
}
trap cleanup EXIT
41
# Check that the legacy sysctl net.nf_conntrack_max reports the given value.
#   $1 - value the alias is expected to hold
# Terminates the script with status 1 when the alias reports something else,
# including when it cannot be read at all.
check_max_alias()
{
	local expected="$1"
	local lv

	# old name, expected to alias to the first, i.e. changing one
	# changes the other as well.
	lv=$(sysctl -n net.nf_conntrack_max)

	# Test for equality and negate: an empty or non-numeric reading makes
	# the numeric test fail with an error, which must count as a mismatch
	# instead of slipping through as a pass.
	if ! [ "$expected" -eq "$lv" ] 2>/dev/null; then
		echo "nf_conntrack_max sysctls should have identical values"
		exit 1
	fi
}
54
# Create UDP conntrack entries with random addresses through ctnetlink
# ("conntrack -I").
#   $1 - network namespace to insert into
#   $2 - number of entries requested
# Entries are created in batches of $bulk parallel conntrack invocations, so
# the number of attempts is $2 rounded up to a multiple of $bulk.  Failed
# insertions are ignored and stderr of the whole batch is discarded.
insert_ctnetlink() {
	local ns="$1"
	local count="$2"
	local i=0
	local bulk=16

	while [ "$i" -lt "$count" ]; do
		# The script is single-quoted: every expansion in it is done by
		# the bash instance running inside the namespace.  The batch size
		# is handed over as its first positional parameter.
		ip netns exec "$ns" bash -c '
			for _ in $(seq 1 "$1"); do
				conntrack -I \
					-s $((RANDOM%256)).$((RANDOM%256)).$((RANDOM%256)).$((RANDOM%255+1)) \
					-d $((RANDOM%256)).$((RANDOM%256)).$((RANDOM%256)).$((RANDOM%255+1)) \
					--protonum 17 --timeout 3600 --status ASSURED,SEEN_REPLY \
					--sport $RANDOM --dport 53 &
			done
			wait' bash "$bulk" 2>/dev/null

		i=$((i+bulk))
	done
}
73
# Compare the number of conntrack entries in a namespace with an expectation.
#   $1 - network namespace
#   $2 - expected number of entries as reported by "conntrack -C"
#   $3 - description appended to the result line
# Prints a PASS line on an exact match, otherwise reports the mismatch and
# terminates the script with status 1.
check_ctcount() {
	local ns="$1"
	local count="$2"
	local msg="$3"
	local now

	now=$(ip netns exec "$ns" conntrack -C)

	# Test for equality and negate: if conntrack printed nothing (or
	# something that is not a number) the numeric test errors out, and
	# that has to be treated as a failure rather than as a pass.
	if ! [ "$now" -eq "$count" ] 2>/dev/null; then
		echo "expected $count entries in $ns, not $now: $msg"
		exit 1
	fi

	echo "PASS: got $count connections: $msg"
}
88
# Keep writing random values to net.netfilter.nf_conntrack_buckets until
# the given number of seconds has passed.
#   $1 - duration in seconds
ctresize() {
	local duration="$1"
	local t_now deadline

	t_now=$(date +%s)
	deadline=$((t_now + duration))

	while [ "$t_now" -lt "$deadline" ]; do
		sysctl -q "net.netfilter.nf_conntrack_buckets=$RANDOM"
		t_now=$(date +%s)
	done
}
99
# Sleep for a random number of whole seconds in the range [0, $1).
#   $1 - exclusive upper bound in seconds
# Returns immediately, without sleeping, when the bound is not a positive
# integer.
do_rsleep() {
	local limit="$1"
	local r=$RANDOM

	# A zero or empty bound would make the modulo below fail with an
	# arithmetic error; there is nothing to wait for in that case.
	[ "$limit" -gt 0 ] 2>/dev/null || return 0

	r=$((r%limit))
	sleep "$r"
}
107
# Flush the conntrack table of namespace $1 once ("conntrack -F"); anything
# written to stderr is discarded.
ct_flush_once() {
	ip netns exec "$1" conntrack -F 2>/dev/null
}
113
# Flush the conntrack table of a namespace at random intervals.
#   $1 - network namespace
#   $2 - duration in seconds; also passed to do_rsleep as the bound for each
#        random pause
ctflush() {
	local ns="$1"
	local duration="$2"
	local t_now deadline

	t_now=$(date +%s)
	deadline=$((t_now + duration))

	# random pause before the first flush
	do_rsleep "$duration"

	while [ "$t_now" -lt "$deadline" ]; do
		ct_flush_once "$ns"
		do_rsleep "$duration"
		t_now=$(date +%s)
	done
}
128
# Ping many different loopback addresses from inside a namespace.
#   $1 - network namespace
#   $2 - duration in seconds
#   $3 - label supplied by the caller (stored, not used in this function)
# Every round pings 127.$k.$j.1 .. 127.$k.$j.254 in parallel and waits for
# the pings to finish.  j advances by one per round; when it wraps from 255
# back to 0, k advances by one (k wraps the same way).
ct_pingflood()
{
	local ns="$1"
	local duration="$2"
	local msg="$3"
	local t_now deadline
	local j=0
	local k=0

	t_now=$(date +%s)
	deadline=$((t_now + duration))

	while [ "$t_now" -lt "$deadline" ]; do
		ip netns exec "$ns" bash -c \
			"j=$j k=$k; for i in \$(seq 1 254); do ping -q -c 1 127.\$k.\$j.\$i & done; wait" >/dev/null 2>&1

		j=$((j + 1))
		if [ "$j" -eq 256 ]; then
			j=0
			k=$(((k + 1) % 256))
		fi

		t_now=$(date +%s)
	done

	wait
}
157
# Send UDP datagrams to random ports on 127.0.0.1 from inside a namespace.
#   $1 - network namespace
#   $2 - duration in seconds
# Every round starts 100 socat senders in parallel and waits for them.
# Returns right away when socat was not detected (have_socat is not 1).
ct_udpflood()
{
	local ns="$1"
	local duration="$2"
	local t_now deadline

	t_now=$(date +%s)
	deadline=$((t_now + duration))

	if [ "$have_socat" -ne "1" ]; then
		return
	fi

	while [ "$t_now" -lt "$deadline" ]; do
# The quoted delimiter keeps the script below literal; it is expanded by the
# bash instance started inside the namespace.
ip netns exec "$ns" bash<<"EOF"
	for i in $(seq 1 100);do
		dport=$(((RANDOM%65536)+1))

		echo bar | socat -u STDIN UDP:"127.0.0.1:$dport" &
	done > /dev/null 2>&1
	wait
EOF
		t_now=$(date +%s)
	done
}
179
# Run the udpclash helper against 127.0.0.1 and a random port, over and
# over, from inside a namespace.
#   $1 - network namespace
#   $2 - duration in seconds
# Returns 1 without doing anything when there is no executable "udpclash"
# in the current directory.
ct_udpclash()
{
	local ns="$1"
	local duration="$2"
	local t_now deadline

	t_now=$(date +%s)
	deadline=$((t_now + duration))

	if ! [ -x udpclash ]; then
		return 1
	fi

	while [ "$t_now" -lt "$deadline" ]; do
		ip netns exec "$ns" ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1

		t_now=$(date +%s)
	done
}
195
# Dump the conntrack table of namespace $1 and discard the output.  Dumps
# must not loop forever or trigger a use-after-free even when the table is
# altered while a dump is in progress.
# The netlink dump ("conntrack -L") and, when available, the /proc dump run
# in parallel; the function returns once both have finished.
ct_nulldump()
{
	local ns="$1"
	local procfile=/proc/self/net/nf_conntrack

	ip netns exec "$ns" conntrack -L > /dev/null 2>&1 &

	# Don't require /proc support in conntrack
	if [ -r "$procfile" ]; then
		ip netns exec "$ns" bash -c "wc -l < $procfile" > /dev/null &
	fi

	wait
}
212
# Call ct_nulldump repeatedly, pausing 0 or 1 seconds (chosen at random)
# after each dump.
#   $1 - network namespace
#   $2 - duration in seconds
ct_nulldump_loop()
{
	local ns="$1"
	local duration="$2"
	local t_now deadline

	t_now=$(date +%s)
	deadline=$((t_now + duration))

	while [ "$t_now" -lt "$deadline" ]; do
		ct_nulldump "$ns"
		sleep $((RANDOM%2))
		t_now=$(date +%s)
	done
}
226
# Randomly change the icmp and/or udp conntrack timeout of namespace $1.
# Each of the two sysctls is rewritten with a chance of one in two; the new
# value is a random number between 0 and 4.
# The return status is that of the udp update, or 1 when the udp timeout
# was left alone.
change_timeouts()
{
	local ns="$1"
	local do_icmp=$((RANDOM%2))
	local do_udp=$((RANDOM%2))

	if [ "$do_icmp" -eq 1 ]; then
		ip netns exec "$ns" sysctl -q "net.netfilter.nf_conntrack_icmp_timeout=$((RANDOM%5))"
	fi

	[ "$do_udp" -eq 1 ] || return 1
	ip netns exec "$ns" sysctl -q "net.netfilter.nf_conntrack_udp_timeout=$((RANDOM%5))"
}
236
# Call change_timeouts repeatedly, pausing 0 or 1 seconds (chosen at random)
# after each call.  Once the time is up, both timeouts are set to 30.
#   $1 - network namespace
#   $2 - duration in seconds
ct_change_timeouts_loop()
{
	local ns="$1"
	local duration="$2"
	local t_now deadline proto

	t_now=$(date +%s)
	deadline=$((t_now + duration))

	while [ "$t_now" -lt "$deadline" ]; do
		change_timeouts "$ns"
		sleep $((RANDOM%2))
		t_now=$(date +%s)
	done

	# restore defaults
	for proto in icmp udp; do
		ip netns exec "$ns" sysctl -q "net.netfilter.nf_conntrack_${proto}_timeout=30"
	done
}
254
# Report whether the kernel became tainted while a test was running.
#   $1 - content of /proc/sys/kernel/tainted sampled before the test
#   $2 - label used in the PASS/TAINT line
# Does nothing when the earlier sample was already non-zero.  Otherwise a
# current value of 0 prints a PASS line; anything else prints a TAINT line
# and the kernel log, then terminates the script with status 1.
check_taint()
{
	local tainted_then="$1"
	local msg="$2"
	local tainted_now=0

	# Tainted before the test even started: nothing can be concluded.
	[ "$tainted_then" -ne 0 ] && return

	read tainted_now < /proc/sys/kernel/tainted

	if ! [ "$tainted_now" -eq 0 ]; then
		echo "TAINT: $msg"
		dmesg
		exit 1
	fi

	echo "PASS: $msg"
}
276
# Start every load generator against one namespace in parallel and wait
# until all of them have finished.
#   $1 - network namespace
# The run time is taken from $timeout, which this function does not set
# itself: it is read from the caller's scope.  The ctnetlink inserter is
# asked for a random number of entries below $insert_count.
insert_flood()
{
	local ns="$1"
	local entries=0
	local worker

	entries=$((RANDOM%$insert_count))

	ct_pingflood "$ns" "$timeout" "floodresize" &
	for worker in ct_udpflood ct_udpclash; do
		"$worker" "$ns" "$timeout" &
	done

	insert_ctnetlink "$ns" "$entries" &
	for worker in ctflush ct_nulldump_loop ct_change_timeouts_loop; do
		"$worker" "$ns" "$timeout" &
	done

	wait
}
295
# Stress test: run the complete set of load generators in both client
# namespaces while the hash table is being resized, then check that the
# kernel did not become tainted.
# Note that $timeout must keep this name: insert_flood reads it from this
# function's scope.
test_floodresize_all()
{
	local timeout=20
	local tainted_then=""

	read tainted_then < /proc/sys/kernel/tainted

	insert_flood "$nsclient1" &
	insert_flood "$nsclient2" &

	# resize table constantly while flood/insert/dump/flushes
	# are happening in parallel.
	ctresize "$timeout"

	# wait for subshells to complete, everything is limited
	# by $timeout.
	wait

	check_taint "$tainted_then" "resize+flood"
}
318
# Cross-check the different ways of counting and listing the conntrack
# entries of one namespace.
#   $1 - network namespace
#   $2 - protocol name handed to "conntrack -L --proto"; every entry in the
#        namespace is expected to be of this protocol
# The checks are:
#   - "conntrack -C" is non-zero and equals the line count of "conntrack -L"
#   - the sorted listing contains no duplicated lines
#   - the protocol-filtered listing has as many lines as the full listing
#   - if /proc/self/net/nf_conntrack is readable: it has the same number of
#     lines, and none of them are duplicated
# Prints FAIL lines for each violated check, followed by a PASS/FAIL summary.
# Sets the global ret to 1 on failure.  Uses the scratch files $tmpfile,
# $tmpfile_proc and $tmpfile_uniq.
check_dump()
{
	local ns="$1"
	local protoname="$2"
	local l=0
	local c=0
	local proto=0
	local proc=0
	local unique=""
	local lret=0

	# NOTE: assumes timeouts are large enough to not have
	# expirations in all following tests.
	l=$(ip netns exec "$ns" conntrack -L 2>/dev/null | sort | tee "$tmpfile" | wc -l)
	c=$(ip netns exec "$ns" conntrack -C)

	if [ "$c" -eq 0 ]; then
		echo "FAIL: conntrack count for $ns is 0"
		lret=1
	fi

	if [ "$c" -ne "$l" ]; then
		echo "FAIL: conntrack count inconsistency for $ns -L: $c != $l"
		lret=1
	fi

	# check the dump we retrieved is free of duplicated entries.
	unique=$(uniq "$tmpfile" | tee "$tmpfile_uniq" | wc -l)
	if [ "$l" -ne "$unique" ]; then
		echo "FAIL: listing contained redundant entries for $ns: $l != $unique"
		diff -u "$tmpfile" "$tmpfile_uniq"
		lret=1
	fi

	# we either inserted icmp or only udp, hence, --proto should return same entry count as without filter.
	proto=$(ip netns exec "$ns" conntrack -L --proto "$protoname" 2>/dev/null | sort | uniq | tee "$tmpfile_uniq" | wc -l)
	if [ "$l" -ne "$proto" ]; then
		echo "FAIL: dump inconsistency for $ns -L --proto $protoname: $l != $proto"
		diff -u "$tmpfile" "$tmpfile_uniq"
		lret=1
	fi

	if [ -r /proc/self/net/nf_conntrack ] ; then
		proc=$(ip netns exec "$ns" bash -c "sort < /proc/self/net/nf_conntrack | tee \"$tmpfile_proc\" | wc -l")

		if [ "$l" -ne "$proc" ]; then
			echo "FAIL: proc inconsistency for $ns: $l != $proc"
			lret=1
		fi

		proc=$(uniq "$tmpfile_proc" | tee "$tmpfile_uniq" | wc -l)
		if [ "$l" -ne "$proc" ]; then
			echo "FAIL: proc inconsistency after uniq filter for $ns: $l != $proc"
			diff -u "$tmpfile_proc" "$tmpfile_uniq"
			lret=1
		fi
	fi

	if [ "$lret" -eq 0 ];then
		echo "PASS: dump in netns $ns had same entry count (-C $c, -L $l, -p $proto, /proc $proc)"
	else
		echo "FAIL: dump in netns $ns had different entry count (-C $c, -L $l, -p $proto, /proc $proc)"
		ret=1
	fi
}
383
# Dump consistency test: fill the first client namespace with icmp entries
# (ping flood) and the second one with udp entries (ctnetlink inserts), then
# run check_dump on both and check that the kernel did not become tainted.
test_dump_all()
{
	local timeout=3
	local tainted_then=""
	local ns

	read tainted_then < /proc/sys/kernel/tainted

	# start from empty tables
	for ns in "$nsclient1" "$nsclient2"; do
		ct_flush_once "$ns"
	done

	# check_dump assumes that no entry expires while it is running
	ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=3600

	# the ping flood runs in the background while entries are inserted
	ct_pingflood "$nsclient1" "$timeout" "dumpall" &
	insert_ctnetlink "$nsclient2" $insert_count

	wait

	check_dump "$nsclient1" "icmp"
	check_dump "$nsclient2" "udp"

	check_taint "$tainted_then" "test parallel conntrack dumps"
}
406
# Try to change a numeric sysctl from inside a namespace and report whether
# the value really changed.
#   $1 - network namespace
#   $2 - full sysctl name, including the leading "net."
#   $3 - if greater than 0, a successful change is a test failure: a FAIL
#        line is printed and the global ret is set to 1
# Returns 0 if the value changed, 1 if it stayed the same or if the sysctl
# cannot be read in the namespace.  A changed value is left in place.
check_sysctl_immutable()
{
	local ns="$1"
	local name="$2"
	local failhard="$3"
	local o=0
	local n=0

	o=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null)

	# Not readable in this namespace: there is nothing to change.
	[ -z "$o" ] && return 1

	n=$((o+1))

	# return value isn't reliable, need to read it back
	ip netns exec "$ns" sysctl -q "$name=$n" >/dev/null 2>&1

	n=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null)

	[ -z "$n" ] && return 1

	if [ "$o" -ne "$n" ]; then
		if [ "$failhard" -gt 0 ]; then
			# $name already carries the "net." prefix.
			echo "FAIL: $name should not be changeable from namespace (now $n)"
			ret=1
		fi
		return 0
	fi

	return 1
}
435
# Lower nf_conntrack_max in init_net, try to insert one entry more than the
# limit into the first client namespace and expect the count to stop at the
# limit.  The saved init_net value is written back afterwards.
test_conntrack_max_limit()
{
	local limit=100

	sysctl -q "net.netfilter.nf_conntrack_max=$limit"
	insert_ctnetlink "$nsclient1" $((limit + 1))

	# check netns is clamped by init_net, i.e., either netns follows
	# init_net value, or a higher pernet limit (compared to init_net) is ignored.
	check_ctcount "$nsclient1" "$limit" "netns conntrack_max is init_net bound"

	sysctl -q "net.netfilter.nf_conntrack_max=$init_net_max"
}
447
# Flush the test_ct table in the first client namespace only, generate
# traffic in both namespaces and expect connections to be tracked only in
# the second one.
test_conntrack_disable()
{
	local timeout=2
	local ns

	# disable conntrack pickups
	ip netns exec "$nsclient1" nft flush table ip test_ct

	for ns in "$nsclient1" "$nsclient2"; do
		ct_flush_once "$ns"
	done

	ct_pingflood "$nsclient1" "$timeout" "conntrack disable"
	ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1

	# Disabled, should not have picked up any connection.
	check_ctcount "$nsclient1" 0 "conntrack disabled"

	# This one is still active, expect 1 connection.
	check_ctcount "$nsclient2" 1 "conntrack enabled"
}
467
# Read the init_net limit and check that the legacy sysctl name reports the
# same value, both now and after the limit has been changed.
init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max)

check_max_alias "$init_net_max"

sysctl -q net.netfilter.nf_conntrack_max="262000"
check_max_alias 262000

setup_ns nsclient1 nsclient2

# check this only works from init_net
# "net." is prepended to every entry below, so the legacy alias has to be
# listed as plain "nf_conntrack_max" to end up as net.nf_conntrack_max.
for n in netfilter.nf_conntrack_buckets netfilter.nf_conntrack_expect_max nf_conntrack_max;do
	check_sysctl_immutable "$nsclient1" "net.$n" 1
done

# won't work on older kernels. If it works, check that the netns obeys the limit
if check_sysctl_immutable "$nsclient1" net.netfilter.nf_conntrack_max 0;then
	# subtest: if pernet is changeable, check that reducing it in pernet
	# limits the pernet entries.  Inverse, pernet clamped by a lower init_net
	# setting, is already checked by "test_conntrack_max_limit" test.

	ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=1
	insert_ctnetlink "$nsclient1" 2
	check_ctcount "$nsclient1" 1 "netns conntrack_max is pernet bound"
	ip netns exec "$nsclient1" sysctl -q "net.netfilter.nf_conntrack_max=$init_net_max"
fi
493
# enable conntrack in both namespaces: load a table whose input chain
# contains a "ct state" match.
for n in "$nsclient1" "$nsclient2"; do
	ip netns exec "$n" nft -f - <<EOF
table ip test_ct {
	chain input {
		type filter hook input priority 0
		ct state new counter
	}
}
EOF
done

# Scratch files used by check_dump; the EXIT handler removes them.
tmpfile=$(mktemp)
tmpfile_proc=$(mktemp)
tmpfile_uniq=$(mktemp)

# Run the tests in this fixed order.
for t in test_conntrack_max_limit \
	 test_dump_all \
	 test_floodresize_all \
	 test_conntrack_disable; do
	"$t"
done

exit $ret
515