#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This tests basic flowtable functionality.
# Creates following default topology:
#
# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
# Router1 is the one doing flow offloading, Router2 has no special
# purpose other than having a link that is smaller than either Originator
# and responder, i.e. TCPMSS announced values are too large and will still
# result in fragmentation and/or PMTU discovery.
#
# You can check with different Originator/Link/Responder MTU eg:
# nft_flowtable.sh -o8000 -l1500 -r2000
#

# Random suffix so that concurrent runs do not collide on namespace names.
sfx=$(mktemp -u "XXXXXXXX")
ns1="ns1-$sfx"
ns2="ns2-$sfx"
nsr1="nsr1-$sfx"
nsr2="nsr2-$sfx"

# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
ret=0

# Temporary files: input payload and the data received by each endpoint.
# They are created later with mktemp; empty until then so cleanup is safe.
nsin=""
ns1out=""
ns2out=""

# Remember the current setting so cleanup can restore it.
log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)

# checktool CMD DESCRIPTION
# Runs CMD (word-split on purpose, output discarded). If it fails, prints
# "SKIP: Could not DESCRIPTION" and exits with the kselftest SKIP code.
checktool (){
	# shellcheck disable=SC2086 # $1 is a command line, splitting is intended
	if ! $1 > /dev/null 2>&1; then
		echo "SKIP: Could not $2"
		exit $ksft_skip
	fi
}

checktool "nft --version" "run test without nft tool"
checktool "ip -Version" "run test without ip tool"
# 'command -v' is a shell builtin; 'which' may be missing on minimal systems.
checktool "command -v nc" "run test without nc (netcat)"
checktool "ip netns add $nsr1" "create net namespace $nsr1"

ip netns add "$ns1"
ip netns add "$ns2"
ip netns add "$nsr2"

# EXIT handler: removes all namespaces and temporary files and restores
# nf_log_all_netns if this script changed it (i.e. it was 0 before).
cleanup() {
	ip netns del "$ns1"
	ip netns del "$ns2"
	ip netns del "$nsr1"
	ip netns del "$nsr2"

	rm -f "$nsin" "$ns1out" "$ns2out"

	# String comparison: does not error out if the value could not be read.
	[ "$log_netns" = "0" ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
}

trap cleanup EXIT

sysctl -q net.netfilter.nf_log_all_netns=1

ip link add veth0 netns "$nsr1" type veth peer name eth0 netns "$ns1"
ip link add veth1 netns "$nsr1" type veth peer name veth0 netns "$nsr2"

ip link add veth1 netns "$nsr2" type veth peer name eth0 netns "$ns2"

for dev in lo veth0 veth1; do
	ip -net "$nsr1" link set $dev up
	ip -net "$nsr2" link set $dev up
done

ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
ip -net "$nsr1" addr add dead:1::1/64 dev veth0

ip -net "$nsr2" addr add 10.0.2.1/24 dev veth1
ip -net "$nsr2" addr add dead:2::1/64 dev veth1

# set different MTUs so we need to push packets coming from ns1 (large MTU)
# to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
# or to do PMTU discovery (send ICMP error back to originator).
# ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
# is NOT the lowest link mtu.

omtu=9000
lmtu=1500
rmtu=2000

# Print command line help and exit with status 1.
usage(){
	echo "nft_flowtable.sh [OPTIONS]"
	echo
	echo "MTU options"
	echo "   -o originator"
	echo "   -l link"
	echo "   -r responder"
	exit 1
}

while getopts "o:l:r:" o
do
	case $o in
		o) omtu=$OPTARG;;
		l) lmtu=$OPTARG;;
		r) rmtu=$OPTARG;;
		*) usage;;
	esac
done

if ! ip -net "$nsr1" link set veth0 mtu "$omtu"; then
	exit 1
fi

ip -net "$ns1" link set eth0 mtu "$omtu"

if ! ip -net "$nsr2" link set veth1 mtu "$rmtu"; then
	exit 1
fi

ip -net "$ns2" link set eth0 mtu "$rmtu"

# Apply the link MTU (-l) to both ends of the nsr1 <-> nsr2 transfer link.
# With the default of 1500 this matches what the devices already use.
if ! ip -net "$nsr1" link set veth1 mtu "$lmtu"; then
	exit 1
fi

if ! ip -net "$nsr2" link set veth0 mtu "$lmtu"; then
	exit 1
fi

# transfer-net between nsr1 and nsr2.
# these addresses are not used for connections.
ip -net "$nsr1" addr add 192.168.10.1/24 dev veth1
ip -net "$nsr1" addr add fee1:2::1/64 dev veth1

ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0
ip -net "$nsr2" addr add fee1:2::2/64 dev veth0

for i in 0 1; do
	ip netns exec "$nsr1" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
	ip netns exec "$nsr2" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
done

for ns in "$ns1" "$ns2"; do
	ip -net "$ns" link set lo up
	ip -net "$ns" link set eth0 up

	if ! ip netns exec "$ns" sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
		echo "ERROR: Check Originator/Responder values (problem during address addition)"
		exit 1
	fi
	# don't set ip DF bit for first two tests
	ip netns exec "$ns" sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
done

ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
ip -net "$ns1" route add default via 10.0.1.1
ip -net "$ns2" route add default via 10.0.2.1
ip -net "$ns1" addr add dead:1::99/64 dev eth0
ip -net "$ns2" addr add dead:2::99/64 dev eth0
ip -net "$ns1" route add default via dead:1::1
ip -net "$ns2" route add default via dead:2::1

ip -net "$nsr1" route add default via 192.168.10.2
ip -net "$nsr2" route add default via 192.168.10.1

ip netns exec "$nsr1" nft -f - <<EOF
table inet filter {
  flowtable f1 {
     hook ingress priority 0
     devices = { veth0, veth1 }
   }

   counter routed_orig { }
   counter routed_repl { }

   chain forward {
      type filter hook forward priority 0; policy drop;

      # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
      meta oif "veth1" tcp dport 12345 ct mark set 1 flow add @f1 counter name routed_orig accept

      # count packets supposedly offloaded as per direction.
      ct mark 1 counter name ct direction map { original : routed_orig, reply : routed_repl } accept

      ct state established,related accept

      meta nfproto ipv4 meta l4proto icmp accept
      meta nfproto ipv6 meta l4proto icmpv6 accept
   }
}
EOF

# shellcheck disable=SC2181 # status of the here-document command above
if [ $? -ne 0 ]; then
	echo "SKIP: Could not load nft ruleset"
	exit $ksft_skip
fi

# test basic connectivity
if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
	echo "ERROR: $ns1 cannot reach $ns2" 1>&2
	exit 1
fi

if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
	echo "ERROR: $ns2 cannot reach $ns1" 1>&2
	exit 1
fi

if [ $ret -eq 0 ];then
	echo "PASS: netns routing/connectivity: $ns1 can reach $ns2"
fi

nsin=$(mktemp)
ns1out=$(mktemp)
ns2out=$(mktemp)

# make_file PATH
# Fills PATH with random data: between 8192 and 139263 KiB, followed by a
# tail of 128..1151 bytes so the size is not a multiple of the block size.
# Sets the globals SIZE (size of the tail) and TSIZE (total size in bytes).
make_file()
{
	local name=$1

	SIZE=$((RANDOM % (1024 * 128)))
	SIZE=$((SIZE + (1024 * 8)))
	TSIZE=$((SIZE * 1024))

	dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null

	SIZE=$((RANDOM % 1024))
	SIZE=$((SIZE + 128))
	TSIZE=$((TSIZE + SIZE))
	# Append the tail. The previous "conf=notrunc" operand was a typo that dd
	# rejected (silently, stderr is discarded), so no tail was ever written.
	dd if=/dev/urandom bs=1 count=$SIZE 2> /dev/null >> "$name"
}

# check_counters DESCRIPTION
# Reads and resets the routed_orig/routed_repl counters in nsr1 and compares
# their byte counts with the payload size: the original direction must not
# exceed the size of $nsin, the reply direction a quarter of it. Larger
# values mean the packets went through the forward chain.
# Prints PASS/FAIL for DESCRIPTION and sets the global ret=1 on failure.
check_counters()
{
	local what=$1
	local ok=1
	local orig repl fs

	orig=$(ip netns exec "$nsr1" nft reset counter inet filter routed_orig | grep packets)
	repl=$(ip netns exec "$nsr1" nft reset counter inet filter routed_repl | grep packets)

	# Keep what follows "bytes" on the counter line.
	local orig_cnt=${orig#*bytes}
	local repl_cnt=${repl#*bytes}

	# du prints "<size><TAB><path>"; drop everything from the path onwards.
	fs=$(du -sb "$nsin")
	local max_orig=${fs%%/*}
	local max_repl=$((max_orig/4))

	# Operands are left unquoted so that surrounding blanks are dropped.
	# shellcheck disable=SC2086
	if [ $orig_cnt -gt $max_orig ];then
		echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig" 1>&2
		ret=1
		ok=0
	fi

	# shellcheck disable=SC2086
	if [ $repl_cnt -gt $max_repl ];then
		echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl" 1>&2
		ret=1
		ok=0
	fi

	if [ $ok -eq 1 ]; then
		echo "PASS: $what"
	fi
}

# check_transfer SENT RECEIVED DESCRIPTION
# Returns 0 if both files are identical. Otherwise prints a FAIL line for
# DESCRIPTION, lists both files and returns 1.
check_transfer()
{
	# These must be local: a global "what" here used to overwrite the test
	# label set by test_tcp_forwarding_nat.
	local in=$1
	local out=$2
	local what=$3

	if ! cmp "$in" "$out" > /dev/null 2>&1; then
		echo "FAIL: file mismatch for $what" 1>&2
		ls -l "$in"
		ls -l "$out"
		return 1
	fi

	return 0
}

# test_tcp_forwarding_ip CLIENT_NS SERVER_NS DSTIP DSTPORT
# Starts a netcat listener on port 12345 in SERVER_NS and connects to
# DSTIP:DSTPORT from CLIENT_NS; both sides send $nsin and store what they
# receive in $ns2out (server) and $ns1out (client).
# Returns 0 if both received files match $nsin, 1 otherwise.
test_tcp_forwarding_ip()
{
	local nsa=$1
	local nsb=$2
	local dstip=$3
	local dstport=$4
	local lret=0
	local lpid cpid

	# The listener always uses 12345; DSTPORT may differ when DNAT is tested.
	ip netns exec "$nsb" nc -w 5 -l -p 12345 < "$nsin" > "$ns2out" &
	lpid=$!

	sleep 1
	ip netns exec "$nsa" nc -w 4 "$dstip" "$dstport" < "$nsin" > "$ns1out" &
	cpid=$!

	sleep 3

	if ps -p $lpid > /dev/null;then
		kill $lpid
	fi

	if ps -p $cpid > /dev/null;then
		kill $cpid
	fi

	wait

	if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then
		lret=1
	fi

	if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then
		lret=1
	fi

	return $lret
}

# test_tcp_forwarding CLIENT_NS SERVER_NS
# Plain transfer to 10.0.2.99:12345; returns the transfer result.
test_tcp_forwarding()
{
	test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345

	return $?
}

# test_tcp_forwarding_nat CLIENT_NS SERVER_NS PMTU LABEL
# Runs a transfer to 10.0.2.99:12345 (masquerade) and, if that succeeded, a
# second one to 10.6.6.6:1666 (dnat). With PMTU set to 1 the flowtable
# counters are checked after each transfer, otherwise a PASS line is
# printed. LABEL is appended to the messages.
# Returns the result of the last transfer that was run.
test_tcp_forwarding_nat()
{
	local lret
	local pmtu
	local what

	test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
	lret=$?

	pmtu=$3
	what=$4

	if [ $lret -eq 0 ] ; then
		if [ "$pmtu" -eq 1 ] ;then
			check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what"
		else
			echo "PASS: flow offload for ns1/ns2 with masquerade $what"
		fi

		test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
		lret=$?
		if [ "$pmtu" -eq 1 ] ;then
			check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what"
		elif [ $lret -eq 0 ] ; then
			echo "PASS: flow offload for ns1/ns2 with dnat $what"
		fi
	fi

	return $lret
}

make_file "$nsin"

# First test:
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
# Due to MTU mismatch in both directions, all packets (except small packets like pure
# acks) have to be handled by normal forwarding path.  Therefore, packet counters
# are not checked.
if test_tcp_forwarding "$ns1" "$ns2"; then
	echo "PASS: flow offloaded for ns1/ns2"
else
	echo "FAIL: flow offload for ns1/ns2:" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
fi

# delete default route, i.e. ns2 won't be able to reach ns1 and
# will depend on ns1 being masqueraded in nsr1.
# expect ns1 has nsr1 address.
# Replace the default routes of ns2 with a single host route to the
# transfer-net address of nsr1.
ip -net "$ns2" route del default via 10.0.2.1
ip -net "$ns2" route del default via dead:2::1
ip -net "$ns2" route add 192.168.10.1 via 10.0.2.1

# Second test:
# Same, but with NAT enabled.  Same as in first test: we expect normal forward path
# to handle most packets.
ip netns exec "$nsr1" nft -f - <<EOF
table ip nat {
   chain prerouting {
      type nat hook prerouting priority 0; policy accept;
      meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
   }

   chain postrouting {
      type nat hook postrouting priority 0; policy accept;
      meta oifname "veth1" counter masquerade
   }
}
EOF

if ! test_tcp_forwarding_nat "$ns1" "$ns2" 0 ""; then
	echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
fi

# Third test:
# Same as second test, but with PMTU discovery enabled. This
# means that we expect the fastpath to handle packets as soon
# as the endpoints adjust the packet size.
ip netns exec "$ns1" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null

# reset counters.
# With pmtu in-place we'll also check that nft counters
# are lower than file size and packets were forwarded via flowtable layer.
# For earlier tests (large mtus), packets cannot be handled via flowtable
# (except pure acks and other small packets).
ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null

if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then
	echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	# Record the failure so that it is reflected in the exit status.
	ret=1
fi

# Another test:
# Add bridge interface br0 to Router1, with NAT enabled.
# Move the ns1-facing addresses of Router1 from veth0 to a new bridge br0
# that has veth0 as its port.
ip -net "$nsr1" link add name br0 type bridge
ip -net "$nsr1" addr flush dev veth0
ip -net "$nsr1" link set up dev veth0
ip -net "$nsr1" link set veth0 master br0
ip -net "$nsr1" addr add 10.0.1.1/24 dev br0
ip -net "$nsr1" addr add dead:1::1/64 dev br0
ip -net "$nsr1" link set up dev br0

ip netns exec "$nsr1" sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null

# br0 with NAT enabled.
ip netns exec "$nsr1" nft -f - <<EOF
flush table ip nat
table ip nat {
   chain prerouting {
      type nat hook prerouting priority 0; policy accept;
      meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
   }

   chain postrouting {
      type nat hook postrouting priority 0; policy accept;
      meta oifname "veth1" counter masquerade
   }
}
EOF

if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "on bridge"; then
	echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
fi


# Another test:
# Add bridge interface br0 to Router1, with NAT and VLAN.
ip -net "$nsr1" link set veth0 nomaster
ip -net "$nsr1" link set down dev veth0
ip -net "$nsr1" link add link veth0 name veth0.10 type vlan id 10
ip -net "$nsr1" link set up dev veth0
ip -net "$nsr1" link set up dev veth0.10
ip -net "$nsr1" link set veth0.10 master br0

ip -net "$ns1" addr flush dev eth0
ip -net "$ns1" link add link eth0 name eth0.10 type vlan id 10
ip -net "$ns1" link set eth0 up
ip -net "$ns1" link set eth0.10 up
ip -net "$ns1" addr add 10.0.1.99/24 dev eth0.10
ip -net "$ns1" route add default via 10.0.1.1
ip -net "$ns1" addr add dead:1::99/64 dev eth0.10

if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "bridge and VLAN"; then
	echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
fi

# restore test topology (remove bridge and VLAN)
ip -net "$nsr1" link set veth0 nomaster
ip -net "$nsr1" link set veth0 down
ip -net "$nsr1" link set veth0.10 down
ip -net "$nsr1" link delete veth0.10 type vlan
ip -net "$nsr1" link delete br0 type bridge
ip -net "$ns1" addr flush dev eth0.10
ip -net "$ns1" link set eth0.10 down
ip -net "$ns1" link set eth0 down
ip -net "$ns1" link delete eth0.10 type vlan

# restore address in ns1 and nsr1
ip -net "$ns1" link set eth0 up
ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
ip -net "$ns1" route add default via 10.0.1.1
ip -net "$ns1" addr add dead:1::99/64 dev eth0
ip -net "$ns1" route add default via dead:1::1
ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
ip -net "$nsr1" addr add dead:1::1/64 dev veth0
ip -net "$nsr1" link set up dev veth0

# Throw-away keys for the ESP tunnel, derived from the current process list.
KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
SPI1=$RANDOM
SPI2=$RANDOM

# The two directions must not share an SPI.
if [ $SPI1 -eq $SPI2 ]; then
	SPI2=$((SPI2+1))
fi

# do_esp NS ME REMOTE LNET RNET SPI_OUT SPI_IN
# Configures an ESP tunnel-mode endpoint in namespace NS: ME and REMOTE are
# the local and peer tunnel addresses, LNET and RNET the local and remote
# protected networks. Adds one xfrm state per direction (SPI_IN for
# REMOTE -> ME, SPI_OUT for ME -> REMOTE) plus an "out" and a "fwd" policy.
do_esp() {
	local ns=$1
	local me=$2
	local remote=$3
	local lnet=$4
	local rnet=$5
	local spi_out=$6
	local spi_in=$7

	ip -net "$ns" xfrm state add src "$remote" dst "$me" proto esp spi "$spi_in" \
		enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$rnet" dst "$lnet"
	ip -net "$ns" xfrm state add src "$me" dst "$remote" proto esp spi "$spi_out" \
		enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$lnet" dst "$rnet"

	# to encrypt packets as they go out (includes forwarded packets that need encapsulation)
	ip -net "$ns" xfrm policy add src "$lnet" dst "$rnet" dir out \
		tmpl src "$me" dst "$remote" proto esp mode tunnel priority 1 action allow
	# to fwd decrypted packets after esp processing:
	ip -net "$ns" xfrm policy add src "$rnet" dst "$lnet" dir fwd \
		tmpl src "$remote" dst "$me" proto esp mode tunnel priority 1 action allow
}

do_esp "$nsr1" 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2

do_esp "$nsr2" 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1

ip netns exec "$nsr1" nft delete table ip nat

# restore default routes
ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
ip -net "$ns2" route add default via 10.0.2.1
ip -net "$ns2" route add default via dead:2::1

if test_tcp_forwarding "$ns1" "$ns2"; then
	check_counters "ipsec tunnel mode for ns1/ns2"
else
	# Report on stderr like every other failure and record it, so the
	# exit status reflects the failed test.
	echo "FAIL: ipsec tunnel mode for ns1/ns2" 1>&2
	ip netns exec "$nsr1" nft list ruleset 1>&2
	ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
	ret=1
fi

exit $ret