1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2018 Facebook
3 
4 #include <string.h>
5 
6 #include <linux/stddef.h>
7 #include <linux/bpf.h>
8 #include <linux/in.h>
9 #include <linux/in6.h>
10 #include <linux/tcp.h>
11 #include <linux/if.h>
12 #include <errno.h>
13 
14 #include <bpf/bpf_helpers.h>
15 #include <bpf/bpf_endian.h>
16 
17 #include "bpf_tcp_helpers.h"
18 
19 #define SRC_REWRITE_IP4		0x7f000004U
20 #define DST_REWRITE_IP4		0x7f000001U
21 #define DST_REWRITE_PORT4	4444
22 
23 #ifndef TCP_CA_NAME_MAX
24 #define TCP_CA_NAME_MAX 16
25 #endif
26 
27 #ifndef TCP_NOTSENT_LOWAT
28 #define TCP_NOTSENT_LOWAT 25
29 #endif
30 
31 #ifndef IFNAMSIZ
32 #define IFNAMSIZ 16
33 #endif
34 
35 __attribute__ ((noinline))
do_bind(struct bpf_sock_addr * ctx)36 int do_bind(struct bpf_sock_addr *ctx)
37 {
38 	struct sockaddr_in sa = {};
39 
40 	sa.sin_family = AF_INET;
41 	sa.sin_port = bpf_htons(0);
42 	sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);
43 
44 	if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
45 		return 0;
46 
47 	return 1;
48 }
49 
verify_cc(struct bpf_sock_addr * ctx,char expected[TCP_CA_NAME_MAX])50 static __inline int verify_cc(struct bpf_sock_addr *ctx,
51 			      char expected[TCP_CA_NAME_MAX])
52 {
53 	char buf[TCP_CA_NAME_MAX];
54 	int i;
55 
56 	if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
57 		return 1;
58 
59 	for (i = 0; i < TCP_CA_NAME_MAX; i++) {
60 		if (buf[i] != expected[i])
61 			return 1;
62 		if (buf[i] == 0)
63 			break;
64 	}
65 
66 	return 0;
67 }
68 
set_cc(struct bpf_sock_addr * ctx)69 static __inline int set_cc(struct bpf_sock_addr *ctx)
70 {
71 	char reno[TCP_CA_NAME_MAX] = "reno";
72 	char cubic[TCP_CA_NAME_MAX] = "cubic";
73 
74 	if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &reno, sizeof(reno)))
75 		return 1;
76 	if (verify_cc(ctx, reno))
77 		return 1;
78 
79 	if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic)))
80 		return 1;
81 	if (verify_cc(ctx, cubic))
82 		return 1;
83 
84 	return 0;
85 }
86 
bind_to_device(struct bpf_sock_addr * ctx)87 static __inline int bind_to_device(struct bpf_sock_addr *ctx)
88 {
89 	char veth1[IFNAMSIZ] = "test_sock_addr1";
90 	char veth2[IFNAMSIZ] = "test_sock_addr2";
91 	char missing[IFNAMSIZ] = "nonexistent_dev";
92 	char del_bind[IFNAMSIZ] = "";
93 
94 	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
95 				&veth1, sizeof(veth1)))
96 		return 1;
97 	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
98 				&veth2, sizeof(veth2)))
99 		return 1;
100 	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
101 				&missing, sizeof(missing)) != -ENODEV)
102 		return 1;
103 	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
104 				&del_bind, sizeof(del_bind)))
105 		return 1;
106 
107 	return 0;
108 }
109 
set_keepalive(struct bpf_sock_addr * ctx)110 static __inline int set_keepalive(struct bpf_sock_addr *ctx)
111 {
112 	int zero = 0, one = 1;
113 
114 	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)))
115 		return 1;
116 	if (ctx->type == SOCK_STREAM) {
117 		if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPIDLE, &one, sizeof(one)))
118 			return 1;
119 		if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPINTVL, &one, sizeof(one)))
120 			return 1;
121 		if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPCNT, &one, sizeof(one)))
122 			return 1;
123 		if (bpf_setsockopt(ctx, SOL_TCP, TCP_SYNCNT, &one, sizeof(one)))
124 			return 1;
125 		if (bpf_setsockopt(ctx, SOL_TCP, TCP_USER_TIMEOUT, &one, sizeof(one)))
126 			return 1;
127 	}
128 	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &zero, sizeof(zero)))
129 		return 1;
130 
131 	return 0;
132 }
133 
set_notsent_lowat(struct bpf_sock_addr * ctx)134 static __inline int set_notsent_lowat(struct bpf_sock_addr *ctx)
135 {
136 	int lowat = 65535;
137 
138 	if (ctx->type == SOCK_STREAM) {
139 		if (bpf_setsockopt(ctx, SOL_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat)))
140 			return 1;
141 	}
142 
143 	return 0;
144 }
145 
146 SEC("cgroup/connect4")
connect_v4_prog(struct bpf_sock_addr * ctx)147 int connect_v4_prog(struct bpf_sock_addr *ctx)
148 {
149 	struct bpf_sock_tuple tuple = {};
150 	struct bpf_sock *sk;
151 
152 	/* Verify that new destination is available. */
153 	memset(&tuple.ipv4.saddr, 0, sizeof(tuple.ipv4.saddr));
154 	memset(&tuple.ipv4.sport, 0, sizeof(tuple.ipv4.sport));
155 
156 	tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4);
157 	tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4);
158 
159 	/* Bind to device and unbind it. */
160 	if (bind_to_device(ctx))
161 		return 0;
162 
163 	if (set_keepalive(ctx))
164 		return 0;
165 
166 	if (set_notsent_lowat(ctx))
167 		return 0;
168 
169 	if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
170 		return 0;
171 	else if (ctx->type == SOCK_STREAM)
172 		sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4),
173 				       BPF_F_CURRENT_NETNS, 0);
174 	else
175 		sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv4),
176 				       BPF_F_CURRENT_NETNS, 0);
177 
178 	if (!sk)
179 		return 0;
180 
181 	if (sk->src_ip4 != tuple.ipv4.daddr ||
182 	    sk->src_port != DST_REWRITE_PORT4) {
183 		bpf_sk_release(sk);
184 		return 0;
185 	}
186 
187 	bpf_sk_release(sk);
188 
189 	/* Rewrite congestion control. */
190 	if (ctx->type == SOCK_STREAM && set_cc(ctx))
191 		return 0;
192 
193 	/* Rewrite destination. */
194 	ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
195 	ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
196 
197 	return do_bind(ctx) ? 1 : 0;
198 }
199 
200 char _license[] SEC("license") = "GPL";
201