// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2022 Pablo Neira Ayuso <pablo@netfilter.org>
 */

#include <linux/kernel.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nft_meta.h>
#include <net/netfilter/nf_tables_offload.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <net/gre.h>
#include <net/geneve.h>
#include <net/ip.h>
#include <linux/icmpv6.h>
#include <linux/ip.h>
#include <linux/ipv6.h>

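/* Per-CPU cache of the most recently parsed inner tunnel context. The
 * local_lock_t serializes access on PREEMPT_RT, where disabling bottom
 * halves alone does not guarantee exclusive per-CPU access.
 */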
struct nft_inner_tun_ctx_locked {
	struct nft_inner_tun_ctx ctx;
	local_lock_t bh_lock;
};

static DEFINE_PER_CPU(struct nft_inner_tun_ctx_locked, nft_pcpu_tun_ctx) = {
	.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};

/* Same layout as nft_expr but it embeds the private expression data area. */
struct __nft_expr {
	const struct nft_expr_ops *ops;
	union {
		struct nft_payload payload;
		struct nft_meta meta;
	} __attribute__((aligned(__alignof__(u64))));
};

enum {
	NFT_INNER_EXPR_PAYLOAD,
	NFT_INNER_EXPR_META,
};

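/* Compiled inner expression: which inner headers to parse (flags), the
 * tunnel header size and type, and the embedded payload/meta expression
 * evaluated against the inner packet.
 */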
struct nft_inner {
	u8			flags;
	u8			hdrsize;
	u8			type;
	u8			expr_type;

	struct __nft_expr	expr;
};

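/* Parse the inner link layer (optionally VLAN-tagged Ethernet) and the
 * inner IPv4/IPv6 header starting at @off, recording header offsets and
 * the transport protocol in @ctx. Returns 0 on success, -1 on a packet
 * that cannot be parsed.
 */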
static int nft_inner_parse_l2l3(const struct nft_inner *priv,
				const struct nft_pktinfo *pkt,
				struct nft_inner_tun_ctx *ctx, u32 off)
{
	__be16 llproto, outer_llproto;
	u32 nhoff, thoff;

	if (priv->flags & NFT_INNER_LL) {
		struct vlan_ethhdr *veth, _veth;
		struct ethhdr *eth, _eth;
		u32 hdrsize;

		eth = skb_header_pointer(pkt->skb, off, sizeof(_eth), &_eth);
		if (!eth)
			return -1;

		switch (eth->h_proto) {
		case htons(ETH_P_IP):
		case htons(ETH_P_IPV6):
			llproto = eth->h_proto;
			hdrsize = sizeof(_eth);
			break;
		case htons(ETH_P_8021Q):
			veth = skb_header_pointer(pkt->skb, off, sizeof(_veth), &_veth);
			if (!veth)
				return -1;

			outer_llproto = veth->h_vlan_encapsulated_proto;
			llproto = veth->h_vlan_proto;
			hdrsize = sizeof(_veth);
			break;
		default:
			return -1;
		}

		ctx->inner_lloff = off;
		ctx->flags |= NFT_PAYLOAD_CTX_INNER_LL;
		off += hdrsize;
	} else {
		struct iphdr *iph;
		u32 _version;

		iph = skb_header_pointer(pkt->skb, off, sizeof(_version), &_version);
		if (!iph)
			return -1;

		switch (iph->version) {
		case 4:
			llproto = htons(ETH_P_IP);
			break;
		case 6:
			llproto = htons(ETH_P_IPV6);
			break;
		default:
			return -1;
		}
	}

	ctx->llproto = llproto;
	if (llproto == htons(ETH_P_8021Q))
		llproto = outer_llproto;

	nhoff = off;

	switch (llproto) {
	case htons(ETH_P_IP): {
		struct iphdr *iph, _iph;

		iph = skb_header_pointer(pkt->skb, nhoff, sizeof(_iph), &_iph);
		if (!iph)
			return -1;

		if (iph->ihl < 5 || iph->version != 4)
			return -1;

		ctx->inner_nhoff = nhoff;
		ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH;

		thoff = nhoff + (iph->ihl * 4);
		if ((ntohs(iph->frag_off) & IP_OFFSET) == 0) {
			ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH;
			ctx->inner_thoff = thoff;
			ctx->l4proto = iph->protocol;
		}
	}
		break;
	case htons(ETH_P_IPV6): {
		struct ipv6hdr *ip6h, _ip6h;
		int fh_flags = IP6_FH_F_AUTH;
		unsigned short fragoff;
		int l4proto;

		ip6h = skb_header_pointer(pkt->skb, nhoff, sizeof(_ip6h), &_ip6h);
		if (!ip6h)
			return -1;

		if (ip6h->version != 6)
			return -1;

		ctx->inner_nhoff = nhoff;
		ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH;

		thoff = nhoff;
		l4proto = ipv6_find_hdr(pkt->skb, &thoff, -1, &fragoff, &fh_flags);
		if (l4proto < 0 || thoff > U16_MAX)
			return -1;

		if (fragoff == 0) {
			thoff = nhoff + sizeof(_ip6h);
			ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH;
			ctx->inner_thoff = thoff;
			ctx->l4proto = l4proto;
		}
	}
		break;
	default:
		return -1;
	}

	return 0;
}

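/* Record the tunnel header offset and advance @off past the tunnel header.
 * GRE needs no extra encapsulation header here; UDP-based tunnels skip the
 * configured header size, plus any GENEVE options.
 */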
static int nft_inner_parse_tunhdr(const struct nft_inner *priv,
				  const struct nft_pktinfo *pkt,
				  struct nft_inner_tun_ctx *ctx, u32 *off)
{
	if (pkt->tprot == IPPROTO_GRE) {
		ctx->inner_tunoff = pkt->thoff;
		ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN;
		return 0;
	}

	if (pkt->tprot != IPPROTO_UDP)
		return -1;

	ctx->inner_tunoff = *off;
	ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN;
	*off += priv->hdrsize;

	switch (priv->type) {
	case NFT_INNER_GENEVE: {
		struct genevehdr *gnvh, _gnvh;

		gnvh = skb_header_pointer(pkt->skb, pkt->inneroff,
					  sizeof(_gnvh), &_gnvh);
		if (!gnvh)
			return -1;

		*off += gnvh->opt_len * 4;
	}
		break;
	default:
		break;
	}

	return 0;
}

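/* Parse the tunnel and inner headers once, filling @tun_ctx with the
 * discovered offsets, and tag the packet so that later inner expressions
 * can reuse the cached result.
 */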
static int nft_inner_parse(const struct nft_inner *priv,
			   struct nft_pktinfo *pkt,
			   struct nft_inner_tun_ctx *tun_ctx)
{
	u32 off = pkt->inneroff;

	if (priv->flags & NFT_INNER_HDRSIZE &&
	    nft_inner_parse_tunhdr(priv, pkt, tun_ctx, &off) < 0)
		return -1;

	if (priv->flags & (NFT_INNER_LL | NFT_INNER_NH)) {
		if (nft_inner_parse_l2l3(priv, pkt, tun_ctx, off) < 0)
			return -1;
	} else if (priv->flags & NFT_INNER_TH) {
		tun_ctx->inner_thoff = off;
		tun_ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH;
	}

	tun_ctx->type = priv->type;
	tun_ctx->cookie = (unsigned long)pkt->skb;
	pkt->flags |= NFT_PKTINFO_INNER_FULL;

	return 0;
}

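/* Copy the per-CPU cached context into @tun_ctx if it was populated for
 * this very skb, identified via the cookie. Returns false on a cache miss.
 */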
static bool nft_inner_restore_tun_ctx(const struct nft_pktinfo *pkt,
				      struct nft_inner_tun_ctx *tun_ctx)
{
	struct nft_inner_tun_ctx *this_cpu_tun_ctx;

	local_bh_disable();
	local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
	this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx);
	if (this_cpu_tun_ctx->cookie != (unsigned long)pkt->skb) {
		/* Release in reverse order of acquisition: drop the nested
		 * BH lock before re-enabling bottom halves.
		 */
		local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
		local_bh_enable();
		return false;
	}
	*tun_ctx = *this_cpu_tun_ctx;
	local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
	local_bh_enable();

	return true;
}

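/* Store @tun_ctx in the per-CPU cache unless a context for the same skb
 * is already present.
 */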
static void nft_inner_save_tun_ctx(const struct nft_pktinfo *pkt,
				   const struct nft_inner_tun_ctx *tun_ctx)
{
	struct nft_inner_tun_ctx *this_cpu_tun_ctx;

	local_bh_disable();
	local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
	this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx);
	if (this_cpu_tun_ctx->cookie != tun_ctx->cookie)
		*this_cpu_tun_ctx = *tun_ctx;
	local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
	local_bh_enable();
}

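/* Decide whether the inner headers must be (re)parsed: the packet has not
 * been fully parsed yet, the cached context belongs to another skb, or the
 * cached tunnel type does not match this expression.
 */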
static bool nft_inner_parse_needed(const struct nft_inner *priv,
				   const struct nft_pktinfo *pkt,
				   struct nft_inner_tun_ctx *tun_ctx)
{
	if (!(pkt->flags & NFT_PKTINFO_INNER_FULL))
		return true;

	if (!nft_inner_restore_tun_ctx(pkt, tun_ctx))
		return true;

	if (priv->type != tun_ctx->type)
		return true;

	return false;
}

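/* Evaluate the embedded payload or meta expression against the inner
 * packet, parsing the tunnel headers first if no cached context applies.
 */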
static void nft_inner_eval(const struct nft_expr *expr, struct nft_regs *regs,
			   const struct nft_pktinfo *pkt)
{
	const struct nft_inner *priv = nft_expr_priv(expr);
	struct nft_inner_tun_ctx tun_ctx = {};

	if (nft_payload_inner_offset(pkt) < 0)
		goto err;

	if (nft_inner_parse_needed(priv, pkt, &tun_ctx) &&
	    nft_inner_parse(priv, (struct nft_pktinfo *)pkt, &tun_ctx) < 0)
		goto err;

	switch (priv->expr_type) {
	case NFT_INNER_EXPR_PAYLOAD:
		nft_payload_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx);
		break;
	case NFT_INNER_EXPR_META:
		nft_meta_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx);
		break;
	default:
		WARN_ON_ONCE(1);
		goto err;
	}
	nft_inner_save_tun_ctx(pkt, &tun_ctx);

	return;
err:
	regs->verdict.code = NFT_BREAK;
}

static const struct nla_policy nft_inner_policy[NFTA_INNER_MAX + 1] = {
	[NFTA_INNER_NUM]	= { .type = NLA_U32 },
	[NFTA_INNER_FLAGS]	= { .type = NLA_U32 },
	[NFTA_INNER_HDRSIZE]	= { .type = NLA_U32 },
	[NFTA_INNER_TYPE]	= { .type = NLA_U32 },
	[NFTA_INNER_EXPR]	= { .type = NLA_NESTED },
};

struct nft_expr_info {
	const struct nft_expr_ops *ops;
	const struct nlattr *attr;
	struct nlattr *tb[NFT_EXPR_MAXATTR + 1];
};

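/* Validate the netlink attributes, then parse and initialize the embedded
 * expression, which must be either "payload" or "meta".
 */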
static int nft_inner_init(const struct nft_ctx *ctx,
			  const struct nft_expr *expr,
			  const struct nlattr * const tb[])
{
	struct nft_inner *priv = nft_expr_priv(expr);
	u32 flags, hdrsize, type, num;
	struct nft_expr_info expr_info;
	int err;

	if (!tb[NFTA_INNER_FLAGS] ||
	    !tb[NFTA_INNER_NUM] ||
	    !tb[NFTA_INNER_HDRSIZE] ||
	    !tb[NFTA_INNER_TYPE] ||
	    !tb[NFTA_INNER_EXPR])
		return -EINVAL;

	flags = ntohl(nla_get_be32(tb[NFTA_INNER_FLAGS]));
	if (flags & ~NFT_INNER_MASK)
		return -EOPNOTSUPP;

	num = ntohl(nla_get_be32(tb[NFTA_INNER_NUM]));
	if (num != 0)
		return -EOPNOTSUPP;

	hdrsize = ntohl(nla_get_be32(tb[NFTA_INNER_HDRSIZE]));
	type = ntohl(nla_get_be32(tb[NFTA_INNER_TYPE]));

	if (type > U8_MAX)
		return -EINVAL;

	if (flags & NFT_INNER_HDRSIZE) {
		if (hdrsize == 0 || hdrsize > 64)
			return -EOPNOTSUPP;
	}

	priv->flags = flags;
	priv->hdrsize = hdrsize;
	priv->type = type;

	err = nft_expr_inner_parse(ctx, tb[NFTA_INNER_EXPR], &expr_info);
	if (err < 0)
		return err;

	priv->expr.ops = expr_info.ops;

	if (!strcmp(expr_info.ops->type->name, "payload"))
		priv->expr_type = NFT_INNER_EXPR_PAYLOAD;
	else if (!strcmp(expr_info.ops->type->name, "meta"))
		priv->expr_type = NFT_INNER_EXPR_META;
	else
		return -EINVAL;

	err = expr_info.ops->init(ctx, (struct nft_expr *)&priv->expr,
				  (const struct nlattr * const*)expr_info.tb);
	if (err < 0)
		return err;

	return 0;
}

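/* Dump the expression configuration back to userspace via netlink. */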
static int nft_inner_dump(struct sk_buff *skb,
			  const struct nft_expr *expr, bool reset)
{
	const struct nft_inner *priv = nft_expr_priv(expr);

	if (nla_put_be32(skb, NFTA_INNER_NUM, htonl(0)) ||
	    nla_put_be32(skb, NFTA_INNER_TYPE, htonl(priv->type)) ||
	    nla_put_be32(skb, NFTA_INNER_FLAGS, htonl(priv->flags)) ||
	    nla_put_be32(skb, NFTA_INNER_HDRSIZE, htonl(priv->hdrsize)))
		goto nla_put_failure;

	if (nft_expr_dump(skb, NFTA_INNER_EXPR,
			  (struct nft_expr *)&priv->expr, reset) < 0)
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -1;
}

static const struct nft_expr_ops nft_inner_ops = {
	.type		= &nft_inner_type,
	.size		= NFT_EXPR_SIZE(sizeof(struct nft_inner)),
	.eval		= nft_inner_eval,
	.init		= nft_inner_init,
	.dump		= nft_inner_dump,
};

struct nft_expr_type nft_inner_type __read_mostly = {
	.name		= "inner",
	.ops		= &nft_inner_ops,
	.policy		= nft_inner_policy,
	.maxattr	= NFTA_INNER_MAX,
	.owner		= THIS_MODULE,
};