1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2019 Facebook */
3 #include <linux/rculist.h>
4 #include <linux/list.h>
5 #include <linux/hash.h>
6 #include <linux/types.h>
7 #include <linux/spinlock.h>
8 #include <linux/bpf.h>
9 #include <linux/btf.h>
10 #include <linux/btf_ids.h>
11 #include <linux/bpf_local_storage.h>
12 #include <net/bpf_sk_storage.h>
13 #include <net/sock.h>
14 #include <uapi/linux/sock_diag.h>
15 #include <uapi/linux/btf.h>
16 #include <linux/rcupdate_trace.h>
17
/* Storage cache instance used by every sk storage map: it is the cache
 * argument handed to bpf_local_storage_map_alloc() and
 * bpf_local_storage_map_free() by the map callbacks in this file.
 */
DEFINE_BPF_STORAGE_CACHE(sk_cache);
19
20 static struct bpf_local_storage_data *
bpf_sk_storage_lookup(struct sock * sk,struct bpf_map * map,bool cacheit_lockit)21 bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
22 {
23 struct bpf_local_storage *sk_storage;
24 struct bpf_local_storage_map *smap;
25
26 sk_storage =
27 rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held());
28 if (!sk_storage)
29 return NULL;
30
31 smap = (struct bpf_local_storage_map *)map;
32 return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit);
33 }
34
bpf_sk_storage_del(struct sock * sk,struct bpf_map * map)35 static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
36 {
37 struct bpf_local_storage_data *sdata;
38
39 sdata = bpf_sk_storage_lookup(sk, map, false);
40 if (!sdata)
41 return -ENOENT;
42
43 bpf_selem_unlink(SELEM(sdata), true);
44
45 return 0;
46 }
47
48 /* Called by __sk_destruct() & bpf_sk_storage_clone() */
bpf_sk_storage_free(struct sock * sk)49 void bpf_sk_storage_free(struct sock *sk)
50 {
51 struct bpf_local_storage *sk_storage;
52 bool free_sk_storage = false;
53
54 rcu_read_lock();
55 sk_storage = rcu_dereference(sk->sk_bpf_storage);
56 if (!sk_storage) {
57 rcu_read_unlock();
58 return;
59 }
60
61 raw_spin_lock_bh(&sk_storage->lock);
62 free_sk_storage = bpf_local_storage_unlink_nolock(sk_storage);
63 raw_spin_unlock_bh(&sk_storage->lock);
64 rcu_read_unlock();
65
66 if (free_sk_storage)
67 kfree_rcu(sk_storage, rcu);
68 }
69
bpf_sk_storage_map_free(struct bpf_map * map)70 static void bpf_sk_storage_map_free(struct bpf_map *map)
71 {
72 bpf_local_storage_map_free(map, &sk_cache, NULL);
73 }
74
bpf_sk_storage_map_alloc(union bpf_attr * attr)75 static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
76 {
77 return bpf_local_storage_map_alloc(attr, &sk_cache);
78 }
79
notsupp_get_next_key(struct bpf_map * map,void * key,void * next_key)80 static int notsupp_get_next_key(struct bpf_map *map, void *key,
81 void *next_key)
82 {
83 return -ENOTSUPP;
84 }
85
bpf_fd_sk_storage_lookup_elem(struct bpf_map * map,void * key)86 static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key)
87 {
88 struct bpf_local_storage_data *sdata;
89 struct socket *sock;
90 int fd, err;
91
92 fd = *(int *)key;
93 sock = sockfd_lookup(fd, &err);
94 if (sock) {
95 sdata = bpf_sk_storage_lookup(sock->sk, map, true);
96 sockfd_put(sock);
97 return sdata ? sdata->data : NULL;
98 }
99
100 return ERR_PTR(err);
101 }
102
/* .map_update_elem callback. @key points at an int holding a socket fd.
 *
 * Stores @value for that socket through bpf_local_storage_update() using
 * @map_flags and GFP_ATOMIC. Returns 0 on success, the error encoded in the
 * update result, or the error from sockfd_lookup() for a bad fd.
 */
static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
					 void *value, u64 map_flags)
{
	struct bpf_local_storage_data *sdata;
	struct socket *sock;
	int err;

	sock = sockfd_lookup(*(int *)key, &err);
	if (!sock)
		return err;

	sdata = bpf_local_storage_update(sock->sk,
					 (struct bpf_local_storage_map *)map,
					 value, map_flags, GFP_ATOMIC);
	sockfd_put(sock);

	return PTR_ERR_OR_ZERO(sdata);
}
122
bpf_fd_sk_storage_delete_elem(struct bpf_map * map,void * key)123 static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
124 {
125 struct socket *sock;
126 int fd, err;
127
128 fd = *(int *)key;
129 sock = sockfd_lookup(fd, &err);
130 if (sock) {
131 err = bpf_sk_storage_del(sock->sk, map);
132 sockfd_put(sock);
133 return err;
134 }
135
136 return err;
137 }
138
139 static struct bpf_local_storage_elem *
bpf_sk_storage_clone_elem(struct sock * newsk,struct bpf_local_storage_map * smap,struct bpf_local_storage_elem * selem)140 bpf_sk_storage_clone_elem(struct sock *newsk,
141 struct bpf_local_storage_map *smap,
142 struct bpf_local_storage_elem *selem)
143 {
144 struct bpf_local_storage_elem *copy_selem;
145
146 copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, GFP_ATOMIC);
147 if (!copy_selem)
148 return NULL;
149
150 if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
151 copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
152 SDATA(selem)->data, true);
153 else
154 copy_map_value(&smap->map, SDATA(copy_selem)->data,
155 SDATA(selem)->data);
156
157 return copy_selem;
158 }
159
bpf_sk_storage_clone(const struct sock * sk,struct sock * newsk)160 int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
161 {
162 struct bpf_local_storage *new_sk_storage = NULL;
163 struct bpf_local_storage *sk_storage;
164 struct bpf_local_storage_elem *selem;
165 int ret = 0;
166
167 RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
168
169 rcu_read_lock();
170 sk_storage = rcu_dereference(sk->sk_bpf_storage);
171
172 if (!sk_storage || hlist_empty(&sk_storage->list))
173 goto out;
174
175 hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
176 struct bpf_local_storage_elem *copy_selem;
177 struct bpf_local_storage_map *smap;
178 struct bpf_map *map;
179
180 smap = rcu_dereference(SDATA(selem)->smap);
181 if (!(smap->map.map_flags & BPF_F_CLONE))
182 continue;
183
184 /* Note that for lockless listeners adding new element
185 * here can race with cleanup in bpf_local_storage_map_free.
186 * Try to grab map refcnt to make sure that it's still
187 * alive and prevent concurrent removal.
188 */
189 map = bpf_map_inc_not_zero(&smap->map);
190 if (IS_ERR(map))
191 continue;
192
193 copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
194 if (!copy_selem) {
195 ret = -ENOMEM;
196 bpf_map_put(map);
197 goto out;
198 }
199
200 if (new_sk_storage) {
201 bpf_selem_link_map(smap, copy_selem);
202 bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
203 } else {
204 ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
205 if (ret) {
206 kfree(copy_selem);
207 atomic_sub(smap->elem_size,
208 &newsk->sk_omem_alloc);
209 bpf_map_put(map);
210 goto out;
211 }
212
213 new_sk_storage =
214 rcu_dereference(copy_selem->local_storage);
215 }
216 bpf_map_put(map);
217 }
218
219 out:
220 rcu_read_unlock();
221
222 /* In case of an error, don't free anything explicitly here, the
223 * caller is responsible to call bpf_sk_storage_free.
224 */
225
226 return ret;
227 }
228
229 /* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get,struct bpf_map *,map,struct sock *,sk,void *,value,u64,flags,gfp_t,gfp_flags)230 BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
231 void *, value, u64, flags, gfp_t, gfp_flags)
232 {
233 struct bpf_local_storage_data *sdata;
234
235 WARN_ON_ONCE(!bpf_rcu_lock_held());
236 if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
237 return (unsigned long)NULL;
238
239 sdata = bpf_sk_storage_lookup(sk, map, true);
240 if (sdata)
241 return (unsigned long)sdata->data;
242
243 if (flags == BPF_SK_STORAGE_GET_F_CREATE &&
244 /* Cannot add new elem to a going away sk.
245 * Otherwise, the new elem may become a leak
246 * (and also other memory issues during map
247 * destruction).
248 */
249 refcount_inc_not_zero(&sk->sk_refcnt)) {
250 sdata = bpf_local_storage_update(
251 sk, (struct bpf_local_storage_map *)map, value,
252 BPF_NOEXIST, gfp_flags);
253 /* sk must be a fullsock (guaranteed by verifier),
254 * so sock_gen_put() is unnecessary.
255 */
256 sock_put(sk);
257 return IS_ERR(sdata) ?
258 (unsigned long)NULL : (unsigned long)sdata->data;
259 }
260
261 return (unsigned long)NULL;
262 }
263
/* bpf_sk_storage_delete() helper.
 *
 * Returns -EINVAL for a NULL or non-full socket, -ENOENT when the socket's
 * refcount has already dropped to zero, and otherwise the result of
 * bpf_sk_storage_del(). A socket reference is held across the deletion.
 */
BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
{
	int err;

	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!sk || !sk_fullsock(sk))
		return -EINVAL;

	if (!refcount_inc_not_zero(&sk->sk_refcnt))
		return -ENOENT;

	err = bpf_sk_storage_del(sk, map);
	sock_put(sk);

	return err;
}
280
/* .map_local_storage_charge callback: account @size bytes against the
 * owning socket's sk_omem_alloc.
 *
 * The charge is refused with -ENOMEM when @size alone exceeds
 * sysctl_optmem_max or when adding it would reach that limit; otherwise
 * sk_omem_alloc is increased by @size and 0 is returned. @smap is unused.
 */
static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
				 void *owner, u32 size)
{
	struct sock *sk = (struct sock *)owner;
	int optmem_max = READ_ONCE(sysctl_optmem_max);

	/* same check as in sock_kmalloc() */
	if (!(size <= optmem_max))
		return -ENOMEM;
	if (!(atomic_read(&sk->sk_omem_alloc) + size < optmem_max))
		return -ENOMEM;

	atomic_add(size, &sk->sk_omem_alloc);
	return 0;
}
296
/* .map_local_storage_uncharge callback: subtract @size bytes from the
 * owning socket's sk_omem_alloc. @smap is unused.
 */
static void bpf_sk_storage_uncharge(struct bpf_local_storage_map *smap,
				    void *owner, u32 size)
{
	struct sock *owner_sk = owner;

	atomic_sub(size, &owner_sk->sk_omem_alloc);
}
304
305 static struct bpf_local_storage __rcu **
bpf_sk_storage_ptr(void * owner)306 bpf_sk_storage_ptr(void *owner)
307 {
308 struct sock *sk = owner;
309
310 return &sk->sk_bpf_storage;
311 }
312
/* Operations table for the sk storage map type.
 *
 * Allocation, freeing and BTF checks go through the generic
 * bpf_local_storage code. The lookup/update/delete element callbacks are
 * the bpf_fd_sk_storage_*() handlers above, which interpret the key as a
 * socket fd. Key iteration is rejected by notsupp_get_next_key().
 */
const struct bpf_map_ops sk_storage_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = bpf_local_storage_map_alloc_check,
	.map_alloc = bpf_sk_storage_map_alloc,
	.map_free = bpf_sk_storage_map_free,
	.map_get_next_key = notsupp_get_next_key,
	.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
	.map_update_elem = bpf_fd_sk_storage_update_elem,
	.map_delete_elem = bpf_fd_sk_storage_delete_elem,
	.map_check_btf = bpf_local_storage_map_check_btf,
	.map_btf_id = &bpf_local_storage_map_btf_id[0],
	/* memory accounting against the owning socket's sk_omem_alloc */
	.map_local_storage_charge = bpf_sk_storage_charge,
	.map_local_storage_uncharge = bpf_sk_storage_uncharge,
	.map_owner_storage_ptr = bpf_sk_storage_ptr,
};
328
/* Prototype of the bpf_sk_storage_get() helper: (map, sock_common BTF
 * pointer, optional initial value, flags) -> map value pointer or NULL.
 * Not restricted to GPL programs.
 */
const struct bpf_func_proto bpf_sk_storage_get_proto = {
	.func = bpf_sk_storage_get,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
};
338
/* Variant of the bpf_sk_storage_get() prototype whose second argument is
 * the program context instead of a BTF socket pointer; it shares the same
 * implementation (bpf_sk_storage_get).
 */
const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = {
	.func = bpf_sk_storage_get,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_CTX, /* context is 'struct sock' */
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
};
348
/* Prototype of the bpf_sk_storage_delete() helper: (map, sock_common BTF
 * pointer) -> integer status. Not restricted to GPL programs.
 */
const struct bpf_func_proto bpf_sk_storage_delete_proto = {
	.func = bpf_sk_storage_delete,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
};
356
bpf_sk_storage_tracing_allowed(const struct bpf_prog * prog)357 static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
358 {
359 const struct btf *btf_vmlinux;
360 const struct btf_type *t;
361 const char *tname;
362 u32 btf_id;
363
364 if (prog->aux->dst_prog)
365 return false;
366
367 /* Ensure the tracing program is not tracing
368 * any bpf_sk_storage*() function and also
369 * use the bpf_sk_storage_(get|delete) helper.
370 */
371 switch (prog->expected_attach_type) {
372 case BPF_TRACE_ITER:
373 case BPF_TRACE_RAW_TP:
374 /* bpf_sk_storage has no trace point */
375 return true;
376 case BPF_TRACE_FENTRY:
377 case BPF_TRACE_FEXIT:
378 btf_vmlinux = bpf_get_btf_vmlinux();
379 if (IS_ERR_OR_NULL(btf_vmlinux))
380 return false;
381 btf_id = prog->aux->attach_btf_id;
382 t = btf_type_by_id(btf_vmlinux, btf_id);
383 tname = btf_name_by_offset(btf_vmlinux, t->name_off);
384 return !!strncmp(tname, "bpf_sk_storage",
385 strlen("bpf_sk_storage"));
386 default:
387 return false;
388 }
389
390 return false;
391 }
392
393 /* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get_tracing,struct bpf_map *,map,struct sock *,sk,void *,value,u64,flags,gfp_t,gfp_flags)394 BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
395 void *, value, u64, flags, gfp_t, gfp_flags)
396 {
397 WARN_ON_ONCE(!bpf_rcu_lock_held());
398 if (in_hardirq() || in_nmi())
399 return (unsigned long)NULL;
400
401 return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags,
402 gfp_flags);
403 }
404
/* Tracing flavour of bpf_sk_storage_delete(): fails with -EPERM when
 * running in hard-IRQ or NMI context, otherwise defers to the regular
 * helper body.
 */
BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
	   struct sock *, sk)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!in_hardirq() && !in_nmi())
		return ____bpf_sk_storage_delete(map, sk);

	return -EPERM;
}
414
/* Prototype of the tracing variant of bpf_sk_storage_get(). The socket
 * argument is a BTF pointer checked against the sock_common BTF id, and
 * bpf_sk_storage_tracing_allowed() gates which programs may use it.
 */
const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = {
	.func = bpf_sk_storage_get_tracing,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID,
	.arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
	.allowed = bpf_sk_storage_tracing_allowed,
};
426
/* Prototype of the tracing variant of bpf_sk_storage_delete(). The socket
 * argument is a BTF pointer checked against the sock_common BTF id, and
 * bpf_sk_storage_tracing_allowed() gates which programs may use it.
 */
const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = {
	.func = bpf_sk_storage_delete_tracing,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID,
	.arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	.allowed = bpf_sk_storage_tracing_allowed,
};
436
/* Set of maps selected by a diag request. Built by
 * bpf_sk_storage_diag_alloc() and released by bpf_sk_storage_diag_free(),
 * which drops one map reference per entry.
 */
struct bpf_sk_storage_diag {
	u32 nr_maps;			/* number of valid entries in maps[] */
	struct bpf_map *maps[];		/* referenced maps, no duplicates */
};
441
442 /* The reply will be like:
443 * INET_DIAG_BPF_SK_STORAGES (nla_nest)
444 * SK_DIAG_BPF_STORAGE (nla_nest)
445 * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
446 * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
447 * SK_DIAG_BPF_STORAGE (nla_nest)
448 * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
449 * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
450 * ....
451 */
/* Netlink space needed for one SK_DIAG_BPF_STORAGE entry whose map value
 * is @value_size bytes long.
 */
static int nla_value_size(u32 value_size)
{
	/* One entry is made of:
	 *	SK_DIAG_BPF_STORAGE		(nla_nest)
	 *	SK_DIAG_BPF_STORAGE_MAP_ID	(nla_put_u32)
	 *	SK_DIAG_BPF_STORAGE_MAP_VALUE	(nla_reserve_64bit)
	 */
	int nest_hdr = nla_total_size(0);
	int map_id = nla_total_size(sizeof(u32));
	int map_value = nla_total_size_64bit(value_size);

	return nest_hdr + map_id + map_value;
}
461
bpf_sk_storage_diag_free(struct bpf_sk_storage_diag * diag)462 void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag)
463 {
464 u32 i;
465
466 if (!diag)
467 return;
468
469 for (i = 0; i < diag->nr_maps; i++)
470 bpf_map_put(diag->maps[i]);
471
472 kfree(diag);
473 }
474 EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_free);
475
diag_check_dup(const struct bpf_sk_storage_diag * diag,const struct bpf_map * map)476 static bool diag_check_dup(const struct bpf_sk_storage_diag *diag,
477 const struct bpf_map *map)
478 {
479 u32 i;
480
481 for (i = 0; i < diag->nr_maps; i++) {
482 if (diag->maps[i] == map)
483 return true;
484 }
485
486 return false;
487 }
488
489 struct bpf_sk_storage_diag *
bpf_sk_storage_diag_alloc(const struct nlattr * nla_stgs)490 bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
491 {
492 struct bpf_sk_storage_diag *diag;
493 struct nlattr *nla;
494 u32 nr_maps = 0;
495 int rem, err;
496
497 /* bpf_local_storage_map is currently limited to CAP_SYS_ADMIN as
498 * the map_alloc_check() side also does.
499 */
500 if (!bpf_capable())
501 return ERR_PTR(-EPERM);
502
503 nla_for_each_nested(nla, nla_stgs, rem) {
504 if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
505 nr_maps++;
506 }
507
508 diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
509 if (!diag)
510 return ERR_PTR(-ENOMEM);
511
512 nla_for_each_nested(nla, nla_stgs, rem) {
513 struct bpf_map *map;
514 int map_fd;
515
516 if (nla_type(nla) != SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
517 continue;
518
519 map_fd = nla_get_u32(nla);
520 map = bpf_map_get(map_fd);
521 if (IS_ERR(map)) {
522 err = PTR_ERR(map);
523 goto err_free;
524 }
525 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) {
526 bpf_map_put(map);
527 err = -EINVAL;
528 goto err_free;
529 }
530 if (diag_check_dup(diag, map)) {
531 bpf_map_put(map);
532 err = -EEXIST;
533 goto err_free;
534 }
535 diag->maps[diag->nr_maps++] = map;
536 }
537
538 return diag;
539
540 err_free:
541 bpf_sk_storage_diag_free(diag);
542 return ERR_PTR(err);
543 }
544 EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);
545
diag_get(struct bpf_local_storage_data * sdata,struct sk_buff * skb)546 static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
547 {
548 struct nlattr *nla_stg, *nla_value;
549 struct bpf_local_storage_map *smap;
550
551 /* It cannot exceed max nlattr's payload */
552 BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE);
553
554 nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE);
555 if (!nla_stg)
556 return -EMSGSIZE;
557
558 smap = rcu_dereference(sdata->smap);
559 if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id))
560 goto errout;
561
562 nla_value = nla_reserve_64bit(skb, SK_DIAG_BPF_STORAGE_MAP_VALUE,
563 smap->map.value_size,
564 SK_DIAG_BPF_STORAGE_PAD);
565 if (!nla_value)
566 goto errout;
567
568 if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
569 copy_map_value_locked(&smap->map, nla_data(nla_value),
570 sdata->data, true);
571 else
572 copy_map_value(&smap->map, nla_data(nla_value), sdata->data);
573
574 nla_nest_end(skb, nla_stg);
575 return 0;
576
577 errout:
578 nla_nest_cancel(skb, nla_stg);
579 return -EMSGSIZE;
580 }
581
bpf_sk_storage_diag_put_all(struct sock * sk,struct sk_buff * skb,int stg_array_type,unsigned int * res_diag_size)582 static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb,
583 int stg_array_type,
584 unsigned int *res_diag_size)
585 {
586 /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
587 unsigned int diag_size = nla_total_size(0);
588 struct bpf_local_storage *sk_storage;
589 struct bpf_local_storage_elem *selem;
590 struct bpf_local_storage_map *smap;
591 struct nlattr *nla_stgs;
592 unsigned int saved_len;
593 int err = 0;
594
595 rcu_read_lock();
596
597 sk_storage = rcu_dereference(sk->sk_bpf_storage);
598 if (!sk_storage || hlist_empty(&sk_storage->list)) {
599 rcu_read_unlock();
600 return 0;
601 }
602
603 nla_stgs = nla_nest_start(skb, stg_array_type);
604 if (!nla_stgs)
605 /* Continue to learn diag_size */
606 err = -EMSGSIZE;
607
608 saved_len = skb->len;
609 hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
610 smap = rcu_dereference(SDATA(selem)->smap);
611 diag_size += nla_value_size(smap->map.value_size);
612
613 if (nla_stgs && diag_get(SDATA(selem), skb))
614 /* Continue to learn diag_size */
615 err = -EMSGSIZE;
616 }
617
618 rcu_read_unlock();
619
620 if (nla_stgs) {
621 if (saved_len == skb->len)
622 nla_nest_cancel(skb, nla_stgs);
623 else
624 nla_nest_end(skb, nla_stgs);
625 }
626
627 if (diag_size == nla_total_size(0)) {
628 *res_diag_size = 0;
629 return 0;
630 }
631
632 *res_diag_size = diag_size;
633 return err;
634 }
635
bpf_sk_storage_diag_put(struct bpf_sk_storage_diag * diag,struct sock * sk,struct sk_buff * skb,int stg_array_type,unsigned int * res_diag_size)636 int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
637 struct sock *sk, struct sk_buff *skb,
638 int stg_array_type,
639 unsigned int *res_diag_size)
640 {
641 /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
642 unsigned int diag_size = nla_total_size(0);
643 struct bpf_local_storage *sk_storage;
644 struct bpf_local_storage_data *sdata;
645 struct nlattr *nla_stgs;
646 unsigned int saved_len;
647 int err = 0;
648 u32 i;
649
650 *res_diag_size = 0;
651
652 /* No map has been specified. Dump all. */
653 if (!diag->nr_maps)
654 return bpf_sk_storage_diag_put_all(sk, skb, stg_array_type,
655 res_diag_size);
656
657 rcu_read_lock();
658 sk_storage = rcu_dereference(sk->sk_bpf_storage);
659 if (!sk_storage || hlist_empty(&sk_storage->list)) {
660 rcu_read_unlock();
661 return 0;
662 }
663
664 nla_stgs = nla_nest_start(skb, stg_array_type);
665 if (!nla_stgs)
666 /* Continue to learn diag_size */
667 err = -EMSGSIZE;
668
669 saved_len = skb->len;
670 for (i = 0; i < diag->nr_maps; i++) {
671 sdata = bpf_local_storage_lookup(sk_storage,
672 (struct bpf_local_storage_map *)diag->maps[i],
673 false);
674
675 if (!sdata)
676 continue;
677
678 diag_size += nla_value_size(diag->maps[i]->value_size);
679
680 if (nla_stgs && diag_get(sdata, skb))
681 /* Continue to learn diag_size */
682 err = -EMSGSIZE;
683 }
684 rcu_read_unlock();
685
686 if (nla_stgs) {
687 if (saved_len == skb->len)
688 nla_nest_cancel(skb, nla_stgs);
689 else
690 nla_nest_end(skb, nla_stgs);
691 }
692
693 if (diag_size == nla_total_size(0)) {
694 *res_diag_size = 0;
695 return 0;
696 }
697
698 *res_diag_size = diag_size;
699 return err;
700 }
701 EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put);
702
/* Per-seq_file cursor of the sk storage map iterator. */
struct bpf_iter_seq_sk_storage_map_info {
	struct bpf_map *map;		/* map being walked; referenced in init, dropped in fini */
	unsigned int bucket_id;		/* bucket at which the search resumes */
	unsigned skip_elems;		/* elements of that bucket to pass over first */
};
708
709 static struct bpf_local_storage_elem *
bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info * info,struct bpf_local_storage_elem * prev_selem)710 bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
711 struct bpf_local_storage_elem *prev_selem)
712 __acquires(RCU) __releases(RCU)
713 {
714 struct bpf_local_storage *sk_storage;
715 struct bpf_local_storage_elem *selem;
716 u32 skip_elems = info->skip_elems;
717 struct bpf_local_storage_map *smap;
718 u32 bucket_id = info->bucket_id;
719 u32 i, count, n_buckets;
720 struct bpf_local_storage_map_bucket *b;
721
722 smap = (struct bpf_local_storage_map *)info->map;
723 n_buckets = 1U << smap->bucket_log;
724 if (bucket_id >= n_buckets)
725 return NULL;
726
727 /* try to find next selem in the same bucket */
728 selem = prev_selem;
729 count = 0;
730 while (selem) {
731 selem = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&selem->map_node)),
732 struct bpf_local_storage_elem, map_node);
733 if (!selem) {
734 /* not found, unlock and go to the next bucket */
735 b = &smap->buckets[bucket_id++];
736 rcu_read_unlock();
737 skip_elems = 0;
738 break;
739 }
740 sk_storage = rcu_dereference(selem->local_storage);
741 if (sk_storage) {
742 info->skip_elems = skip_elems + count;
743 return selem;
744 }
745 count++;
746 }
747
748 for (i = bucket_id; i < (1U << smap->bucket_log); i++) {
749 b = &smap->buckets[i];
750 rcu_read_lock();
751 count = 0;
752 hlist_for_each_entry_rcu(selem, &b->list, map_node) {
753 sk_storage = rcu_dereference(selem->local_storage);
754 if (sk_storage && count >= skip_elems) {
755 info->bucket_id = i;
756 info->skip_elems = count;
757 return selem;
758 }
759 count++;
760 }
761 rcu_read_unlock();
762 skip_elems = 0;
763 }
764
765 info->bucket_id = i;
766 info->skip_elems = 0;
767 return NULL;
768 }
769
/* seq_file .start: locate the element at the saved cursor position.
 * Returns NULL when nothing is left. *pos is bumped from 0 to 1 when an
 * element is found on the very first call.
 */
static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_local_storage_elem *selem;

	selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL);
	if (selem && *pos == 0)
		++*pos;

	return selem;
}
782
/* seq_file .next: advance *pos and the saved skip count by one, then
 * continue the search from the element @v that was just shown.
 */
static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v,
					 loff_t *pos)
{
	struct bpf_iter_seq_sk_storage_map_info *info = seq->private;

	(*pos)++;
	info->skip_elems++;

	return bpf_sk_storage_map_seq_find_next(info, v);
}
792
/* Context handed to the iterator program for each element; it is filled in
 * by __bpf_sk_storage_map_seq_show(). sk and value stay NULL on the final
 * call, which is made without an element.
 */
struct bpf_iter__bpf_sk_storage_map {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct bpf_map *, map);	/* map being iterated */
	__bpf_md_ptr(struct sock *, sk);	/* owner of the element's storage */
	__bpf_md_ptr(void *, value);		/* the element's stored value */
};
799
/* Declare the "bpf_sk_storage_map" iterator target with the
 * (meta, map, sk, value) arguments that mirror
 * struct bpf_iter__bpf_sk_storage_map.
 * NOTE(review): the macro's expansion is not visible here — confirm against
 * its definition.
 */
DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta,
		     struct bpf_map *map, struct sock *sk,
		     void *value)
803
804 static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
805 struct bpf_local_storage_elem *selem)
806 {
807 struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
808 struct bpf_iter__bpf_sk_storage_map ctx = {};
809 struct bpf_local_storage *sk_storage;
810 struct bpf_iter_meta meta;
811 struct bpf_prog *prog;
812 int ret = 0;
813
814 meta.seq = seq;
815 prog = bpf_iter_get_info(&meta, selem == NULL);
816 if (prog) {
817 ctx.meta = &meta;
818 ctx.map = info->map;
819 if (selem) {
820 sk_storage = rcu_dereference(selem->local_storage);
821 ctx.sk = sk_storage->owner;
822 ctx.value = SDATA(selem)->data;
823 }
824 ret = bpf_iter_run_prog(prog, &ctx);
825 }
826
827 return ret;
828 }
829
/* seq_file .show: run the iterator program on the element @v. */
static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v)
{
	struct bpf_local_storage_elem *selem = v;

	return __bpf_sk_storage_map_seq_show(seq, selem);
}
834
bpf_sk_storage_map_seq_stop(struct seq_file * seq,void * v)835 static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v)
836 __releases(RCU)
837 {
838 if (!v)
839 (void)__bpf_sk_storage_map_seq_show(seq, v);
840 else
841 rcu_read_unlock();
842 }
843
bpf_iter_init_sk_storage_map(void * priv_data,struct bpf_iter_aux_info * aux)844 static int bpf_iter_init_sk_storage_map(void *priv_data,
845 struct bpf_iter_aux_info *aux)
846 {
847 struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;
848
849 bpf_map_inc_with_uref(aux->map);
850 seq_info->map = aux->map;
851 return 0;
852 }
853
bpf_iter_fini_sk_storage_map(void * priv_data)854 static void bpf_iter_fini_sk_storage_map(void *priv_data)
855 {
856 struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;
857
858 bpf_map_put_with_uref(seq_info->map);
859 }
860
bpf_iter_attach_map(struct bpf_prog * prog,union bpf_iter_link_info * linfo,struct bpf_iter_aux_info * aux)861 static int bpf_iter_attach_map(struct bpf_prog *prog,
862 union bpf_iter_link_info *linfo,
863 struct bpf_iter_aux_info *aux)
864 {
865 struct bpf_map *map;
866 int err = -EINVAL;
867
868 if (!linfo->map.map_fd)
869 return -EBADF;
870
871 map = bpf_map_get_with_uref(linfo->map.map_fd);
872 if (IS_ERR(map))
873 return PTR_ERR(map);
874
875 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
876 goto put_map;
877
878 if (prog->aux->max_rdwr_access > map->value_size) {
879 err = -EACCES;
880 goto put_map;
881 }
882
883 aux->map = map;
884 return 0;
885
886 put_map:
887 bpf_map_put_with_uref(map);
888 return err;
889 }
890
bpf_iter_detach_map(struct bpf_iter_aux_info * aux)891 static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
892 {
893 bpf_map_put_with_uref(aux->map);
894 }
895
/* seq_file callbacks that drive the sk storage map iterator. */
static const struct seq_operations bpf_sk_storage_map_seq_ops = {
	.start = bpf_sk_storage_map_seq_start,
	.next = bpf_sk_storage_map_seq_next,
	.stop = bpf_sk_storage_map_seq_stop,
	.show = bpf_sk_storage_map_seq_show,
};
902
/* seq_file description of the iterator: its callbacks, the hooks that take
 * and drop the map reference, and the size of the per-file cursor.
 */
static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops = &bpf_sk_storage_map_seq_ops,
	.init_seq_private = bpf_iter_init_sk_storage_map,
	.fini_seq_private = bpf_iter_fini_sk_storage_map,
	.seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info),
};
909
/* Registration record of the "bpf_sk_storage_map" iterator target.
 *
 * Two context fields are described for the program: sk (a possibly-NULL
 * BTF pointer) and value (a possibly-NULL buffer). The btf_id of the sk
 * entry is not set here; bpf_sk_storage_map_iter_init() fills it in before
 * registering the target.
 */
static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
	.target = "bpf_sk_storage_map",
	.attach_target = bpf_iter_attach_map,
	.detach_target = bpf_iter_detach_map,
	.show_fdinfo = bpf_iter_map_show_fdinfo,
	.fill_link_info = bpf_iter_map_fill_link_info,
	.ctx_arg_info_size = 2,
	.ctx_arg_info = {
		{ offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
		  PTR_TO_BTF_ID_OR_NULL },
		{ offsetof(struct bpf_iter__bpf_sk_storage_map, value),
		  PTR_TO_BUF | PTR_MAYBE_NULL },
	},
	.seq_info = &iter_seq_info,
};
925
bpf_sk_storage_map_iter_init(void)926 static int __init bpf_sk_storage_map_iter_init(void)
927 {
928 bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id =
929 btf_sock_ids[BTF_SOCK_TYPE_SOCK];
930 return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info);
931 }
932 late_initcall(bpf_sk_storage_map_iter_init);
933