[PATCH bpf-next v2 1/4] bpf: Generalize bpf_sk_storage
Martin KaFai Lau
kafai at fb.com
Fri Jun 19 06:43:32 UTC 2020
On Wed, Jun 17, 2020 at 10:29:38PM +0200, KP Singh wrote:
> From: KP Singh <kpsingh at google.com>
>
> Refactor the functionality in bpf_sk_storage.c so that concept of
> storage linked to kernel objects can be extended to other objects like
> inode, task_struct etc.
>
> bpf_sk_storage is updated to be bpf_local_storage with a union that
> contains a pointer to the owner object. The type of the
> bpf_local_storage can be determined using the newly added
> bpf_local_storage_type enum.
>
> Each new local storage will still be a separate map and provide its own
> set of helpers. This allows for future object specific extensions and
> still share a lot of the underlying implementation.
Thanks for taking up this effort to refactor sk_local_storage.
I took a quick look. I have some comments and some thoughts
I would like to explore.
> --- a/net/core/bpf_sk_storage.c
> +++ b/kernel/bpf/bpf_local_storage.c
> @@ -1,19 +1,22 @@
> // SPDX-License-Identifier: GPL-2.0
> /* Copyright (c) 2019 Facebook */
> +#include "linux/bpf.h"
> +#include "asm-generic/bug.h"
> +#include "linux/err.h"
"<" ">"
> #include <linux/rculist.h>
> #include <linux/list.h>
> #include <linux/hash.h>
> #include <linux/types.h>
> #include <linux/spinlock.h>
> #include <linux/bpf.h>
> -#include <net/bpf_sk_storage.h>
> +#include <linux/bpf_local_storage.h>
> #include <net/sock.h>
> #include <uapi/linux/sock_diag.h>
> #include <uapi/linux/btf.h>
>
> static atomic_t cache_idx;
inode local storage and sk local storage probably need separate
cache_idx counters. An improvement to how cache_idx is picked
has also just landed.
[ ... ]
> +struct bpf_local_storage {
> + struct bpf_local_storage_data __rcu *cache[BPF_STORAGE_CACHE_SIZE];
> + struct hlist_head list; /* List of bpf_local_storage_elem */
> + /* The object that owns the the above "list" of
> + * bpf_local_storage_elem
> + */
> + union {
> + struct sock *sk;
> + };
> struct rcu_head rcu;
> raw_spinlock_t lock; /* Protect adding/removing from the "list" */
> + enum bpf_local_storage_type stype;
> };
[ ... ]
> +static struct bpf_local_storage_elem *sk_selem_alloc(
> + struct bpf_local_storage_map *smap, struct sock *sk, void *value,
> + bool charge_omem)
> +{
> + struct bpf_local_storage_elem *selem;
> +
> + if (charge_omem && omem_charge(sk, smap->elem_size))
> + return NULL;
> +
> + selem = selem_alloc(smap, value);
> + if (selem)
> + return selem;
> +
> if (charge_omem)
> atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
>
> return NULL;
> }
>
> -/* sk_storage->lock must be held and selem->sk_storage == sk_storage.
> +static void __unlink_local_storage(struct bpf_local_storage *local_storage,
> + bool uncharge_omem)
Nit: the indentation is off here. There are a few more cases like this.
> +{
> + struct sock *sk;
> +
> + switch (local_storage->stype) {
Does it need a new bpf_local_storage_type? Is map_type as good?
Instead of adding any new member (e.g. stype) to
"struct bpf_local_storage", can the smap pointer be directly used
here instead?
For example in __unlink_local_storage() here, it should
have a hold to the selem which then has a hold to smap.
> + case BPF_LOCAL_STORAGE_SK:
> + sk = local_storage->sk;
> + if (uncharge_omem)
> + atomic_sub(sizeof(struct bpf_local_storage),
> + &sk->sk_omem_alloc);
> +
> + /* After this RCU_INIT, sk may be freed and cannot be used */
> + RCU_INIT_POINTER(sk->sk_bpf_storage, NULL);
> + local_storage->sk = NULL;
> + break;
> + }
Another thought on the stype switch cases.
Having multiple switches on stype in bpf_local_storage.c may not scale
well once a few more kernel objects are supported. Instead, have you
considered putting them into their own "ops"? Maybe a few new ops
could be added to bpf_map_ops to do local storage unlink/update/alloc...etc.
> +}
> +
> +/* local_storage->lock must be held and selem->local_storage == local_storage.
> * The caller must ensure selem->smap is still valid to be
> * dereferenced for its smap->elem_size and smap->cache_idx.
> + *
> + * uncharge_omem is only relevant when:
> + *
> + * local_storage->stype == BPF_LOCAL_STORAGE_SK
> */
[ ... ]
> @@ -845,7 +947,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
> BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
> void *, value, u64, flags)
> {
> - struct bpf_sk_storage_data *sdata;
> + struct bpf_local_storage_data *sdata;
>
> if (flags > BPF_SK_STORAGE_GET_F_CREATE)
> return (unsigned long)NULL;
> @@ -854,14 +956,14 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
> if (sdata)
> return (unsigned long)sdata->data;
>
> - if (flags == BPF_SK_STORAGE_GET_F_CREATE &&
> + if (flags == BPF_LOCAL_STORAGE_GET_F_CREATE &&
> /* Cannot add new elem to a going away sk.
> * Otherwise, the new elem may become a leak
> * (and also other memory issues during map
> * destruction).
> */
> refcount_inc_not_zero(&sk->sk_refcnt)) {
> - sdata = sk_storage_update(sk, map, value, BPF_NOEXIST);
> + sdata = local_storage_update(sk, map, value, BPF_NOEXIST);
> /* sk must be a fullsock (guaranteed by verifier),
> * so sock_gen_put() is unnecessary.
> */
> @@ -887,14 +989,14 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
> }
>
> const struct bpf_map_ops sk_storage_map_ops = {
> - .map_alloc_check = bpf_sk_storage_map_alloc_check,
> - .map_alloc = bpf_sk_storage_map_alloc,
> - .map_free = bpf_sk_storage_map_free,
> + .map_alloc_check = bpf_local_storage_map_alloc_check,
> + .map_alloc = bpf_local_storage_map_alloc,
> + .map_free = bpf_local_storage_map_free,
> .map_get_next_key = notsupp_get_next_key,
> - .map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
> - .map_update_elem = bpf_fd_sk_storage_update_elem,
> - .map_delete_elem = bpf_fd_sk_storage_delete_elem,
> - .map_check_btf = bpf_sk_storage_map_check_btf,
> + .map_lookup_elem = bpf_sk_storage_lookup_elem,
> + .map_update_elem = bpf_sk_storage_update_elem,
> + .map_delete_elem = bpf_sk_storage_delete_elem,
> + .map_check_btf = bpf_local_storage_map_check_btf,
> };
>
> const struct bpf_func_proto bpf_sk_storage_get_proto = {
> @@ -975,7 +1077,7 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
> u32 nr_maps = 0;
> int rem, err;
>
> - /* bpf_sk_storage_map is currently limited to CAP_SYS_ADMIN as
> + /* bpf_local_storage_map is currently limited to CAP_SYS_ADMIN as
> * the map_alloc_check() side also does.
> */
> if (!bpf_capable())
> @@ -1025,10 +1127,10 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
> }
> EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);
Would it be cleaner to leave the bpf_sk specific functions, map_ops,
and func_proto in net/core/bpf_sk_storage.c?
There is a test in map_tests/sk_storage_map.c, in case you have not noticed it.
More information about the Linux-security-module-archive
mailing list