bpf: support cloning sk storage on accept()
Add a new helper, bpf_sk_storage_clone(), which optionally clones sk storage, and call it from sk_clone_lock().

Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
parent
b0e4701ce1
commit
8f51dfc73b
|
@ -10,4 +10,14 @@ void bpf_sk_storage_free(struct sock *sk);
|
|||
extern const struct bpf_func_proto bpf_sk_storage_get_proto;
|
||||
extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
|
||||
|
||||
#ifdef CONFIG_BPF_SYSCALL
/* Clone the BPF_F_CLONE-flagged sk storage of @sk into @newsk.
 * Returns 0 on success (including "nothing to clone"), negative errno
 * on failure; see the definition for error-path ownership rules.
 */
int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
#else
/* Without BPF syscall support there is no sk storage to clone;
 * succeed as a no-op so callers need no #ifdef of their own.
 */
static inline int bpf_sk_storage_clone(const struct sock *sk,
				       struct sock *newsk)
{
	return 0;
}
#endif
|
||||
|
||||
#endif /* _BPF_SK_STORAGE_H */
|
||||
|
|
|
@ -337,6 +337,9 @@ enum bpf_attach_type {
|
|||
#define BPF_F_RDONLY_PROG (1U << 7)
|
||||
#define BPF_F_WRONLY_PROG (1U << 8)
|
||||
|
||||
/* Clone map from listener for newly accepted socket */
|
||||
#define BPF_F_CLONE (1U << 9)
|
||||
|
||||
/* flags for BPF_PROG_QUERY */
|
||||
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
|
||||
|
||||
|
|
|
@ -12,6 +12,9 @@
|
|||
|
||||
/* NOTE(review): presumably hands out distinct cache slot indices to new
 * sk storage maps — confirm against the users of cache_idx in this file.
 */
static atomic_t cache_idx;

/* The only map_flags accepted when creating an sk storage map. */
#define SK_STORAGE_CREATE_FLAG_MASK \
	(BPF_F_NO_PREALLOC | BPF_F_CLONE)
|
||||
|
||||
struct bucket {
|
||||
struct hlist_head list;
|
||||
raw_spinlock_t lock;
|
||||
|
@ -209,7 +212,6 @@ static void selem_unlink_sk(struct bpf_sk_storage_elem *selem)
|
|||
kfree_rcu(sk_storage, rcu);
|
||||
}
|
||||
|
||||
/* sk_storage->lock must be held and sk_storage->list cannot be empty */
|
||||
static void __selem_link_sk(struct bpf_sk_storage *sk_storage,
|
||||
struct bpf_sk_storage_elem *selem)
|
||||
{
|
||||
|
@ -509,7 +511,7 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Called by __sk_destruct() */
|
||||
/* Called by __sk_destruct() & bpf_sk_storage_clone() */
|
||||
void bpf_sk_storage_free(struct sock *sk)
|
||||
{
|
||||
struct bpf_sk_storage_elem *selem;
|
||||
|
@ -557,6 +559,11 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
|
|||
|
||||
smap = (struct bpf_sk_storage_map *)map;
|
||||
|
||||
/* Note that this map might be concurrently cloned from
|
||||
* bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
|
||||
* RCU read section to finish before proceeding. New RCU
|
||||
* read sections should be prevented via bpf_map_inc_not_zero.
|
||||
*/
|
||||
synchronize_rcu();
|
||||
|
||||
/* bpf prog and the userspace can no longer access this map
|
||||
|
@ -601,7 +608,9 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
|
|||
|
||||
static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
|
||||
{
|
||||
if (attr->map_flags != BPF_F_NO_PREALLOC || attr->max_entries ||
|
||||
if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK ||
|
||||
!(attr->map_flags & BPF_F_NO_PREALLOC) ||
|
||||
attr->max_entries ||
|
||||
attr->key_size != sizeof(int) || !attr->value_size ||
|
||||
/* Enforce BTF for userspace sk dumping */
|
||||
!attr->btf_key_type_id || !attr->btf_value_type_id)
|
||||
|
@ -739,6 +748,95 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
|
|||
return err;
|
||||
}
|
||||
|
||||
static struct bpf_sk_storage_elem *
|
||||
bpf_sk_storage_clone_elem(struct sock *newsk,
|
||||
struct bpf_sk_storage_map *smap,
|
||||
struct bpf_sk_storage_elem *selem)
|
||||
{
|
||||
struct bpf_sk_storage_elem *copy_selem;
|
||||
|
||||
copy_selem = selem_alloc(smap, newsk, NULL, true);
|
||||
if (!copy_selem)
|
||||
return NULL;
|
||||
|
||||
if (map_value_has_spin_lock(&smap->map))
|
||||
copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
|
||||
SDATA(selem)->data, true);
|
||||
else
|
||||
copy_map_value(&smap->map, SDATA(copy_selem)->data,
|
||||
SDATA(selem)->data);
|
||||
|
||||
return copy_selem;
|
||||
}
|
||||
|
||||
/* Clone every sk storage element whose map was created with BPF_F_CLONE
 * from @sk into @newsk.  Called from sk_clone_lock() when a socket is
 * cloned (e.g. a child created by accept()).
 *
 * Returns 0 on success (including when @sk has nothing to clone) or a
 * negative errno.  On failure, any elements already attached to @newsk
 * are left in place; the caller is responsible for freeing them via
 * bpf_sk_storage_free() (see comment before the final return).
 */
int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
{
	struct bpf_sk_storage *new_sk_storage = NULL;
	struct bpf_sk_storage *sk_storage;
	struct bpf_sk_storage_elem *selem;
	int ret = 0;

	/* Start the child from a clean slate. */
	RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);

	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);

	/* Parent has no storage at all: nothing to do. */
	if (!sk_storage || hlist_empty(&sk_storage->list))
		goto out;

	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
		struct bpf_sk_storage_elem *copy_selem;
		struct bpf_sk_storage_map *smap;
		struct bpf_map *map;

		smap = rcu_dereference(SDATA(selem)->smap);
		/* Only maps that explicitly opted in are cloned. */
		if (!(smap->map.map_flags & BPF_F_CLONE))
			continue;

		/* Note that for lockless listeners adding new element
		 * here can race with cleanup in bpf_sk_storage_map_free.
		 * Try to grab map refcnt to make sure that it's still
		 * alive and prevent concurrent removal.
		 */
		map = bpf_map_inc_not_zero(&smap->map, false);
		if (IS_ERR(map))
			continue;

		copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
		if (!copy_selem) {
			ret = -ENOMEM;
			bpf_map_put(map);
			goto out;
		}

		if (new_sk_storage) {
			/* Child already has a bpf_sk_storage from a prior
			 * iteration: just link the copy into the map and
			 * into the child's element list.
			 */
			selem_link_map(smap, copy_selem);
			__selem_link_sk(new_sk_storage, copy_selem);
		} else {
			/* First cloned element: allocate the child's
			 * bpf_sk_storage with @copy_selem as its first
			 * entry.
			 */
			ret = sk_storage_alloc(newsk, smap, copy_selem);
			if (ret) {
				/* Undo the element allocation and the
				 * omem charge taken against @newsk when
				 * the element was allocated.
				 */
				kfree(copy_selem);
				atomic_sub(smap->elem_size,
					   &newsk->sk_omem_alloc);
				bpf_map_put(map);
				goto out;
			}

			new_sk_storage = rcu_dereference(copy_selem->sk_storage);
		}
		/* Drop the temporary ref taken by bpf_map_inc_not_zero(). */
		bpf_map_put(map);
	}

out:
	rcu_read_unlock();

	/* In case of an error, don't free anything explicitly here, the
	 * caller is responsible to call bpf_sk_storage_free.
	 */

	return ret;
}
|
||||
|
||||
BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
|
||||
void *, value, u64, flags)
|
||||
{
|
||||
|
|
|
@ -1851,9 +1851,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
|
|||
goto out;
|
||||
}
|
||||
RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
|
||||
#endif
|
||||
|
||||
if (bpf_sk_storage_clone(sk, newsk)) {
|
||||
sk_free_unlock_clone(newsk);
|
||||
newsk = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
newsk->sk_err = 0;
|
||||
newsk->sk_err_soft = 0;
|
||||
|
|
Loading…
Reference in New Issue