Commit 8f51dfc7 authored by Stanislav Fomichev's avatar Stanislav Fomichev Committed by Daniel Borkmann

bpf: support cloning sk storage on accept()

Add new helper bpf_sk_storage_clone which optionally clones sk storage
and call it from sk_clone_lock.

Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Acked-by: default avatarMartin KaFai Lau <kafai@fb.com>
Acked-by: default avatarYonghong Song <yhs@fb.com>
Signed-off-by: default avatarStanislav Fomichev <sdf@google.com>
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
parent b0e4701c
...@@ -10,4 +10,14 @@ void bpf_sk_storage_free(struct sock *sk); ...@@ -10,4 +10,14 @@ void bpf_sk_storage_free(struct sock *sk);
extern const struct bpf_func_proto bpf_sk_storage_get_proto; extern const struct bpf_func_proto bpf_sk_storage_get_proto;
extern const struct bpf_func_proto bpf_sk_storage_delete_proto; extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
#ifdef CONFIG_BPF_SYSCALL
int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
#else
static inline int bpf_sk_storage_clone(const struct sock *sk,
struct sock *newsk)
{
return 0;
}
#endif
#endif /* _BPF_SK_STORAGE_H */ #endif /* _BPF_SK_STORAGE_H */
...@@ -337,6 +337,9 @@ enum bpf_attach_type { ...@@ -337,6 +337,9 @@ enum bpf_attach_type {
#define BPF_F_RDONLY_PROG (1U << 7) #define BPF_F_RDONLY_PROG (1U << 7)
#define BPF_F_WRONLY_PROG (1U << 8) #define BPF_F_WRONLY_PROG (1U << 8)
/* Clone map from listener for newly accepted socket */
#define BPF_F_CLONE (1U << 9)
/* flags for BPF_PROG_QUERY */ /* flags for BPF_PROG_QUERY */
#define BPF_F_QUERY_EFFECTIVE (1U << 0) #define BPF_F_QUERY_EFFECTIVE (1U << 0)
......
...@@ -12,6 +12,9 @@ ...@@ -12,6 +12,9 @@
static atomic_t cache_idx; static atomic_t cache_idx;
#define SK_STORAGE_CREATE_FLAG_MASK \
(BPF_F_NO_PREALLOC | BPF_F_CLONE)
struct bucket { struct bucket {
struct hlist_head list; struct hlist_head list;
raw_spinlock_t lock; raw_spinlock_t lock;
...@@ -209,7 +212,6 @@ static void selem_unlink_sk(struct bpf_sk_storage_elem *selem) ...@@ -209,7 +212,6 @@ static void selem_unlink_sk(struct bpf_sk_storage_elem *selem)
kfree_rcu(sk_storage, rcu); kfree_rcu(sk_storage, rcu);
} }
/* sk_storage->lock must be held and sk_storage->list cannot be empty */
static void __selem_link_sk(struct bpf_sk_storage *sk_storage, static void __selem_link_sk(struct bpf_sk_storage *sk_storage,
struct bpf_sk_storage_elem *selem) struct bpf_sk_storage_elem *selem)
{ {
...@@ -509,7 +511,7 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map) ...@@ -509,7 +511,7 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
return 0; return 0;
} }
/* Called by __sk_destruct() */ /* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk) void bpf_sk_storage_free(struct sock *sk)
{ {
struct bpf_sk_storage_elem *selem; struct bpf_sk_storage_elem *selem;
...@@ -557,6 +559,11 @@ static void bpf_sk_storage_map_free(struct bpf_map *map) ...@@ -557,6 +559,11 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
smap = (struct bpf_sk_storage_map *)map; smap = (struct bpf_sk_storage_map *)map;
/* Note that this map might be concurrently cloned from
* bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
* RCU read section to finish before proceeding. New RCU
* read sections should be prevented via bpf_map_inc_not_zero.
*/
synchronize_rcu(); synchronize_rcu();
/* bpf prog and the userspace can no longer access this map /* bpf prog and the userspace can no longer access this map
...@@ -601,7 +608,9 @@ static void bpf_sk_storage_map_free(struct bpf_map *map) ...@@ -601,7 +608,9 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr) static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
{ {
if (attr->map_flags != BPF_F_NO_PREALLOC || attr->max_entries || if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK ||
!(attr->map_flags & BPF_F_NO_PREALLOC) ||
attr->max_entries ||
attr->key_size != sizeof(int) || !attr->value_size || attr->key_size != sizeof(int) || !attr->value_size ||
/* Enforce BTF for userspace sk dumping */ /* Enforce BTF for userspace sk dumping */
!attr->btf_key_type_id || !attr->btf_value_type_id) !attr->btf_key_type_id || !attr->btf_value_type_id)
...@@ -739,6 +748,95 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key) ...@@ -739,6 +748,95 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
return err; return err;
} }
static struct bpf_sk_storage_elem *
bpf_sk_storage_clone_elem(struct sock *newsk,
struct bpf_sk_storage_map *smap,
struct bpf_sk_storage_elem *selem)
{
struct bpf_sk_storage_elem *copy_selem;
copy_selem = selem_alloc(smap, newsk, NULL, true);
if (!copy_selem)
return NULL;
if (map_value_has_spin_lock(&smap->map))
copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
SDATA(selem)->data, true);
else
copy_map_value(&smap->map, SDATA(copy_selem)->data,
SDATA(selem)->data);
return copy_selem;
}
int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
{
struct bpf_sk_storage *new_sk_storage = NULL;
struct bpf_sk_storage *sk_storage;
struct bpf_sk_storage_elem *selem;
int ret = 0;
RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
rcu_read_lock();
sk_storage = rcu_dereference(sk->sk_bpf_storage);
if (!sk_storage || hlist_empty(&sk_storage->list))
goto out;
hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
struct bpf_sk_storage_elem *copy_selem;
struct bpf_sk_storage_map *smap;
struct bpf_map *map;
smap = rcu_dereference(SDATA(selem)->smap);
if (!(smap->map.map_flags & BPF_F_CLONE))
continue;
/* Note that for lockless listeners adding new element
* here can race with cleanup in bpf_sk_storage_map_free.
* Try to grab map refcnt to make sure that it's still
* alive and prevent concurrent removal.
*/
map = bpf_map_inc_not_zero(&smap->map, false);
if (IS_ERR(map))
continue;
copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
if (!copy_selem) {
ret = -ENOMEM;
bpf_map_put(map);
goto out;
}
if (new_sk_storage) {
selem_link_map(smap, copy_selem);
__selem_link_sk(new_sk_storage, copy_selem);
} else {
ret = sk_storage_alloc(newsk, smap, copy_selem);
if (ret) {
kfree(copy_selem);
atomic_sub(smap->elem_size,
&newsk->sk_omem_alloc);
bpf_map_put(map);
goto out;
}
new_sk_storage = rcu_dereference(copy_selem->sk_storage);
}
bpf_map_put(map);
}
out:
rcu_read_unlock();
/* In case of an error, don't free anything explicitly here, the
* caller is responsible to call bpf_sk_storage_free.
*/
return ret;
}
BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
void *, value, u64, flags) void *, value, u64, flags)
{ {
......
...@@ -1851,9 +1851,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) ...@@ -1851,9 +1851,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
goto out; goto out;
} }
RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
#ifdef CONFIG_BPF_SYSCALL
RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL); if (bpf_sk_storage_clone(sk, newsk)) {
#endif sk_free_unlock_clone(newsk);
newsk = NULL;
goto out;
}
newsk->sk_err = 0; newsk->sk_err = 0;
newsk->sk_err_soft = 0; newsk->sk_err_soft = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment