Commit 8f51dfc7 authored by Stanislav Fomichev's avatar Stanislav Fomichev Committed by Daniel Borkmann

bpf: support cloning sk storage on accept()

Add new helper bpf_sk_storage_clone which optionally clones sk storage
and call it from sk_clone_lock.

Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Acked-by: default avatarMartin KaFai Lau <kafai@fb.com>
Acked-by: default avatarYonghong Song <yhs@fb.com>
Signed-off-by: default avatarStanislav Fomichev <sdf@google.com>
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
parent b0e4701c
......@@ -10,4 +10,14 @@ void bpf_sk_storage_free(struct sock *sk);
extern const struct bpf_func_proto bpf_sk_storage_get_proto;
extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
#ifdef CONFIG_BPF_SYSCALL
int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
#else
static inline int bpf_sk_storage_clone(const struct sock *sk,
struct sock *newsk)
{
return 0;
}
#endif
#endif /* _BPF_SK_STORAGE_H */
......@@ -337,6 +337,9 @@ enum bpf_attach_type {
#define BPF_F_RDONLY_PROG (1U << 7)
#define BPF_F_WRONLY_PROG (1U << 8)
/* Clone map from listener for newly accepted socket */
#define BPF_F_CLONE (1U << 9)
/* flags for BPF_PROG_QUERY */
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
......
......@@ -12,6 +12,9 @@
static atomic_t cache_idx;
#define SK_STORAGE_CREATE_FLAG_MASK \
(BPF_F_NO_PREALLOC | BPF_F_CLONE)
struct bucket {
struct hlist_head list;
raw_spinlock_t lock;
......@@ -209,7 +212,6 @@ static void selem_unlink_sk(struct bpf_sk_storage_elem *selem)
kfree_rcu(sk_storage, rcu);
}
/* sk_storage->lock must be held and sk_storage->list cannot be empty */
static void __selem_link_sk(struct bpf_sk_storage *sk_storage,
struct bpf_sk_storage_elem *selem)
{
......@@ -509,7 +511,7 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
return 0;
}
/* Called by __sk_destruct() */
/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
struct bpf_sk_storage_elem *selem;
......@@ -557,6 +559,11 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
smap = (struct bpf_sk_storage_map *)map;
/* Note that this map might be concurrently cloned from
* bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
* RCU read section to finish before proceeding. New RCU
* read sections should be prevented via bpf_map_inc_not_zero.
*/
synchronize_rcu();
/* bpf prog and the userspace can no longer access this map
......@@ -601,7 +608,9 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
{
if (attr->map_flags != BPF_F_NO_PREALLOC || attr->max_entries ||
if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK ||
!(attr->map_flags & BPF_F_NO_PREALLOC) ||
attr->max_entries ||
attr->key_size != sizeof(int) || !attr->value_size ||
/* Enforce BTF for userspace sk dumping */
!attr->btf_key_type_id || !attr->btf_value_type_id)
......@@ -739,6 +748,95 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
return err;
}
static struct bpf_sk_storage_elem *
bpf_sk_storage_clone_elem(struct sock *newsk,
struct bpf_sk_storage_map *smap,
struct bpf_sk_storage_elem *selem)
{
struct bpf_sk_storage_elem *copy_selem;
copy_selem = selem_alloc(smap, newsk, NULL, true);
if (!copy_selem)
return NULL;
if (map_value_has_spin_lock(&smap->map))
copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
SDATA(selem)->data, true);
else
copy_map_value(&smap->map, SDATA(copy_selem)->data,
SDATA(selem)->data);
return copy_selem;
}
int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
{
struct bpf_sk_storage *new_sk_storage = NULL;
struct bpf_sk_storage *sk_storage;
struct bpf_sk_storage_elem *selem;
int ret = 0;
RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
rcu_read_lock();
sk_storage = rcu_dereference(sk->sk_bpf_storage);
if (!sk_storage || hlist_empty(&sk_storage->list))
goto out;
hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
struct bpf_sk_storage_elem *copy_selem;
struct bpf_sk_storage_map *smap;
struct bpf_map *map;
smap = rcu_dereference(SDATA(selem)->smap);
if (!(smap->map.map_flags & BPF_F_CLONE))
continue;
/* Note that for lockless listeners adding new element
* here can race with cleanup in bpf_sk_storage_map_free.
* Try to grab map refcnt to make sure that it's still
* alive and prevent concurrent removal.
*/
map = bpf_map_inc_not_zero(&smap->map, false);
if (IS_ERR(map))
continue;
copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
if (!copy_selem) {
ret = -ENOMEM;
bpf_map_put(map);
goto out;
}
if (new_sk_storage) {
selem_link_map(smap, copy_selem);
__selem_link_sk(new_sk_storage, copy_selem);
} else {
ret = sk_storage_alloc(newsk, smap, copy_selem);
if (ret) {
kfree(copy_selem);
atomic_sub(smap->elem_size,
&newsk->sk_omem_alloc);
bpf_map_put(map);
goto out;
}
new_sk_storage = rcu_dereference(copy_selem->sk_storage);
}
bpf_map_put(map);
}
out:
rcu_read_unlock();
/* In case of an error, don't free anything explicitly here, the
* caller is responsible to call bpf_sk_storage_free.
*/
return ret;
}
BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
void *, value, u64, flags)
{
......
......@@ -1851,9 +1851,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
goto out;
}
RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
#ifdef CONFIG_BPF_SYSCALL
RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
#endif
if (bpf_sk_storage_clone(sk, newsk)) {
sk_free_unlock_clone(newsk);
newsk = NULL;
goto out;
}
newsk->sk_err = 0;
newsk->sk_err_soft = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment