Commit cbb2fb13 authored by Alexei Starovoitov

Merge branch 'sockmap-ulp-fixes'

Daniel Borkmann says:

====================
Batch of various fixes related to BPF sockmap and ULP: a module alias
is added to restrict module requests to actual ULP modules, and several
races and memory leaks in the sockmap code are fixed. For details
please refer to the individual patches. Thanks!
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 965931e3 585f5a62
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2065,6 +2065,10 @@ int tcp_set_ulp_id(struct sock *sk, const int ulp);
 void tcp_get_available_ulp(char *buf, size_t len);
 void tcp_cleanup_ulp(struct sock *sk);
 
+#define MODULE_ALIAS_TCP_ULP(name)                              \
+        __MODULE_INFO(alias, alias_userspace, name);            \
+        __MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
+
 /* Call BPF_SOCK_OPS program that returns an int. If the return value
  * is < 0, then the BPF op failed (for example if the loaded BPF
  * program does not support the chosen operation or there is no BPF
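The macro deliberately emits two aliases: the bare name keeps existing userspace behaviour (e.g. "modprobe tls") working, while the "tcp-ulp-" prefixed alias is the only form the kernel will use for autoloading, as the tcp_ulp.c hunk further below shows. As a rough sketch, assuming the stock __MODULE_INFO() from include/linux/module.h (which emits tag=value strings into the module's .modinfo section; the symbol names here are illustrative, the real macro generates unique ones), MODULE_ALIAS_TCP_ULP("tls") expands approximately to:

/* Approximate expansion of MODULE_ALIAS_TCP_ULP("tls"); hypothetical
 * symbol names, the real __MODULE_INFO() generates unique identifiers.
 */
static const char __ulp_alias_userspace[]
        __attribute__((section(".modinfo"), used)) = "alias=tls";
static const char __ulp_alias_tcp_ulp[]
        __attribute__((section(".modinfo"), used)) = "alias=tcp-ulp-tls";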
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -58,6 +58,7 @@ struct bpf_stab {
         struct bpf_map map;
         struct sock **sock_map;
         struct bpf_sock_progs progs;
+        raw_spinlock_t lock;
 };
 
 struct bucket {
@@ -89,9 +90,9 @@ enum smap_psock_state {
 
 struct smap_psock_map_entry {
         struct list_head list;
+        struct bpf_map *map;
         struct sock **entry;
         struct htab_elem __rcu *hash_link;
-        struct bpf_htab __rcu *htab;
 };
 
 struct smap_psock {
@@ -343,13 +344,18 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
         e = psock_map_pop(sk, psock);
         while (e) {
                 if (e->entry) {
-                        osk = cmpxchg(e->entry, sk, NULL);
+                        struct bpf_stab *stab = container_of(e->map, struct bpf_stab, map);
+
+                        raw_spin_lock_bh(&stab->lock);
+                        osk = *e->entry;
                         if (osk == sk) {
+                                *e->entry = NULL;
                                 smap_release_sock(psock, sk);
                         }
+                        raw_spin_unlock_bh(&stab->lock);
                 } else {
                         struct htab_elem *link = rcu_dereference(e->hash_link);
-                        struct bpf_htab *htab = rcu_dereference(e->htab);
+                        struct bpf_htab *htab = container_of(e->map, struct bpf_htab, map);
                         struct hlist_head *head;
                         struct htab_elem *l;
                         struct bucket *b;
@@ -370,6 +376,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
                         }
                         raw_spin_unlock_bh(&b->lock);
                 }
+                kfree(e);
                 e = psock_map_pop(sk, psock);
         }
         rcu_read_unlock();
@@ -1641,6 +1648,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
                 return ERR_PTR(-ENOMEM);
 
         bpf_map_init_from_attr(&stab->map, attr);
+        raw_spin_lock_init(&stab->lock);
 
         /* make sure page count doesn't overflow */
         cost = (u64) stab->map.max_entries * sizeof(struct sock *);
@@ -1675,8 +1683,10 @@ static void smap_list_map_remove(struct smap_psock *psock,
 
         spin_lock_bh(&psock->maps_lock);
         list_for_each_entry_safe(e, tmp, &psock->maps, list) {
-                if (e->entry == entry)
+                if (e->entry == entry) {
                         list_del(&e->list);
+                        kfree(e);
+                }
         }
         spin_unlock_bh(&psock->maps_lock);
 }
@@ -1690,8 +1700,10 @@ static void smap_list_hash_remove(struct smap_psock *psock,
         list_for_each_entry_safe(e, tmp, &psock->maps, list) {
                 struct htab_elem *c = rcu_dereference(e->hash_link);
 
-                if (c == hash_link)
+                if (c == hash_link) {
                         list_del(&e->list);
+                        kfree(e);
+                }
         }
         spin_unlock_bh(&psock->maps_lock);
 }
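Context for the leak fix above: previously smap_list_map_remove() and smap_list_hash_remove() only unlinked a matching smap_psock_map_entry with list_del(), and bpf_tcp_close() dropped each popped entry without freeing it, so the entry structs themselves were leaked. After this change the ownership rule is simple: whichever path takes an entry off psock->maps (the pop loop in close, or the unlink helpers above) also kfree()s it, so every entry has exactly one owner at teardown. The matching kzalloc()/kfree() likewise moves out of __sock_map_ctx_update_elem() and into the map-specific callers, as the hunks below show.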
@@ -1711,14 +1723,15 @@ static void sock_map_free(struct bpf_map *map)
          * and a grace period expire to ensure psock is really safe to remove.
          */
         rcu_read_lock();
+        raw_spin_lock_bh(&stab->lock);
         for (i = 0; i < stab->map.max_entries; i++) {
                 struct smap_psock *psock;
                 struct sock *sock;
 
-                sock = xchg(&stab->sock_map[i], NULL);
+                sock = stab->sock_map[i];
                 if (!sock)
                         continue;
-
+                stab->sock_map[i] = NULL;
                 psock = smap_psock_sk(sock);
                 /* This check handles a racing sock event that can get the
                  * sk_callback_lock before this case but after xchg happens
@@ -1730,6 +1743,7 @@ static void sock_map_free(struct bpf_map *map)
                         smap_release_sock(psock, sock);
                 }
         }
+        raw_spin_unlock_bh(&stab->lock);
         rcu_read_unlock();
 
         sock_map_remove_complete(stab);
@@ -1773,19 +1787,23 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
 
         if (k >= map->max_entries)
                 return -EINVAL;
 
-        sock = xchg(&stab->sock_map[k], NULL);
+        raw_spin_lock_bh(&stab->lock);
+        sock = stab->sock_map[k];
+        stab->sock_map[k] = NULL;
+        raw_spin_unlock_bh(&stab->lock);
         if (!sock)
                 return -EINVAL;
 
         psock = smap_psock_sk(sock);
         if (!psock)
-                goto out;
-
-        if (psock->bpf_parse)
+                return 0;
+        if (psock->bpf_parse) {
+                write_lock_bh(&sock->sk_callback_lock);
                 smap_stop_sock(psock, sock);
+                write_unlock_bh(&sock->sk_callback_lock);
+        }
         smap_list_map_remove(psock, &stab->sock_map[k]);
         smap_release_sock(psock, sock);
-out:
         return 0;
 }
@@ -1821,11 +1839,9 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
 static int __sock_map_ctx_update_elem(struct bpf_map *map,
                                       struct bpf_sock_progs *progs,
                                       struct sock *sock,
-                                      struct sock **map_link,
                                       void *key)
 {
         struct bpf_prog *verdict, *parse, *tx_msg;
-        struct smap_psock_map_entry *e = NULL;
         struct smap_psock *psock;
         bool new = false;
         int err = 0;
@@ -1898,14 +1914,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
                 new = true;
         }
 
-        if (map_link) {
-                e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
-                if (!e) {
-                        err = -ENOMEM;
-                        goto out_free;
-                }
-        }
-
         /* 3. At this point we have a reference to a valid psock that is
          * running. Attach any BPF programs needed.
          */
@@ -1927,17 +1935,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
                 write_unlock_bh(&sock->sk_callback_lock);
         }
 
-        /* 4. Place psock in sockmap for use and stop any programs on
-         * the old sock assuming its not the same sock we are replacing
-         * it with. Because we can only have a single set of programs if
-         * old_sock has a strp we can stop it.
-         */
-        if (map_link) {
-                e->entry = map_link;
-                spin_lock_bh(&psock->maps_lock);
-                list_add_tail(&e->list, &psock->maps);
-                spin_unlock_bh(&psock->maps_lock);
-        }
         return err;
 out_free:
         smap_release_sock(psock, sock);
@@ -1948,7 +1945,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
         }
         if (tx_msg)
                 bpf_prog_put(tx_msg);
-        kfree(e);
         return err;
 }
@@ -1958,36 +1954,57 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 {
         struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
         struct bpf_sock_progs *progs = &stab->progs;
-        struct sock *osock, *sock;
+        struct sock *osock, *sock = skops->sk;
+        struct smap_psock_map_entry *e;
+        struct smap_psock *psock;
         u32 i = *(u32 *)key;
         int err;
 
         if (unlikely(flags > BPF_EXIST))
                 return -EINVAL;
         if (unlikely(i >= stab->map.max_entries))
                 return -E2BIG;
 
-        sock = READ_ONCE(stab->sock_map[i]);
-        if (flags == BPF_EXIST && !sock)
-                return -ENOENT;
-        else if (flags == BPF_NOEXIST && sock)
-                return -EEXIST;
+        e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
+        if (!e)
+                return -ENOMEM;
 
-        sock = skops->sk;
-        err = __sock_map_ctx_update_elem(map, progs, sock, &stab->sock_map[i],
-                                         key);
+        err = __sock_map_ctx_update_elem(map, progs, sock, key);
         if (err)
                 goto out;
 
-        osock = xchg(&stab->sock_map[i], sock);
-        if (osock) {
-                struct smap_psock *opsock = smap_psock_sk(osock);
+        /* psock guaranteed to be present. */
+        psock = smap_psock_sk(sock);
+        raw_spin_lock_bh(&stab->lock);
+        osock = stab->sock_map[i];
+        if (osock && flags == BPF_NOEXIST) {
+                err = -EEXIST;
+                goto out_unlock;
+        }
+        if (!osock && flags == BPF_EXIST) {
+                err = -ENOENT;
+                goto out_unlock;
+        }
 
-                smap_list_map_remove(opsock, &stab->sock_map[i]);
-                smap_release_sock(opsock, osock);
+        e->entry = &stab->sock_map[i];
+        e->map = map;
+        spin_lock_bh(&psock->maps_lock);
+        list_add_tail(&e->list, &psock->maps);
+        spin_unlock_bh(&psock->maps_lock);
+
+        stab->sock_map[i] = sock;
+        if (osock) {
+                psock = smap_psock_sk(osock);
+                smap_list_map_remove(psock, &stab->sock_map[i]);
+                smap_release_sock(psock, osock);
         }
+        raw_spin_unlock_bh(&stab->lock);
+        return 0;
+out_unlock:
+        smap_release_sock(psock, sock);
+        raw_spin_unlock_bh(&stab->lock);
 out:
+        kfree(e);
         return err;
 }
@@ -2350,7 +2367,7 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
         b = __select_bucket(htab, hash);
         head = &b->head;
 
-        err = __sock_map_ctx_update_elem(map, progs, sock, NULL, key);
+        err = __sock_map_ctx_update_elem(map, progs, sock, key);
         if (err)
                 goto err;
@@ -2376,8 +2393,7 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
         }
 
         rcu_assign_pointer(e->hash_link, l_new);
-        rcu_assign_pointer(e->htab,
-                           container_of(map, struct bpf_htab, map));
+        e->map = map;
         spin_lock_bh(&psock->maps_lock);
         list_add_tail(&e->list, &psock->maps);
         spin_unlock_bh(&psock->maps_lock);
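The race fix in the hunks above replaces the lockless READ_ONCE()/xchg()/cmpxchg() slot accesses with a per-table raw spinlock. With the old scheme, the BPF_NOEXIST/BPF_EXIST check and the later xchg() were two separate atomic steps, so a concurrent update or delete could slip in between them (for example, two BPF_NOEXIST updaters could both pass the !sock check and both "succeed"). Relatedly, smap_stop_sock() in the delete path is now called under sock->sk_callback_lock, the lock it expects to hold. Below is a minimal userspace analogue of the fixed update pattern; this is not kernel code, the names (struct stab, stab_update) are hypothetical, and a pthread spinlock stands in for raw_spin_lock_bh():

/* Userspace sketch: flag check and slot write under one lock, so the
 * check cannot race with a concurrent writer's store.
 */
#include <errno.h>
#include <pthread.h>
#include <stddef.h>

enum { ANY, NOEXIST, EXIST };           /* models BPF_ANY/BPF_NOEXIST/BPF_EXIST */

struct stab {
        pthread_spinlock_t lock;        /* models stab->lock */
        void *slots[16];                /* models stab->sock_map[] */
};

static int stab_init(struct stab *t)
{
        return pthread_spin_init(&t->lock, PTHREAD_PROCESS_PRIVATE);
}

static int stab_update(struct stab *t, size_t i, void *sk, int flags)
{
        void *osock;
        int err = 0;

        pthread_spin_lock(&t->lock);
        osock = t->slots[i];
        if (osock && flags == NOEXIST)
                err = -EEXIST;          /* checked and written atomically */
        else if (!osock && flags == EXIST)
                err = -ENOENT;
        else
                t->slots[i] = sk;       /* kernel also moves psock bookkeeping here */
        pthread_spin_unlock(&t->lock);
        return err;
}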
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -51,7 +51,7 @@ static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name)
 #ifdef CONFIG_MODULES
         if (!ulp && capable(CAP_NET_ADMIN)) {
                 rcu_read_unlock();
-                request_module("%s", name);
+                request_module("tcp-ulp-%s", name);
                 rcu_read_lock();
                 ulp = tcp_ulp_find(name);
         }
@@ -129,6 +129,8 @@ void tcp_cleanup_ulp(struct sock *sk)
         if (icsk->icsk_ulp_ops->release)
                 icsk->icsk_ulp_ops->release(sk);
         module_put(icsk->icsk_ulp_ops->owner);
+
+        icsk->icsk_ulp_ops = NULL;
 }
 
 /* Change upper layer protocol for socket */
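Two fixes here. First, request_module() now only resolves names that carry the "tcp-ulp-" alias prefix, so an arbitrary string coming in through the TCP_ULP socket option can no longer load an unrelated module by name (autoloading additionally requires CAP_NET_ADMIN, as the hunk shows). Second, tcp_cleanup_ulp() now clears icsk_ulp_ops, so a socket whose ULP was released can attach one again instead of tripping over the stale pointer. A minimal userspace sketch of the call that ends up in __tcp_ulp_find_autoload() (standard sockets API; TCP_ULP needs kernel 4.13+ and has value 31 if your libc headers predate it):

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <sys/socket.h>

#ifndef TCP_ULP
#define TCP_ULP 31              /* older libc headers lack the define */
#endif

/* Attach the "tls" ULP to a connected TCP socket. If the tls module is
 * not loaded, the kernel now issues request_module("tcp-ulp-tls")
 * instead of request_module("tls").
 */
static int enable_tls_ulp(int fd)
{
        if (setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")) < 0) {
                perror("setsockopt(TCP_ULP)");
                return -1;
        }
        return 0;
}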
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -45,6 +45,7 @@
 MODULE_AUTHOR("Mellanox Technologies");
 MODULE_DESCRIPTION("Transport Layer Security Support");
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS_TCP_ULP("tls");
 
 enum {
         TLSV4,
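At the time of this series, tls appears to be the only modular ULP in the tree, so it is the only module that needs the new alias; once it carries the macro, "modinfo tls" should list both "alias: tls" and "alias: tcp-ulp-tls". Together with the tcp_ulp.c change above, this keeps setsockopt(TCP_ULP, "tls") working unchanged for userspace while closing the load-any-module-by-name hole.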