Commit 3c4a594b authored by Alexei Starovoitov's avatar Alexei Starovoitov

Merge branch 'update-sockmap-from-prog'

Lorenz Bauer says:

====================
We're currently building a control plane for our BPF socket dispatch
work. As part of that, we have a need to create a copy of an existing
sockhash, to allow us to change the keys. I previously proposed allowing
privileged userspace to look up sockets, which doesn't work due to
security concerns (see [1]).

In follow up discussions during BPF office hours we identified bpf_iter
as a possible solution: instead of accessing sockets from user space
we can iterate the source sockhash, and insert the values into a new
map. Enabling this requires two pieces: the ability to iterate
sockmap and sockhash, as well as being able to call map_update_elem
from BPF.

This patch set implements the latter: it's now possible to update
sockmap from BPF context. As a next step, we can implement bpf_iter
for sockmap.

===

I've done some more fixups, and audited the safe contexts more
thoroughly. As a result I'm removing CGROUP_SKB, SK_MSG and SK_SKB
for now.

Changes in v3:
- Use CHECK as much as possible (Yonghong)
- Reject ARG_PTR_TO_MAP_VALUE_OR_NULL for sockmap (Yonghong)
- Remove CGROUP_SKB, SK_MSG, SK_SKB from safe contexts
- Test that the verifier rejects update from unsafe context

Changes in v2:
- Fix warning in patch #2 (Jakub K)
- Renamed override_map_arg_type (John)
- Only allow updating sockmap from known safe contexts (John)
- Use __s64 for sockmap updates from user space (Yonghong)
- Various small test fixes around test macros and such (Yonghong)

Thank your for your reviews!

1: https://lore.kernel.org/bpf/20200310174711.7490-1-lmb@cloudflare.com/
====================
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents dca5612f bb23c0e1
......@@ -1648,6 +1648,7 @@ int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
struct bpf_prog *old, u32 which);
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);
#else
......@@ -1669,6 +1670,12 @@ static inline int sock_map_prog_detach(const union bpf_attr *attr,
{
return -EOPNOTSUPP;
}
static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value,
u64 flags)
{
return -EOPNOTSUPP;
}
#endif /* CONFIG_BPF_STREAM_PARSER */
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
......
......@@ -340,23 +340,6 @@ static inline void sk_psock_update_proto(struct sock *sk,
struct sk_psock *psock,
struct proto *ops)
{
/* Initialize saved callbacks and original proto only once, since this
* function may be called multiple times for a psock, e.g. when
* psock->progs.msg_parser is updated.
*
* Since we've not installed the new proto, psock is not yet in use and
* we can initialize it without synchronization.
*/
if (!psock->sk_proto) {
struct proto *orig = READ_ONCE(sk->sk_prot);
psock->saved_unhash = orig->unhash;
psock->saved_close = orig->close;
psock->saved_write_space = sk->sk_write_space;
psock->sk_proto = orig;
}
/* Pairs with lockless read in sk_clone_lock() */
WRITE_ONCE(sk->sk_prot, ops);
}
......
......@@ -157,10 +157,11 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
if (bpf_map_is_dev_bound(map)) {
return bpf_map_offload_update_elem(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
map->map_type == BPF_MAP_TYPE_SOCKHASH ||
map->map_type == BPF_MAP_TYPE_SOCKMAP ||
map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
return map->ops->map_update_elem(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_SOCKHASH ||
map->map_type == BPF_MAP_TYPE_SOCKMAP) {
return sock_map_update_elem_sys(map, key, value, flags);
} else if (IS_FD_PROG_ARRAY(map)) {
return bpf_fd_array_map_update_elem(map, f.file, key, value,
flags);
......
......@@ -3872,6 +3872,33 @@ static int int_ptr_type_to_size(enum bpf_arg_type type)
return -EINVAL;
}
static int resolve_map_arg_type(struct bpf_verifier_env *env,
const struct bpf_call_arg_meta *meta,
enum bpf_arg_type *arg_type)
{
if (!meta->map_ptr) {
/* kernel subsystem misconfigured verifier */
verbose(env, "invalid map_ptr to access map->type\n");
return -EACCES;
}
switch (meta->map_ptr->map_type) {
case BPF_MAP_TYPE_SOCKMAP:
case BPF_MAP_TYPE_SOCKHASH:
if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
*arg_type = ARG_PTR_TO_SOCKET;
} else {
verbose(env, "invalid arg_type for sockmap/sockhash\n");
return -EINVAL;
}
break;
default:
break;
}
return 0;
}
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_call_arg_meta *meta,
const struct bpf_func_proto *fn)
......@@ -3904,6 +3931,14 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return -EACCES;
}
if (arg_type == ARG_PTR_TO_MAP_VALUE ||
arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
err = resolve_map_arg_type(env, meta, &arg_type);
if (err)
return err;
}
if (arg_type == ARG_PTR_TO_MAP_KEY ||
arg_type == ARG_PTR_TO_MAP_VALUE ||
arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
......@@ -4143,6 +4178,38 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return -EACCES;
}
static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
{
enum bpf_attach_type eatype = env->prog->expected_attach_type;
enum bpf_prog_type type = env->prog->type;
if (func_id != BPF_FUNC_map_update_elem)
return false;
/* It's not possible to get access to a locked struct sock in these
* contexts, so updating is safe.
*/
switch (type) {
case BPF_PROG_TYPE_TRACING:
if (eatype == BPF_TRACE_ITER)
return true;
break;
case BPF_PROG_TYPE_SOCKET_FILTER:
case BPF_PROG_TYPE_SCHED_CLS:
case BPF_PROG_TYPE_SCHED_ACT:
case BPF_PROG_TYPE_XDP:
case BPF_PROG_TYPE_SK_REUSEPORT:
case BPF_PROG_TYPE_FLOW_DISSECTOR:
case BPF_PROG_TYPE_SK_LOOKUP:
return true;
default:
break;
}
verbose(env, "cannot update sockmap in this context\n");
return false;
}
static int check_map_func_compatibility(struct bpf_verifier_env *env,
struct bpf_map *map, int func_id)
{
......@@ -4214,7 +4281,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_msg_redirect_map &&
func_id != BPF_FUNC_sk_select_reuseport &&
func_id != BPF_FUNC_map_lookup_elem)
func_id != BPF_FUNC_map_lookup_elem &&
!may_update_sockmap(env, func_id))
goto error;
break;
case BPF_MAP_TYPE_SOCKHASH:
......@@ -4223,7 +4291,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_msg_redirect_hash &&
func_id != BPF_FUNC_sk_select_reuseport &&
func_id != BPF_FUNC_map_lookup_elem)
func_id != BPF_FUNC_map_lookup_elem &&
!may_update_sockmap(env, func_id))
goto error;
break;
case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
......
......@@ -494,14 +494,34 @@ static void sk_psock_backlog(struct work_struct *work)
struct sk_psock *sk_psock_init(struct sock *sk, int node)
{
struct sk_psock *psock = kzalloc_node(sizeof(*psock),
GFP_ATOMIC | __GFP_NOWARN,
node);
if (!psock)
return NULL;
struct sk_psock *psock;
struct proto *prot;
write_lock_bh(&sk->sk_callback_lock);
if (inet_csk_has_ulp(sk)) {
psock = ERR_PTR(-EINVAL);
goto out;
}
if (sk->sk_user_data) {
psock = ERR_PTR(-EBUSY);
goto out;
}
psock = kzalloc_node(sizeof(*psock), GFP_ATOMIC | __GFP_NOWARN, node);
if (!psock) {
psock = ERR_PTR(-ENOMEM);
goto out;
}
prot = READ_ONCE(sk->sk_prot);
psock->sk = sk;
psock->eval = __SK_NONE;
psock->sk_proto = prot;
psock->saved_unhash = prot->unhash;
psock->saved_close = prot->close;
psock->saved_write_space = sk->sk_write_space;
INIT_LIST_HEAD(&psock->link);
spin_lock_init(&psock->link_lock);
......@@ -516,6 +536,8 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
rcu_assign_sk_user_data_nocopy(sk, psock);
sock_hold(sk);
out:
write_unlock_bh(&sk->sk_callback_lock);
return psock;
}
EXPORT_SYMBOL_GPL(sk_psock_init);
......
......@@ -184,8 +184,6 @@ static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)
{
struct proto *prot;
sock_owned_by_me(sk);
switch (sk->sk_type) {
case SOCK_STREAM:
prot = tcp_bpf_get_proto(sk, psock);
......@@ -272,8 +270,8 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
}
} else {
psock = sk_psock_init(sk, map->numa_node);
if (!psock) {
ret = -ENOMEM;
if (IS_ERR(psock)) {
ret = PTR_ERR(psock);
goto out_progs;
}
}
......@@ -322,8 +320,8 @@ static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk)
if (!psock) {
psock = sk_psock_init(sk, map->numa_node);
if (!psock)
return -ENOMEM;
if (IS_ERR(psock))
return PTR_ERR(psock);
}
ret = sock_map_init_proto(sk, psock);
......@@ -478,8 +476,6 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
return -EINVAL;
if (unlikely(idx >= map->max_entries))
return -E2BIG;
if (inet_csk_has_ulp(sk))
return -EINVAL;
link = sk_psock_init_link();
if (!link)
......@@ -563,10 +559,12 @@ static bool sock_map_sk_state_allowed(const struct sock *sk)
return false;
}
static int sock_map_update_elem(struct bpf_map *map, void *key,
void *value, u64 flags)
static int sock_hash_update_common(struct bpf_map *map, void *key,
struct sock *sk, u64 flags);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value,
u64 flags)
{
u32 idx = *(u32 *)key;
struct socket *sock;
struct sock *sk;
int ret;
......@@ -595,14 +593,38 @@ static int sock_map_update_elem(struct bpf_map *map, void *key,
sock_map_sk_acquire(sk);
if (!sock_map_sk_state_allowed(sk))
ret = -EOPNOTSUPP;
else if (map->map_type == BPF_MAP_TYPE_SOCKMAP)
ret = sock_map_update_common(map, *(u32 *)key, sk, flags);
else
ret = sock_map_update_common(map, idx, sk, flags);
ret = sock_hash_update_common(map, key, sk, flags);
sock_map_sk_release(sk);
out:
fput(sock->file);
return ret;
}
static int sock_map_update_elem(struct bpf_map *map, void *key,
void *value, u64 flags)
{
struct sock *sk = (struct sock *)value;
int ret;
if (!sock_map_sk_is_suitable(sk))
return -EOPNOTSUPP;
local_bh_disable();
bh_lock_sock(sk);
if (!sock_map_sk_state_allowed(sk))
ret = -EOPNOTSUPP;
else if (map->map_type == BPF_MAP_TYPE_SOCKMAP)
ret = sock_map_update_common(map, *(u32 *)key, sk, flags);
else
ret = sock_hash_update_common(map, key, sk, flags);
bh_unlock_sock(sk);
local_bh_enable();
return ret;
}
BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, sops,
struct bpf_map *, map, void *, key, u64, flags)
{
......@@ -855,8 +877,6 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
WARN_ON_ONCE(!rcu_read_lock_held());
if (unlikely(flags > BPF_EXIST))
return -EINVAL;
if (inet_csk_has_ulp(sk))
return -EINVAL;
link = sk_psock_init_link();
if (!link)
......@@ -915,45 +935,6 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
return ret;
}
static int sock_hash_update_elem(struct bpf_map *map, void *key,
void *value, u64 flags)
{
struct socket *sock;
struct sock *sk;
int ret;
u64 ufd;
if (map->value_size == sizeof(u64))
ufd = *(u64 *)value;
else
ufd = *(u32 *)value;
if (ufd > S32_MAX)
return -EINVAL;
sock = sockfd_lookup(ufd, &ret);
if (!sock)
return ret;
sk = sock->sk;
if (!sk) {
ret = -EINVAL;
goto out;
}
if (!sock_map_sk_is_suitable(sk)) {
ret = -EOPNOTSUPP;
goto out;
}
sock_map_sk_acquire(sk);
if (!sock_map_sk_state_allowed(sk))
ret = -EOPNOTSUPP;
else
ret = sock_hash_update_common(map, key, sk, flags);
sock_map_sk_release(sk);
out:
fput(sock->file);
return ret;
}
static int sock_hash_get_next_key(struct bpf_map *map, void *key,
void *key_next)
{
......@@ -1222,7 +1203,7 @@ const struct bpf_map_ops sock_hash_ops = {
.map_alloc = sock_hash_alloc,
.map_free = sock_hash_free,
.map_get_next_key = sock_hash_get_next_key,
.map_update_elem = sock_hash_update_elem,
.map_update_elem = sock_map_update_elem,
.map_delete_elem = sock_hash_delete_elem,
.map_lookup_elem = sock_hash_lookup,
.map_lookup_elem_sys_only = sock_hash_lookup_sys,
......
......@@ -567,10 +567,9 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
prot[TCP_BPF_TX].sendpage = tcp_bpf_sendpage;
}
static void tcp_bpf_check_v6_needs_rebuild(struct sock *sk, struct proto *ops)
static void tcp_bpf_check_v6_needs_rebuild(struct proto *ops)
{
if (sk->sk_family == AF_INET6 &&
unlikely(ops != smp_load_acquire(&tcpv6_prot_saved))) {
if (unlikely(ops != smp_load_acquire(&tcpv6_prot_saved))) {
spin_lock_bh(&tcpv6_prot_lock);
if (likely(ops != tcpv6_prot_saved)) {
tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV6], ops);
......@@ -603,13 +602,11 @@ struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
if (!psock->sk_proto) {
struct proto *ops = READ_ONCE(sk->sk_prot);
if (tcp_bpf_assert_proto_ops(ops))
if (sk->sk_family == AF_INET6) {
if (tcp_bpf_assert_proto_ops(psock->sk_proto))
return ERR_PTR(-EINVAL);
tcp_bpf_check_v6_needs_rebuild(sk, ops);
tcp_bpf_check_v6_needs_rebuild(psock->sk_proto);
}
return &tcp_bpf_prots[family][config];
......
......@@ -22,10 +22,9 @@ static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
prot->close = sock_map_close;
}
static void udp_bpf_check_v6_needs_rebuild(struct sock *sk, struct proto *ops)
static void udp_bpf_check_v6_needs_rebuild(struct proto *ops)
{
if (sk->sk_family == AF_INET6 &&
unlikely(ops != smp_load_acquire(&udpv6_prot_saved))) {
if (unlikely(ops != smp_load_acquire(&udpv6_prot_saved))) {
spin_lock_bh(&udpv6_prot_lock);
if (likely(ops != udpv6_prot_saved)) {
udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV6], ops);
......@@ -46,8 +45,8 @@ struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
{
int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6;
if (!psock->sk_proto)
udp_bpf_check_v6_needs_rebuild(sk, READ_ONCE(sk->sk_prot));
if (sk->sk_family == AF_INET6)
udp_bpf_check_v6_needs_rebuild(psock->sk_proto);
return &udp_bpf_prots[family];
}
......@@ -4,6 +4,8 @@
#include "test_progs.h"
#include "test_skmsg_load_helpers.skel.h"
#include "test_sockmap_update.skel.h"
#include "test_sockmap_invalid_update.skel.h"
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
......@@ -101,6 +103,76 @@ static void test_skmsg_helpers(enum bpf_map_type map_type)
test_skmsg_load_helpers__destroy(skel);
}
static void test_sockmap_update(enum bpf_map_type map_type)
{
struct bpf_prog_test_run_attr tattr;
int err, prog, src, dst, duration = 0;
struct test_sockmap_update *skel;
__u64 src_cookie, dst_cookie;
const __u32 zero = 0;
char dummy[14] = {0};
__s64 sk;
sk = connected_socket_v4();
if (CHECK(sk == -1, "connected_socket_v4", "cannot connect\n"))
return;
skel = test_sockmap_update__open_and_load();
if (CHECK(!skel, "open_and_load", "cannot load skeleton\n")) {
close(sk);
return;
}
prog = bpf_program__fd(skel->progs.copy_sock_map);
src = bpf_map__fd(skel->maps.src);
if (map_type == BPF_MAP_TYPE_SOCKMAP)
dst = bpf_map__fd(skel->maps.dst_sock_map);
else
dst = bpf_map__fd(skel->maps.dst_sock_hash);
err = bpf_map_update_elem(src, &zero, &sk, BPF_NOEXIST);
if (CHECK(err, "update_elem(src)", "errno=%u\n", errno))
goto out;
err = bpf_map_lookup_elem(src, &zero, &src_cookie);
if (CHECK(err, "lookup_elem(src, cookie)", "errno=%u\n", errno))
goto out;
tattr = (struct bpf_prog_test_run_attr){
.prog_fd = prog,
.repeat = 1,
.data_in = dummy,
.data_size_in = sizeof(dummy),
};
err = bpf_prog_test_run_xattr(&tattr);
if (CHECK_ATTR(err || !tattr.retval, "bpf_prog_test_run",
"errno=%u retval=%u\n", errno, tattr.retval))
goto out;
err = bpf_map_lookup_elem(dst, &zero, &dst_cookie);
if (CHECK(err, "lookup_elem(dst, cookie)", "errno=%u\n", errno))
goto out;
CHECK(dst_cookie != src_cookie, "cookie mismatch", "%llu != %llu\n",
dst_cookie, src_cookie);
out:
close(sk);
test_sockmap_update__destroy(skel);
}
static void test_sockmap_invalid_update(void)
{
struct test_sockmap_invalid_update *skel;
int duration = 0;
skel = test_sockmap_invalid_update__open_and_load();
CHECK(skel, "open_and_load", "verifier accepted map_update\n");
if (skel)
test_sockmap_invalid_update__destroy(skel);
}
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
......@@ -111,4 +183,10 @@ void test_sockmap_basic(void)
test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash sk_msg load helpers"))
test_skmsg_helpers(BPF_MAP_TYPE_SOCKHASH);
if (test__start_subtest("sockmap update"))
test_sockmap_update(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash update"))
test_sockmap_update(BPF_MAP_TYPE_SOCKHASH);
if (test__start_subtest("sockmap update in unsafe context"))
test_sockmap_invalid_update();
}
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Cloudflare
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 1);
__type(key, __u32);
__type(value, __u64);
} map SEC(".maps");
SEC("sockops")
int bpf_sockmap(struct bpf_sock_ops *skops)
{
__u32 key = 0;
if (skops->sk)
bpf_map_update_elem(&map, &key, skops->sk, 0);
return 0;
}
char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Cloudflare
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 1);
__type(key, __u32);
__type(value, __u64);
} src SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 1);
__type(key, __u32);
__type(value, __u64);
} dst_sock_map SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_SOCKHASH);
__uint(max_entries, 1);
__type(key, __u32);
__type(value, __u64);
} dst_sock_hash SEC(".maps");
SEC("classifier/copy_sock_map")
int copy_sock_map(void *ctx)
{
struct bpf_sock *sk;
bool failed = false;
__u32 key = 0;
sk = bpf_map_lookup_elem(&src, &key);
if (!sk)
return SK_DROP;
if (bpf_map_update_elem(&dst_sock_map, &key, sk, 0))
failed = true;
if (bpf_map_update_elem(&dst_sock_hash, &key, sk, 0))
failed = true;
bpf_sk_release(sk);
return failed ? SK_DROP : SK_PASS;
}
char _license[] SEC("license") = "GPL";
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment