Commit e95c6cf4 authored by David S. Miller's avatar David S. Miller

Merge branch 'sockmap-fixes'

John Fastabend says:

====================
sockmap fixes for net

The following implements a set of fixes for sockmap and changes the
API slightly in a few places to reduce preempt_disable/enable scope.
We do this here in net because it requires an API change and this
avoids getting stuck with legacy API going forward.

The short description:

Access to skb mark is removed, it is problematic when we add
features in the future because mark is a union and used by the
TCP/socket code internally. We don't want to expose this to the
BPF programs or let programs change the values.

The other change is caching metadata in the skb itself between
when the BPF program returns a redirect code and the core code
implements the redirect. This avoids having per cpu metadata.

Finally, tighten restriction on using sockmap to CAP_NET_ADMIN and
only SOCK_STREAM sockets.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 1cc276ce 9ef2a8cd
......@@ -728,7 +728,7 @@ void xdp_do_flush_map(void);
void bpf_warn_invalid_xdp_action(u32 act);
void bpf_warn_invalid_xdp_redirect(u32 ifindex);
struct sock *do_sk_redirect_map(void);
struct sock *do_sk_redirect_map(struct sk_buff *skb);
#ifdef CONFIG_BPF_JIT
extern int bpf_jit_enable;
......
......@@ -840,6 +840,11 @@ struct tcp_skb_cb {
struct inet6_skb_parm h6;
#endif
} header; /* For incoming skbs */
struct {
__u32 key;
__u32 flags;
struct bpf_map *map;
} bpf;
};
};
......
......@@ -78,6 +78,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
int err = -EINVAL;
u64 cost;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
......
......@@ -39,6 +39,7 @@
#include <linux/workqueue.h>
#include <linux/list.h>
#include <net/strparser.h>
#include <net/tcp.h>
struct bpf_stab {
struct bpf_map map;
......@@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
return SK_DROP;
skb_orphan(skb);
/* We need to ensure that BPF metadata for maps is also cleared
* when we orphan the skb so that we don't have the possibility
* to reference a stale map.
*/
TCP_SKB_CB(skb)->bpf.map = NULL;
skb->sk = psock->sock;
bpf_compute_data_end(skb);
preempt_disable();
rc = (*prog->bpf_func)(skb, prog->insnsi);
preempt_enable();
skb->sk = NULL;
return rc;
......@@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
struct sock *sk;
int rc;
/* Because we use per cpu values to feed input from sock redirect
* in BPF program to do_sk_redirect_map() call we need to ensure we
* are not preempted. RCU read lock is not sufficient in this case
* with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
*/
preempt_disable();
rc = smap_verdict_func(psock, skb);
switch (rc) {
case SK_REDIRECT:
sk = do_sk_redirect_map();
preempt_enable();
sk = do_sk_redirect_map(skb);
if (likely(sk)) {
struct smap_psock *peer = smap_psock_sk(sk);
......@@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
/* Fall through and free skb otherwise */
case SK_DROP:
default:
if (rc != SK_REDIRECT)
preempt_enable();
kfree_skb(skb);
}
}
......@@ -487,6 +486,9 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
int err = -EINVAL;
u64 cost;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
......@@ -840,6 +842,12 @@ static int sock_map_update_elem(struct bpf_map *map,
return -EINVAL;
}
if (skops.sk->sk_type != SOCK_STREAM ||
skops.sk->sk_protocol != IPPROTO_TCP) {
fput(socket->file);
return -EOPNOTSUPP;
}
err = sock_map_ctx_update_elem(&skops, map, key, flags);
fput(socket->file);
return err;
......
......@@ -1839,31 +1839,31 @@ static const struct bpf_func_proto bpf_redirect_proto = {
.arg2_type = ARG_ANYTHING,
};
BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags)
BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
struct bpf_map *, map, u32, key, u64, flags)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
if (unlikely(flags))
return SK_ABORTED;
ri->ifindex = key;
ri->flags = flags;
ri->map = map;
tcb->bpf.key = key;
tcb->bpf.flags = flags;
tcb->bpf.map = map;
return SK_REDIRECT;
}
struct sock *do_sk_redirect_map(void)
struct sock *do_sk_redirect_map(struct sk_buff *skb)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
struct sock *sk = NULL;
if (ri->map) {
sk = __sock_map_lookup_elem(ri->map, ri->ifindex);
if (tcb->bpf.map) {
sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);
ri->ifindex = 0;
ri->map = NULL;
/* we do not clear flags for future lookup */
tcb->bpf.key = 0;
tcb->bpf.map = NULL;
}
return sk;
......@@ -1873,9 +1873,10 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
.func = bpf_sk_redirect_map,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_ANYTHING,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_ANYTHING,
};
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
......@@ -3683,7 +3684,6 @@ static bool sk_skb_is_valid_access(int off, int size,
{
if (type == BPF_WRITE) {
switch (off) {
case bpf_ctx_range(struct __sk_buff, mark):
case bpf_ctx_range(struct __sk_buff, tc_index):
case bpf_ctx_range(struct __sk_buff, priority):
break;
......@@ -3693,6 +3693,7 @@ static bool sk_skb_is_valid_access(int off, int size,
}
switch (off) {
case bpf_ctx_range(struct __sk_buff, mark):
case bpf_ctx_range(struct __sk_buff, tc_classid):
return false;
case bpf_ctx_range(struct __sk_buff, data):
......
......@@ -62,7 +62,7 @@ int bpf_prog2(struct __sk_buff *skb)
ret = 1;
bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret);
return bpf_sk_redirect_map(&sock_map, ret, 0);
return bpf_sk_redirect_map(skb, &sock_map, ret, 0);
}
SEC("sockops")
......
......@@ -569,9 +569,10 @@ union bpf_attr {
* @flags: reserved for future use
* Return: 0 on success or negative error code
*
* int bpf_sk_redirect_map(map, key, flags)
* int bpf_sk_redirect_map(skb, map, key, flags)
* Redirect skb to a sock in map using key as a lookup key for the
* sock in map.
* @skb: pointer to skb
* @map: pointer to sockmap
* @key: key to lookup sock in map
* @flags: reserved for future use
......
......@@ -65,7 +65,7 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
int optlen) =
(void *) BPF_FUNC_setsockopt;
static int (*bpf_sk_redirect_map)(void *map, int key, int flags) =
static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
(void *) BPF_FUNC_sk_redirect_map;
static int (*bpf_sock_map_update)(void *map, void *key, void *value,
unsigned long long flags) =
......
......@@ -61,8 +61,8 @@ int bpf_prog2(struct __sk_buff *skb)
bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk);
if (!map)
return bpf_sk_redirect_map(&sock_map_rx, sk, 0);
return bpf_sk_redirect_map(&sock_map_tx, sk, 0);
return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0);
return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0);
}
char _license[] SEC("license") = "GPL";
......@@ -466,7 +466,7 @@ static void test_sockmap(int tasks, void *data)
int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc;
struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break;
int ports[] = {50200, 50201, 50202, 50204};
int err, i, fd, sfd[6] = {0xdeadbeef};
int err, i, fd, udp, sfd[6] = {0xdeadbeef};
u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
int parse_prog, verdict_prog;
struct sockaddr_in addr;
......@@ -548,6 +548,16 @@ static void test_sockmap(int tasks, void *data)
goto out_sockmap;
}
/* Test update with unsupported UDP socket */
udp = socket(AF_INET, SOCK_DGRAM, 0);
i = 0;
err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY);
if (!err) {
printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n",
i, udp);
goto out_sockmap;
}
/* Test update without programs */
for (i = 0; i < 6; i++) {
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
......
......@@ -1130,15 +1130,27 @@ static struct bpf_test tests[] = {
.errstr = "invalid bpf_context access",
},
{
"check skb->mark is writeable by SK_SKB",
"invalid access of skb->mark for SK_SKB",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, mark)),
BPF_EXIT_INSN(),
},
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_SKB,
.errstr = "invalid bpf_context access",
},
{
"check skb->mark is not writeable by SK_SKB",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
offsetof(struct __sk_buff, mark)),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_SKB,
.errstr = "invalid bpf_context access",
},
{
"check skb->tc_index is writeable by SK_SKB",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment