Commit 34f79502 authored by John Fastabend's avatar John Fastabend Committed by David S. Miller

bpf: avoid preempt enable/disable in sockmap using tcp_skb_cb region

SK_SKB BPF programs are run from the socket/tcp context but early in
the stack before much of the TCP metadata is needed in tcp_skb_cb. So
we can use some unused fields to place BPF metadata needed for SK_SKB
programs when implementing the redirect function.

This allows us to drop the preempt disable logic. It does however
require an API change so sk_redirect_map() has been updated to
additionally provide ctx_ptr to skb. Note, we do however continue to
disable/enable preemption around actual BPF program running to account
for map updates.
Signed-off-by: default avatarJohn Fastabend <john.fastabend@gmail.com>
Acked-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
Acked-by: default avatarAlexei Starovoitov <ast@kernel.org>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 435bf0d3
...@@ -728,7 +728,7 @@ void xdp_do_flush_map(void); ...@@ -728,7 +728,7 @@ void xdp_do_flush_map(void);
void bpf_warn_invalid_xdp_action(u32 act); void bpf_warn_invalid_xdp_action(u32 act);
void bpf_warn_invalid_xdp_redirect(u32 ifindex); void bpf_warn_invalid_xdp_redirect(u32 ifindex);
struct sock *do_sk_redirect_map(void); struct sock *do_sk_redirect_map(struct sk_buff *skb);
#ifdef CONFIG_BPF_JIT #ifdef CONFIG_BPF_JIT
extern int bpf_jit_enable; extern int bpf_jit_enable;
......
...@@ -840,6 +840,11 @@ struct tcp_skb_cb { ...@@ -840,6 +840,11 @@ struct tcp_skb_cb {
struct inet6_skb_parm h6; struct inet6_skb_parm h6;
#endif #endif
} header; /* For incoming skbs */ } header; /* For incoming skbs */
struct {
__u32 key;
__u32 flags;
struct bpf_map *map;
} bpf;
}; };
}; };
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/list.h> #include <linux/list.h>
#include <net/strparser.h> #include <net/strparser.h>
#include <net/tcp.h>
struct bpf_stab { struct bpf_stab {
struct bpf_map map; struct bpf_map map;
...@@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) ...@@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
return SK_DROP; return SK_DROP;
skb_orphan(skb); skb_orphan(skb);
/* We need to ensure that BPF metadata for maps is also cleared
* when we orphan the skb so that we don't have the possibility
* to reference a stale map.
*/
TCP_SKB_CB(skb)->bpf.map = NULL;
skb->sk = psock->sock; skb->sk = psock->sock;
bpf_compute_data_end(skb); bpf_compute_data_end(skb);
preempt_disable();
rc = (*prog->bpf_func)(skb, prog->insnsi); rc = (*prog->bpf_func)(skb, prog->insnsi);
preempt_enable();
skb->sk = NULL; skb->sk = NULL;
return rc; return rc;
...@@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) ...@@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
struct sock *sk; struct sock *sk;
int rc; int rc;
/* Because we use per cpu values to feed input from sock redirect
* in BPF program to do_sk_redirect_map() call we need to ensure we
* are not preempted. RCU read lock is not sufficient in this case
* with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
*/
preempt_disable();
rc = smap_verdict_func(psock, skb); rc = smap_verdict_func(psock, skb);
switch (rc) { switch (rc) {
case SK_REDIRECT: case SK_REDIRECT:
sk = do_sk_redirect_map(); sk = do_sk_redirect_map(skb);
preempt_enable();
if (likely(sk)) { if (likely(sk)) {
struct smap_psock *peer = smap_psock_sk(sk); struct smap_psock *peer = smap_psock_sk(sk);
...@@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) ...@@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
/* Fall through and free skb otherwise */ /* Fall through and free skb otherwise */
case SK_DROP: case SK_DROP:
default: default:
if (rc != SK_REDIRECT)
preempt_enable();
kfree_skb(skb); kfree_skb(skb);
} }
} }
......
...@@ -1839,31 +1839,31 @@ static const struct bpf_func_proto bpf_redirect_proto = { ...@@ -1839,31 +1839,31 @@ static const struct bpf_func_proto bpf_redirect_proto = {
.arg2_type = ARG_ANYTHING, .arg2_type = ARG_ANYTHING,
}; };
BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags) BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
struct bpf_map *, map, u32, key, u64, flags)
{ {
struct redirect_info *ri = this_cpu_ptr(&redirect_info); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
if (unlikely(flags)) if (unlikely(flags))
return SK_ABORTED; return SK_ABORTED;
ri->ifindex = key; tcb->bpf.key = key;
ri->flags = flags; tcb->bpf.flags = flags;
ri->map = map; tcb->bpf.map = map;
return SK_REDIRECT; return SK_REDIRECT;
} }
struct sock *do_sk_redirect_map(void) struct sock *do_sk_redirect_map(struct sk_buff *skb)
{ {
struct redirect_info *ri = this_cpu_ptr(&redirect_info); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
struct sock *sk = NULL; struct sock *sk = NULL;
if (ri->map) { if (tcb->bpf.map) {
sk = __sock_map_lookup_elem(ri->map, ri->ifindex); sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);
ri->ifindex = 0; tcb->bpf.key = 0;
ri->map = NULL; tcb->bpf.map = NULL;
/* we do not clear flags for future lookup */
} }
return sk; return sk;
...@@ -1873,9 +1873,10 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = { ...@@ -1873,9 +1873,10 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
.func = bpf_sk_redirect_map, .func = bpf_sk_redirect_map,
.gpl_only = false, .gpl_only = false,
.ret_type = RET_INTEGER, .ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR, .arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING, .arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING,
.arg4_type = ARG_ANYTHING,
}; };
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
......
...@@ -62,7 +62,7 @@ int bpf_prog2(struct __sk_buff *skb) ...@@ -62,7 +62,7 @@ int bpf_prog2(struct __sk_buff *skb)
ret = 1; ret = 1;
bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret); bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret);
return bpf_sk_redirect_map(&sock_map, ret, 0); return bpf_sk_redirect_map(skb, &sock_map, ret, 0);
} }
SEC("sockops") SEC("sockops")
......
...@@ -569,9 +569,10 @@ union bpf_attr { ...@@ -569,9 +569,10 @@ union bpf_attr {
* @flags: reserved for future use * @flags: reserved for future use
* Return: 0 on success or negative error code * Return: 0 on success or negative error code
* *
* int bpf_sk_redirect_map(map, key, flags) * int bpf_sk_redirect_map(skb, map, key, flags)
* Redirect skb to a sock in map using key as a lookup key for the * Redirect skb to a sock in map using key as a lookup key for the
* sock in map. * sock in map.
* @skb: pointer to skb
* @map: pointer to sockmap * @map: pointer to sockmap
* @key: key to lookup sock in map * @key: key to lookup sock in map
* @flags: reserved for future use * @flags: reserved for future use
......
...@@ -65,7 +65,7 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = ...@@ -65,7 +65,7 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
int optlen) = int optlen) =
(void *) BPF_FUNC_setsockopt; (void *) BPF_FUNC_setsockopt;
static int (*bpf_sk_redirect_map)(void *map, int key, int flags) = static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
(void *) BPF_FUNC_sk_redirect_map; (void *) BPF_FUNC_sk_redirect_map;
static int (*bpf_sock_map_update)(void *map, void *key, void *value, static int (*bpf_sock_map_update)(void *map, void *key, void *value,
unsigned long long flags) = unsigned long long flags) =
......
...@@ -61,8 +61,8 @@ int bpf_prog2(struct __sk_buff *skb) ...@@ -61,8 +61,8 @@ int bpf_prog2(struct __sk_buff *skb)
bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk); bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk);
if (!map) if (!map)
return bpf_sk_redirect_map(&sock_map_rx, sk, 0); return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0);
return bpf_sk_redirect_map(&sock_map_tx, sk, 0); return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0);
} }
char _license[] SEC("license") = "GPL"; char _license[] SEC("license") = "GPL";
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment