Commit 31de4105 authored by Martin KaFai Lau, committed by Daniel Borkmann

bpf: Add BPF_FIB_LOOKUP_SKIP_NEIGH for bpf_fib_lookup

The bpf_fib_lookup() also looks up the neigh table.
This was done before bpf_redirect_neigh() was added.

In the use case that does not manage the neigh table
and requires bpf_fib_lookup() to look up a fib to
decide if it needs to redirect or not, the bpf prog can
depend only on using bpf_redirect_neigh() to look up the
neigh. bpf_redirect_neigh() also keeps the neigh entries fresh and connected.

This patch adds a bpf_fib_lookup flag, SKIP_NEIGH, to avoid
the double neigh lookup when the bpf prog always calls
bpf_redirect_neigh() to do the neigh lookup. The params->smac
output is also skipped when SKIP_NEIGH is set because
bpf_redirect_neigh() will figure out the smac as well.

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20230217205515.3583372-1-martin.lau@linux.dev
parent 49b5e77a
...@@ -3134,6 +3134,11 @@ union bpf_attr { ...@@ -3134,6 +3134,11 @@ union bpf_attr {
* **BPF_FIB_LOOKUP_OUTPUT** * **BPF_FIB_LOOKUP_OUTPUT**
* Perform lookup from an egress perspective (default is * Perform lookup from an egress perspective (default is
* ingress). * ingress).
* **BPF_FIB_LOOKUP_SKIP_NEIGH**
* Skip the neighbour table lookup. *params*->dmac
* and *params*->smac will not be set as output. A common
* use case is to call **bpf_redirect_neigh**\ () after
* doing **bpf_fib_lookup**\ ().
* *
* *ctx* is either **struct xdp_md** for XDP programs or * *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs. * **struct sk_buff** tc cls_act programs.
...@@ -6750,6 +6755,7 @@ struct bpf_raw_tracepoint_args { ...@@ -6750,6 +6755,7 @@ struct bpf_raw_tracepoint_args {
enum { enum {
BPF_FIB_LOOKUP_DIRECT = (1U << 0), BPF_FIB_LOOKUP_DIRECT = (1U << 0),
BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
}; };
enum { enum {
......
...@@ -5722,12 +5722,8 @@ static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = { ...@@ -5722,12 +5722,8 @@ static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
#endif #endif
#if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6)
static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, u32 mtu)
const struct neighbour *neigh,
const struct net_device *dev, u32 mtu)
{ {
memcpy(params->dmac, neigh->ha, ETH_ALEN);
memcpy(params->smac, dev->dev_addr, ETH_ALEN);
params->h_vlan_TCI = 0; params->h_vlan_TCI = 0;
params->h_vlan_proto = 0; params->h_vlan_proto = 0;
if (mtu) if (mtu)
...@@ -5838,21 +5834,29 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, ...@@ -5838,21 +5834,29 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (likely(nhc->nhc_gw_family != AF_INET6)) { if (likely(nhc->nhc_gw_family != AF_INET6)) {
if (nhc->nhc_gw_family) if (nhc->nhc_gw_family)
params->ipv4_dst = nhc->nhc_gw.ipv4; params->ipv4_dst = nhc->nhc_gw.ipv4;
neigh = __ipv4_neigh_lookup_noref(dev,
(__force u32)params->ipv4_dst);
} else { } else {
struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst; struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst;
params->family = AF_INET6; params->family = AF_INET6;
*dst = nhc->nhc_gw.ipv6; *dst = nhc->nhc_gw.ipv6;
neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
} }
if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
goto set_fwd_params;
if (likely(nhc->nhc_gw_family != AF_INET6))
neigh = __ipv4_neigh_lookup_noref(dev,
(__force u32)params->ipv4_dst);
else
neigh = __ipv6_neigh_lookup_noref_stub(dev, params->ipv6_dst);
if (!neigh || !(neigh->nud_state & NUD_VALID)) if (!neigh || !(neigh->nud_state & NUD_VALID))
return BPF_FIB_LKUP_RET_NO_NEIGH; return BPF_FIB_LKUP_RET_NO_NEIGH;
memcpy(params->dmac, neigh->ha, ETH_ALEN);
memcpy(params->smac, dev->dev_addr, ETH_ALEN);
return bpf_fib_set_fwd_params(params, neigh, dev, mtu); set_fwd_params:
return bpf_fib_set_fwd_params(params, mtu);
} }
#endif #endif
...@@ -5960,24 +5964,33 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, ...@@ -5960,24 +5964,33 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
params->rt_metric = res.f6i->fib6_metric; params->rt_metric = res.f6i->fib6_metric;
params->ifindex = dev->ifindex; params->ifindex = dev->ifindex;
if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
goto set_fwd_params;
/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
* not needed here. * not needed here.
*/ */
neigh = __ipv6_neigh_lookup_noref_stub(dev, dst); neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
if (!neigh || !(neigh->nud_state & NUD_VALID)) if (!neigh || !(neigh->nud_state & NUD_VALID))
return BPF_FIB_LKUP_RET_NO_NEIGH; return BPF_FIB_LKUP_RET_NO_NEIGH;
memcpy(params->dmac, neigh->ha, ETH_ALEN);
memcpy(params->smac, dev->dev_addr, ETH_ALEN);
return bpf_fib_set_fwd_params(params, neigh, dev, mtu); set_fwd_params:
return bpf_fib_set_fwd_params(params, mtu);
} }
#endif #endif
#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
BPF_FIB_LOOKUP_SKIP_NEIGH)
BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
struct bpf_fib_lookup *, params, int, plen, u32, flags) struct bpf_fib_lookup *, params, int, plen, u32, flags)
{ {
if (plen < sizeof(*params)) if (plen < sizeof(*params))
return -EINVAL; return -EINVAL;
if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT)) if (flags & ~BPF_FIB_LOOKUP_MASK)
return -EINVAL; return -EINVAL;
switch (params->family) { switch (params->family) {
...@@ -6015,7 +6028,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, ...@@ -6015,7 +6028,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
if (plen < sizeof(*params)) if (plen < sizeof(*params))
return -EINVAL; return -EINVAL;
if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT)) if (flags & ~BPF_FIB_LOOKUP_MASK)
return -EINVAL; return -EINVAL;
if (params->tot_len) if (params->tot_len)
......
...@@ -3134,6 +3134,11 @@ union bpf_attr { ...@@ -3134,6 +3134,11 @@ union bpf_attr {
* **BPF_FIB_LOOKUP_OUTPUT** * **BPF_FIB_LOOKUP_OUTPUT**
* Perform lookup from an egress perspective (default is * Perform lookup from an egress perspective (default is
* ingress). * ingress).
* **BPF_FIB_LOOKUP_SKIP_NEIGH**
* Skip the neighbour table lookup. *params*->dmac
* and *params*->smac will not be set as output. A common
* use case is to call **bpf_redirect_neigh**\ () after
* doing **bpf_fib_lookup**\ ().
* *
* *ctx* is either **struct xdp_md** for XDP programs or * *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs. * **struct sk_buff** tc cls_act programs.
...@@ -6750,6 +6755,7 @@ struct bpf_raw_tracepoint_args { ...@@ -6750,6 +6755,7 @@ struct bpf_raw_tracepoint_args {
enum { enum {
BPF_FIB_LOOKUP_DIRECT = (1U << 0), BPF_FIB_LOOKUP_DIRECT = (1U << 0),
BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
}; };
enum { enum {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment