Commit 1ef09e12 authored by Martin KaFai Lau

Merge branch 'bpf: Fix src IP addr related limitation in bpf_*_fib_lookup()'

Martynas Pumputis says:

====================
This patchset fixes a limitation of the bpf_*_fib_lookup() helpers that
prevents them from being used in BPF dataplanes where a network interface
has more than one IP address. See the first patch for more details.
Thanks!

* v2->v3: Address Martin KaFai Lau's feedback
* v1->v2: Use IPv6 stubs to fix compilation when CONFIG_IPV6=m.
====================
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
parents 1be84ca5 b0f7a8ca
...@@ -85,6 +85,11 @@ struct ipv6_bpf_stub { ...@@ -85,6 +85,11 @@ struct ipv6_bpf_stub {
sockptr_t optval, unsigned int optlen); sockptr_t optval, unsigned int optlen);
int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, int (*ipv6_getsockopt)(struct sock *sk, int level, int optname,
sockptr_t optval, sockptr_t optlen); sockptr_t optval, sockptr_t optlen);
int (*ipv6_dev_get_saddr)(struct net *net,
const struct net_device *dst_dev,
const struct in6_addr *daddr,
unsigned int prefs,
struct in6_addr *saddr);
}; };
extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
......
...@@ -3264,6 +3264,11 @@ union bpf_attr { ...@@ -3264,6 +3264,11 @@ union bpf_attr {
* and *params*->smac will not be set as output. A common * and *params*->smac will not be set as output. A common
* use case is to call **bpf_redirect_neigh**\ () after * use case is to call **bpf_redirect_neigh**\ () after
* doing **bpf_fib_lookup**\ (). * doing **bpf_fib_lookup**\ ().
* **BPF_FIB_LOOKUP_SRC**
* Derive and set source IP addr in *params*->ipv{4,6}_src
* for the nexthop. If the src addr cannot be derived,
* **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
* case, *params*->dmac and *params*->smac are not set either.
* *
* *ctx* is either **struct xdp_md** for XDP programs or * *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs. * **struct sk_buff** tc cls_act programs.
...@@ -6964,6 +6969,7 @@ enum { ...@@ -6964,6 +6969,7 @@ enum {
BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
BPF_FIB_LOOKUP_TBID = (1U << 3), BPF_FIB_LOOKUP_TBID = (1U << 3),
BPF_FIB_LOOKUP_SRC = (1U << 4),
}; };
enum { enum {
...@@ -6976,6 +6982,7 @@ enum { ...@@ -6976,6 +6982,7 @@ enum {
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */
}; };
struct bpf_fib_lookup { struct bpf_fib_lookup {
...@@ -7010,6 +7017,9 @@ struct bpf_fib_lookup { ...@@ -7010,6 +7017,9 @@ struct bpf_fib_lookup {
__u32 rt_metric; __u32 rt_metric;
}; };
/* input: source address to consider for lookup
* output: source address result from lookup
*/
union { union {
__be32 ipv4_src; __be32 ipv4_src;
__u32 ipv6_src[4]; /* in6_addr; network order */ __u32 ipv6_src[4]; /* in6_addr; network order */
......
...@@ -5850,6 +5850,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, ...@@ -5850,6 +5850,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
params->rt_metric = res.fi->fib_priority; params->rt_metric = res.fi->fib_priority;
params->ifindex = dev->ifindex; params->ifindex = dev->ifindex;
if (flags & BPF_FIB_LOOKUP_SRC)
params->ipv4_src = fib_result_prefsrc(net, &res);
/* xdp and cls_bpf programs are run in RCU-bh so /* xdp and cls_bpf programs are run in RCU-bh so
* rcu_read_lock_bh is not needed here * rcu_read_lock_bh is not needed here
*/ */
...@@ -5992,6 +5995,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, ...@@ -5992,6 +5995,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
params->rt_metric = res.f6i->fib6_metric; params->rt_metric = res.f6i->fib6_metric;
params->ifindex = dev->ifindex; params->ifindex = dev->ifindex;
if (flags & BPF_FIB_LOOKUP_SRC) {
if (res.f6i->fib6_prefsrc.plen) {
*src = res.f6i->fib6_prefsrc.addr;
} else {
err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev,
&fl6.daddr, 0,
src);
if (err)
return BPF_FIB_LKUP_RET_NO_SRC_ADDR;
}
}
if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
goto set_fwd_params; goto set_fwd_params;
...@@ -6010,7 +6025,8 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, ...@@ -6010,7 +6025,8 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
#endif #endif
#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID) BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
BPF_FIB_LOOKUP_SRC)
BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
struct bpf_fib_lookup *, params, int, plen, u32, flags) struct bpf_fib_lookup *, params, int, plen, u32, flags)
......
...@@ -1061,6 +1061,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { ...@@ -1061,6 +1061,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
.udp6_lib_lookup = __udp6_lib_lookup, .udp6_lib_lookup = __udp6_lib_lookup,
.ipv6_setsockopt = do_ipv6_setsockopt, .ipv6_setsockopt = do_ipv6_setsockopt,
.ipv6_getsockopt = do_ipv6_getsockopt, .ipv6_getsockopt = do_ipv6_getsockopt,
.ipv6_dev_get_saddr = ipv6_dev_get_saddr,
}; };
static int __init inet6_init(void) static int __init inet6_init(void)
......
...@@ -3264,6 +3264,11 @@ union bpf_attr { ...@@ -3264,6 +3264,11 @@ union bpf_attr {
* and *params*->smac will not be set as output. A common * and *params*->smac will not be set as output. A common
* use case is to call **bpf_redirect_neigh**\ () after * use case is to call **bpf_redirect_neigh**\ () after
* doing **bpf_fib_lookup**\ (). * doing **bpf_fib_lookup**\ ().
* **BPF_FIB_LOOKUP_SRC**
* Derive and set source IP addr in *params*->ipv{4,6}_src
* for the nexthop. If the src addr cannot be derived,
* **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
* case, *params*->dmac and *params*->smac are not set either.
* *
* *ctx* is either **struct xdp_md** for XDP programs or * *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs. * **struct sk_buff** tc cls_act programs.
...@@ -6964,6 +6969,7 @@ enum { ...@@ -6964,6 +6969,7 @@ enum {
BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
BPF_FIB_LOOKUP_TBID = (1U << 3), BPF_FIB_LOOKUP_TBID = (1U << 3),
BPF_FIB_LOOKUP_SRC = (1U << 4),
}; };
enum { enum {
...@@ -6976,6 +6982,7 @@ enum { ...@@ -6976,6 +6982,7 @@ enum {
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */
}; };
struct bpf_fib_lookup { struct bpf_fib_lookup {
...@@ -7010,6 +7017,9 @@ struct bpf_fib_lookup { ...@@ -7010,6 +7017,9 @@ struct bpf_fib_lookup {
__u32 rt_metric; __u32 rt_metric;
}; };
/* input: source address to consider for lookup
* output: source address result from lookup
*/
union { union {
__be32 ipv4_src; __be32 ipv4_src;
__u32 ipv6_src[4]; /* in6_addr; network order */ __u32 ipv6_src[4]; /* in6_addr; network order */
......
...@@ -11,9 +11,13 @@ ...@@ -11,9 +11,13 @@
#define NS_TEST "fib_lookup_ns" #define NS_TEST "fib_lookup_ns"
#define IPV6_IFACE_ADDR "face::face" #define IPV6_IFACE_ADDR "face::face"
#define IPV6_IFACE_ADDR_SEC "cafe::cafe"
#define IPV6_ADDR_DST "face::3"
#define IPV6_NUD_FAILED_ADDR "face::1" #define IPV6_NUD_FAILED_ADDR "face::1"
#define IPV6_NUD_STALE_ADDR "face::2" #define IPV6_NUD_STALE_ADDR "face::2"
#define IPV4_IFACE_ADDR "10.0.0.254" #define IPV4_IFACE_ADDR "10.0.0.254"
#define IPV4_IFACE_ADDR_SEC "10.1.0.254"
#define IPV4_ADDR_DST "10.2.0.254"
#define IPV4_NUD_FAILED_ADDR "10.0.0.1" #define IPV4_NUD_FAILED_ADDR "10.0.0.1"
#define IPV4_NUD_STALE_ADDR "10.0.0.2" #define IPV4_NUD_STALE_ADDR "10.0.0.2"
#define IPV4_TBID_ADDR "172.0.0.254" #define IPV4_TBID_ADDR "172.0.0.254"
...@@ -31,6 +35,7 @@ struct fib_lookup_test { ...@@ -31,6 +35,7 @@ struct fib_lookup_test {
const char *desc; const char *desc;
const char *daddr; const char *daddr;
int expected_ret; int expected_ret;
const char *expected_src;
int lookup_flags; int lookup_flags;
__u32 tbid; __u32 tbid;
__u8 dmac[6]; __u8 dmac[6];
...@@ -69,6 +74,22 @@ static const struct fib_lookup_test tests[] = { ...@@ -69,6 +74,22 @@ static const struct fib_lookup_test tests[] = {
.daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
.lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100, .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100,
.dmac = DMAC_INIT2, }, .dmac = DMAC_INIT2, },
{ .desc = "IPv4 set src addr from netdev",
.daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
.expected_src = IPV4_IFACE_ADDR,
.lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
{ .desc = "IPv6 set src addr from netdev",
.daddr = IPV6_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
.expected_src = IPV6_IFACE_ADDR,
.lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
{ .desc = "IPv4 set prefsrc addr from route",
.daddr = IPV4_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
.expected_src = IPV4_IFACE_ADDR_SEC,
.lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
{ .desc = "IPv6 set prefsrc addr route",
.daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
.expected_src = IPV6_IFACE_ADDR_SEC,
.lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
}; };
static int ifindex; static int ifindex;
...@@ -97,6 +118,13 @@ static int setup_netns(void) ...@@ -97,6 +118,13 @@ static int setup_netns(void)
SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR); SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR);
SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC); SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC);
/* Setup for prefsrc IP addr selection */
SYS(fail, "ip addr add %s/24 dev veth1", IPV4_IFACE_ADDR_SEC);
SYS(fail, "ip route add %s/32 dev veth1 src %s", IPV4_ADDR_DST, IPV4_IFACE_ADDR_SEC);
SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR_SEC);
SYS(fail, "ip route add %s/128 dev veth1 src %s", IPV6_ADDR_DST, IPV6_IFACE_ADDR_SEC);
/* Setup for tbid lookup tests */ /* Setup for tbid lookup tests */
SYS(fail, "ip addr add %s/24 dev veth2", IPV4_TBID_ADDR); SYS(fail, "ip addr add %s/24 dev veth2", IPV4_TBID_ADDR);
SYS(fail, "ip route del %s/24 dev veth2", IPV4_TBID_NET); SYS(fail, "ip route del %s/24 dev veth2", IPV4_TBID_NET);
...@@ -133,9 +161,12 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo ...@@ -133,9 +161,12 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo
if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) { if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) {
params->family = AF_INET6; params->family = AF_INET6;
ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src); if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) {
if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)")) ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src);
return -1; if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)"))
return -1;
}
return 0; return 0;
} }
...@@ -143,9 +174,12 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo ...@@ -143,9 +174,12 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo
if (!ASSERT_EQ(ret, 1, "convert IP[46] address")) if (!ASSERT_EQ(ret, 1, "convert IP[46] address"))
return -1; return -1;
params->family = AF_INET; params->family = AF_INET;
ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, &params->ipv4_src);
if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)")) if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) {
return -1; ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, &params->ipv4_src);
if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)"))
return -1;
}
return 0; return 0;
} }
...@@ -156,6 +190,40 @@ static void mac_str(char *b, const __u8 *mac) ...@@ -156,6 +190,40 @@ static void mac_str(char *b, const __u8 *mac)
mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
} }
/*
 * Check that the source address stored in fib_params matches expected_src.
 *
 * fib_params:   lookup parameters; ->family selects whether ->ipv6_src or
 *               ->ipv4_src is compared.
 * expected_src: expected source address in textual (presentation) form.
 *
 * Mismatches are reported through ASSERT_EQ / PRINT_FAIL; nothing is
 * returned to the caller.
 */
static void assert_src_ip(struct bpf_fib_lookup *fib_params, const char *expected_src)
{
	int ret;
	__u32 src6[4];
	__be32 src4;

	switch (fib_params->family) {
	case AF_INET6:
		ret = inet_pton(AF_INET6, expected_src, src6);
		/* src6 may be left unset if parsing failed; don't compare it. */
		if (!ASSERT_EQ(ret, 1, "inet_pton(expected_src)"))
			return;

		ret = memcmp(src6, fib_params->ipv6_src, sizeof(fib_params->ipv6_src));
		if (!ASSERT_EQ(ret, 0, "fib_lookup ipv6 src")) {
			char str_src6[64];

			/* Print both addresses in readable form to ease debugging. */
			inet_ntop(AF_INET6, fib_params->ipv6_src, str_src6,
				  sizeof(str_src6));
			printf("ipv6 expected %s actual %s ", expected_src,
			       str_src6);
		}
		break;
	case AF_INET:
		ret = inet_pton(AF_INET, expected_src, &src4);
		/* src4 may be left unset if parsing failed; don't compare it. */
		if (!ASSERT_EQ(ret, 1, "inet_pton(expected_src)"))
			return;

		ASSERT_EQ(fib_params->ipv4_src, src4, "fib_lookup ipv4 src");
		break;
	default:
		PRINT_FAIL("invalid addr family: %d", fib_params->family);
	}
}
void test_fib_lookup(void) void test_fib_lookup(void)
{ {
struct bpf_fib_lookup *fib_params; struct bpf_fib_lookup *fib_params;
...@@ -207,6 +275,9 @@ void test_fib_lookup(void) ...@@ -207,6 +275,9 @@ void test_fib_lookup(void)
ASSERT_EQ(skel->bss->fib_lookup_ret, tests[i].expected_ret, ASSERT_EQ(skel->bss->fib_lookup_ret, tests[i].expected_ret,
"fib_lookup_ret"); "fib_lookup_ret");
if (tests[i].expected_src)
assert_src_ip(fib_params, tests[i].expected_src);
ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac)); ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac));
if (!ASSERT_EQ(ret, 0, "dmac not match")) { if (!ASSERT_EQ(ret, 0, "dmac not match")) {
char expected[18], actual[18]; char expected[18], actual[18];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment