Commit 4429bdc4 authored by Alexei Starovoitov

Merge branch 'New BPF helpers to accelerate synproxy'

Maxim Mikityanskiy says:

====================

The first patch of this series is a documentation fix.

The second patch allows BPF helpers to accept memory regions of fixed
size without doing runtime size checks.

The next two patches add new functionality that allows XDP to
accelerate iptables synproxy.

v1 of this series [1] used to include a patch that exposed conntrack
lookup to BPF using stable helpers. It was superseded by series [2] by
Kumar Kartikeya Dwivedi, which implements this functionality using
unstable helpers.

The third patch adds new helpers to issue and check SYN cookies without
binding to a socket, which is useful in the synproxy scenario.

The fourth patch adds a selftest, which includes an XDP program and a
userspace control application. The XDP program uses socketless SYN
cookie helpers and queries conntrack status instead of socket status.
The userspace control application allows tuning the parameters of the XDP
program. This program also serves as a minimal example of usage of the
new functionality.

The last two patches expose the new helpers to TC BPF and extend the
selftest.

The draft of the new functionality was presented at Netdev 0x15 [3].

v2 changes:

Split into two series, submitted bugfixes to bpf, dropped the conntrack
patches, implemented the timestamp cookie in BPF using bpf_loop, dropped
the timestamp cookie patch.

v3 changes:

Moved some patches from bpf to bpf-next, dropped the patch that changed
error codes, split the new helpers into IPv4/IPv6, added verifier
functionality to accept memory regions of fixed size.

v4 changes:

Converted the selftest to the test_progs runner. Replaced some
deprecated functions in xdp_synproxy userspace helper.

v5 changes:

Fixed a bug in the selftest. Added questionable functionality to support
new helpers in TC BPF, added selftests for it.

v6 changes:

Wrap the new helpers themselves into #ifdef CONFIG_SYN_COOKIES, replaced
fclose with pclose and fixed the MSS for IPv6 in the selftest.

v7 changes:

Fixed the off-by-one error in indices, changed the section name to
"xdp", added missing kernel config options to vmtest in CI.

v8 changes:

Properly rebased, dropped the first patch (the same change was applied
by someone else), updated the cover letter.

v9 changes:

Fixed selftests for no_alu32.

v10 changes:

Selftests for s390x were blacklisted due to lack of support of kfunc,
rebased the series, split selftests to separate commits, created
ARG_PTR_TO_FIXED_SIZE_MEM and packed arg_size, addressed the rest of
comments.

[1]: https://lore.kernel.org/bpf/20211020095815.GJ28644@breakpoint.cc/t/
[2]: https://lore.kernel.org/bpf/20220114163953.1455836-1-memxor@gmail.com/
[3]: https://netdevconf.info/0x15/session.html?Accelerating-synproxy-with-XDP
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 88bf1858 784d5dc0
...@@ -401,6 +401,9 @@ enum bpf_type_flag { ...@@ -401,6 +401,9 @@ enum bpf_type_flag {
/* DYNPTR points to a ringbuf record. */ /* DYNPTR points to a ringbuf record. */
DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS), DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS),
/* Size is known at compile time. */
MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS),
__BPF_TYPE_FLAG_MAX, __BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
}; };
...@@ -464,6 +467,8 @@ enum bpf_arg_type { ...@@ -464,6 +467,8 @@ enum bpf_arg_type {
* all bytes or clear them in error case. * all bytes or clear them in error case.
*/ */
ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM, ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM,
/* Pointer to valid memory of size known at compile time. */
ARG_PTR_TO_FIXED_SIZE_MEM = MEM_FIXED_SIZE | ARG_PTR_TO_MEM,
/* This must be the last entry. Its purpose is to ensure the enum is /* This must be the last entry. Its purpose is to ensure the enum is
* wide enough to hold the higher bits reserved for bpf_type_flag. * wide enough to hold the higher bits reserved for bpf_type_flag.
...@@ -529,6 +534,14 @@ struct bpf_func_proto { ...@@ -529,6 +534,14 @@ struct bpf_func_proto {
u32 *arg5_btf_id; u32 *arg5_btf_id;
}; };
u32 *arg_btf_id[5]; u32 *arg_btf_id[5];
struct {
size_t arg1_size;
size_t arg2_size;
size_t arg3_size;
size_t arg4_size;
size_t arg5_size;
};
size_t arg_size[5];
}; };
int *ret_btf_id; /* return value btf_id */ int *ret_btf_id; /* return value btf_id */
bool (*allowed)(const struct bpf_prog *prog); bool (*allowed)(const struct bpf_prog *prog);
......
...@@ -432,6 +432,7 @@ u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph, ...@@ -432,6 +432,7 @@ u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
struct tcphdr *th, u32 *cookie); struct tcphdr *th, u32 *cookie);
u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
struct tcphdr *th, u32 *cookie); struct tcphdr *th, u32 *cookie);
u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss);
u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops, const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct tcphdr *th); struct sock *sk, struct tcphdr *th);
......
...@@ -3597,10 +3597,11 @@ union bpf_attr { ...@@ -3597,10 +3597,11 @@ union bpf_attr {
* *
* *iph* points to the start of the IPv4 or IPv6 header, while * *iph* points to the start of the IPv4 or IPv6 header, while
* *iph_len* contains **sizeof**\ (**struct iphdr**) or * *iph_len* contains **sizeof**\ (**struct iphdr**) or
* **sizeof**\ (**struct ip6hdr**). * **sizeof**\ (**struct ipv6hdr**).
* *
* *th* points to the start of the TCP header, while *th_len* * *th* points to the start of the TCP header, while *th_len*
* contains **sizeof**\ (**struct tcphdr**). * contains the length of the TCP header (at least
* **sizeof**\ (**struct tcphdr**)).
* Return * Return
* 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
* error otherwise. * error otherwise.
...@@ -3783,10 +3784,11 @@ union bpf_attr { ...@@ -3783,10 +3784,11 @@ union bpf_attr {
* *
* *iph* points to the start of the IPv4 or IPv6 header, while * *iph* points to the start of the IPv4 or IPv6 header, while
* *iph_len* contains **sizeof**\ (**struct iphdr**) or * *iph_len* contains **sizeof**\ (**struct iphdr**) or
* **sizeof**\ (**struct ip6hdr**). * **sizeof**\ (**struct ipv6hdr**).
* *
* *th* points to the start of the TCP header, while *th_len* * *th* points to the start of the TCP header, while *th_len*
* contains the length of the TCP header. * contains the length of the TCP header with options (at least
* **sizeof**\ (**struct tcphdr**)).
* Return * Return
* On success, lower 32 bits hold the generated SYN cookie in * On success, lower 32 bits hold the generated SYN cookie in
* followed by 16 bits which hold the MSS value for that cookie, * followed by 16 bits which hold the MSS value for that cookie,
...@@ -5249,6 +5251,80 @@ union bpf_attr { ...@@ -5249,6 +5251,80 @@ union bpf_attr {
* Pointer to the underlying dynptr data, NULL if the dynptr is * Pointer to the underlying dynptr data, NULL if the dynptr is
* read-only, if the dynptr is invalid, or if the offset and length * read-only, if the dynptr is invalid, or if the offset and length
* is out of bounds. * is out of bounds.
*
* s64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th, u32 th_len)
* Description
* Try to issue a SYN cookie for the packet with corresponding
* IPv4/TCP headers, *iph* and *th*, without depending on a
* listening socket.
*
* *iph* points to the IPv4 header.
*
* *th* points to the start of the TCP header, while *th_len*
* contains the length of the TCP header (at least
* **sizeof**\ (**struct tcphdr**)).
* Return
* On success, the lower 32 bits hold the generated SYN cookie,
* followed by 16 bits which hold the MSS value for that cookie,
* and the top 16 bits are unused.
*
* On failure, the returned value is one of the following:
*
* **-EINVAL** if *th_len* is invalid.
*
* s64 bpf_tcp_raw_gen_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th, u32 th_len)
* Description
* Try to issue a SYN cookie for the packet with corresponding
* IPv6/TCP headers, *iph* and *th*, without depending on a
* listening socket.
*
* *iph* points to the IPv6 header.
*
* *th* points to the start of the TCP header, while *th_len*
* contains the length of the TCP header (at least
* **sizeof**\ (**struct tcphdr**)).
* Return
* On success, the lower 32 bits hold the generated SYN cookie,
* followed by 16 bits which hold the MSS value for that cookie,
* and the top 16 bits are unused.
*
* On failure, the returned value is one of the following:
*
* **-EINVAL** if *th_len* is invalid.
*
* **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
*
* long bpf_tcp_raw_check_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th)
* Description
* Check whether *iph* and *th* contain a valid SYN cookie ACK
* without depending on a listening socket.
*
* *iph* points to the IPv4 header.
*
* *th* points to the TCP header.
* Return
* 0 if *iph* and *th* are a valid SYN cookie ACK.
*
* On failure, the returned value is one of the following:
*
* **-EACCES** if the SYN cookie is not valid.
*
* long bpf_tcp_raw_check_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th)
* Description
* Check whether *iph* and *th* contain a valid SYN cookie ACK
* without depending on a listening socket.
*
* *iph* points to the IPv6 header.
*
* *th* points to the TCP header.
* Return
* 0 if *iph* and *th* are a valid SYN cookie ACK.
*
* On failure, the returned value is one of the following:
*
* **-EACCES** if the SYN cookie is not valid.
*
* **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -5455,6 +5531,10 @@ union bpf_attr { ...@@ -5455,6 +5531,10 @@ union bpf_attr {
FN(dynptr_read), \ FN(dynptr_read), \
FN(dynptr_write), \ FN(dynptr_write), \
FN(dynptr_data), \ FN(dynptr_data), \
FN(tcp_raw_gen_syncookie_ipv4), \
FN(tcp_raw_gen_syncookie_ipv6), \
FN(tcp_raw_check_syncookie_ipv4), \
FN(tcp_raw_check_syncookie_ipv6), \
/* */ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
......
...@@ -5848,6 +5848,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, ...@@ -5848,6 +5848,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno]; struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
enum bpf_arg_type arg_type = fn->arg_type[arg]; enum bpf_arg_type arg_type = fn->arg_type[arg];
enum bpf_reg_type type = reg->type; enum bpf_reg_type type = reg->type;
u32 *arg_btf_id = NULL;
int err = 0; int err = 0;
if (arg_type == ARG_DONTCARE) if (arg_type == ARG_DONTCARE)
...@@ -5884,7 +5885,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, ...@@ -5884,7 +5885,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
*/ */
goto skip_type_check; goto skip_type_check;
err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg], meta); /* arg_btf_id and arg_size are in a union. */
if (base_type(arg_type) == ARG_PTR_TO_BTF_ID)
arg_btf_id = fn->arg_btf_id[arg];
err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
if (err) if (err)
return err; return err;
...@@ -6011,6 +6016,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, ...@@ -6011,6 +6016,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
* next is_mem_size argument below. * next is_mem_size argument below.
*/ */
meta->raw_mode = arg_type & MEM_UNINIT; meta->raw_mode = arg_type & MEM_UNINIT;
if (arg_type & MEM_FIXED_SIZE) {
err = check_helper_mem_access(env, regno,
fn->arg_size[arg], false,
meta);
}
} else if (arg_type_is_mem_size(arg_type)) { } else if (arg_type_is_mem_size(arg_type)) {
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
...@@ -6400,11 +6410,19 @@ static bool check_raw_mode_ok(const struct bpf_func_proto *fn) ...@@ -6400,11 +6410,19 @@ static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
return count <= 1; return count <= 1;
} }
static bool check_args_pair_invalid(enum bpf_arg_type arg_curr, static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
enum bpf_arg_type arg_next)
{ {
return (base_type(arg_curr) == ARG_PTR_TO_MEM) != bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
arg_type_is_mem_size(arg_next); bool has_size = fn->arg_size[arg] != 0;
bool is_next_size = false;
if (arg + 1 < ARRAY_SIZE(fn->arg_type))
is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
return is_next_size;
return has_size == is_next_size || is_next_size == is_fixed;
} }
static bool check_arg_pair_ok(const struct bpf_func_proto *fn) static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
...@@ -6415,11 +6433,11 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn) ...@@ -6415,11 +6433,11 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
* helper function specification. * helper function specification.
*/ */
if (arg_type_is_mem_size(fn->arg1_type) || if (arg_type_is_mem_size(fn->arg1_type) ||
base_type(fn->arg5_type) == ARG_PTR_TO_MEM || check_args_pair_invalid(fn, 0) ||
check_args_pair_invalid(fn->arg1_type, fn->arg2_type) || check_args_pair_invalid(fn, 1) ||
check_args_pair_invalid(fn->arg2_type, fn->arg3_type) || check_args_pair_invalid(fn, 2) ||
check_args_pair_invalid(fn->arg3_type, fn->arg4_type) || check_args_pair_invalid(fn, 3) ||
check_args_pair_invalid(fn->arg4_type, fn->arg5_type)) check_args_pair_invalid(fn, 4))
return false; return false;
return true; return true;
...@@ -6460,7 +6478,10 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn) ...@@ -6460,7 +6478,10 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn)
if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
return false; return false;
if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i]) if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
/* arg_btf_id and arg_size are in a union. */
(base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
!(fn->arg_type[i] & MEM_FIXED_SIZE)))
return false; return false;
} }
......
...@@ -7444,6 +7444,114 @@ static const struct bpf_func_proto bpf_skb_set_tstamp_proto = { ...@@ -7444,6 +7444,114 @@ static const struct bpf_func_proto bpf_skb_set_tstamp_proto = {
.arg3_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING,
}; };
#ifdef CONFIG_SYN_COOKIES
/* Generate a SYN cookie for an IPv4/TCP header pair without a socket.
 *
 * @iph:    IPv4 header of the incoming SYN.
 * @th:     TCP header of the incoming SYN.
 * @th_len: length of the TCP header as claimed by the caller.
 *
 * Returns -EINVAL when @th_len is shorter than struct tcphdr or does not
 * match the header's own data offset (doff * 4). Otherwise returns the
 * cookie in bits 0..31 with the MSS placed in the bits starting at bit 32.
 */
BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv4, struct iphdr *, iph,
	   struct tcphdr *, th, u32, th_len)
{
	u16 mss;
	u32 cookie;

	/* The length must cover the fixed header before doff is read. */
	if (unlikely(th_len < sizeof(*th)))
		return -EINVAL;
	if (unlikely(th_len != th->doff * 4))
		return -EINVAL;

	/* Fall back to the default MSS when the parser reports 0. */
	mss = tcp_parse_mss_option(th, 0);
	if (!mss)
		mss = TCP_MSS_DEFAULT;

	/* mss is passed by pointer, so the callee may adjust it. */
	cookie = __cookie_v4_init_sequence(iph, th, &mss);

	return ((u64)mss << 32) | cookie;
}
/* Helper prototype for bpf_tcp_raw_gen_syncookie_ipv4().
 *
 * arg1 is a fixed-size memory region of sizeof(struct iphdr) bytes, while
 * arg2/arg3 are declared as a memory pointer followed by its constant size
 * (the TCP header and th_len in the helper's signature).
 */
static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = {
.func = bpf_tcp_raw_gen_syncookie_ipv4,
.gpl_only = true, /* __cookie_v4_init_sequence() is GPL */
.pkt_access = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
.arg1_size = sizeof(struct iphdr),
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
};
/* Generate a SYN cookie for an IPv6/TCP header pair without a socket.
 *
 * @iph:    IPv6 header of the incoming SYN.
 * @th:     TCP header of the incoming SYN.
 * @th_len: length of the TCP header as claimed by the caller.
 *
 * Returns -EPROTONOSUPPORT unless CONFIG_IPV6 is built in, and -EINVAL when
 * @th_len is shorter than struct tcphdr or does not match doff * 4.
 * Otherwise returns the cookie in bits 0..31 with the MSS placed in the
 * bits starting at bit 32.
 */
BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph,
	   struct tcphdr *, th, u32, th_len)
{
#if IS_BUILTIN(CONFIG_IPV6)
	/* MSS used when the SYN carries no usable MSS option. */
	const u16 mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
			      sizeof(struct ipv6hdr);
	u16 mss;
	u32 cookie;

	/* The length must cover the fixed header before doff is read. */
	if (unlikely(th_len < sizeof(*th)))
		return -EINVAL;
	if (unlikely(th_len != th->doff * 4))
		return -EINVAL;

	mss = tcp_parse_mss_option(th, 0);
	if (!mss)
		mss = mss_clamp;

	/* mss is passed by pointer, so the callee may adjust it. */
	cookie = __cookie_v6_init_sequence(iph, th, &mss);

	return ((u64)mss << 32) | cookie;
#else
	return -EPROTONOSUPPORT;
#endif
}
/* Helper prototype for bpf_tcp_raw_gen_syncookie_ipv6().
 *
 * arg1 is a fixed-size memory region of sizeof(struct ipv6hdr) bytes, while
 * arg2/arg3 are declared as a memory pointer followed by its constant size
 * (the TCP header and th_len in the helper's signature).
 */
static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = {
.func = bpf_tcp_raw_gen_syncookie_ipv6,
.gpl_only = true, /* __cookie_v6_init_sequence() is GPL */
.pkt_access = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
.arg1_size = sizeof(struct ipv6hdr),
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
};
/* Validate the SYN cookie carried by an IPv4 ACK without a socket.
 *
 * @iph: IPv4 header of the incoming ACK.
 * @th:  TCP header of the incoming ACK.
 *
 * The cookie is taken to be the acknowledged sequence number minus one.
 * Returns 0 when __cookie_v4_check() reports a positive result and
 * -EACCES otherwise.
 */
BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph,
	   struct tcphdr *, th)
{
	u32 cookie = ntohl(th->ack_seq) - 1;

	return __cookie_v4_check(iph, th, cookie) > 0 ? 0 : -EACCES;
}
/* Helper prototype for bpf_tcp_raw_check_syncookie_ipv4().
 *
 * Both arguments are fixed-size memory regions: arg1 spans
 * sizeof(struct iphdr) bytes and arg2 spans sizeof(struct tcphdr) bytes,
 * so no separate size argument is declared.
 */
static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv4_proto = {
.func = bpf_tcp_raw_check_syncookie_ipv4,
.gpl_only = true, /* __cookie_v4_check is GPL */
.pkt_access = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
.arg1_size = sizeof(struct iphdr),
.arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
.arg2_size = sizeof(struct tcphdr),
};
/* Validate the SYN cookie carried by an IPv6 ACK without a socket.
 *
 * @iph: IPv6 header of the incoming ACK.
 * @th:  TCP header of the incoming ACK.
 *
 * The cookie is taken to be the acknowledged sequence number minus one.
 * Returns -EPROTONOSUPPORT unless CONFIG_IPV6 is built in; otherwise 0 when
 * __cookie_v6_check() reports a positive result and -EACCES if it does not.
 */
BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph,
	   struct tcphdr *, th)
{
#if IS_BUILTIN(CONFIG_IPV6)
	u32 cookie = ntohl(th->ack_seq) - 1;

	return __cookie_v6_check(iph, th, cookie) > 0 ? 0 : -EACCES;
#else
	return -EPROTONOSUPPORT;
#endif
}
/* Helper prototype for bpf_tcp_raw_check_syncookie_ipv6().
 *
 * Both arguments are fixed-size memory regions: arg1 spans
 * sizeof(struct ipv6hdr) bytes and arg2 spans sizeof(struct tcphdr) bytes,
 * so no separate size argument is declared.
 */
static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv6_proto = {
.func = bpf_tcp_raw_check_syncookie_ipv6,
.gpl_only = true, /* __cookie_v6_check is GPL */
.pkt_access = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
.arg1_size = sizeof(struct ipv6hdr),
.arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
.arg2_size = sizeof(struct tcphdr),
};
#endif /* CONFIG_SYN_COOKIES */
#endif /* CONFIG_INET */ #endif /* CONFIG_INET */
bool bpf_helper_changes_pkt_data(void *func) bool bpf_helper_changes_pkt_data(void *func)
...@@ -7807,6 +7915,16 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ...@@ -7807,6 +7915,16 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_assign_proto; return &bpf_sk_assign_proto;
case BPF_FUNC_skb_set_tstamp: case BPF_FUNC_skb_set_tstamp:
return &bpf_skb_set_tstamp_proto; return &bpf_skb_set_tstamp_proto;
#ifdef CONFIG_SYN_COOKIES
case BPF_FUNC_tcp_raw_gen_syncookie_ipv4:
return &bpf_tcp_raw_gen_syncookie_ipv4_proto;
case BPF_FUNC_tcp_raw_gen_syncookie_ipv6:
return &bpf_tcp_raw_gen_syncookie_ipv6_proto;
case BPF_FUNC_tcp_raw_check_syncookie_ipv4:
return &bpf_tcp_raw_check_syncookie_ipv4_proto;
case BPF_FUNC_tcp_raw_check_syncookie_ipv6:
return &bpf_tcp_raw_check_syncookie_ipv6_proto;
#endif
#endif #endif
default: default:
return bpf_sk_base_func_proto(func_id); return bpf_sk_base_func_proto(func_id);
...@@ -7856,6 +7974,16 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ...@@ -7856,6 +7974,16 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_check_syncookie_proto; return &bpf_tcp_check_syncookie_proto;
case BPF_FUNC_tcp_gen_syncookie: case BPF_FUNC_tcp_gen_syncookie:
return &bpf_tcp_gen_syncookie_proto; return &bpf_tcp_gen_syncookie_proto;
#ifdef CONFIG_SYN_COOKIES
case BPF_FUNC_tcp_raw_gen_syncookie_ipv4:
return &bpf_tcp_raw_gen_syncookie_ipv4_proto;
case BPF_FUNC_tcp_raw_gen_syncookie_ipv6:
return &bpf_tcp_raw_gen_syncookie_ipv6_proto;
case BPF_FUNC_tcp_raw_check_syncookie_ipv4:
return &bpf_tcp_raw_check_syncookie_ipv4_proto;
case BPF_FUNC_tcp_raw_check_syncookie_ipv6:
return &bpf_tcp_raw_check_syncookie_ipv6_proto;
#endif
#endif #endif
default: default:
return bpf_sk_base_func_proto(func_id); return bpf_sk_base_func_proto(func_id);
......
...@@ -3967,7 +3967,7 @@ static bool smc_parse_options(const struct tcphdr *th, ...@@ -3967,7 +3967,7 @@ static bool smc_parse_options(const struct tcphdr *th,
/* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped /* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped
* value on success. * value on success.
*/ */
static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
{ {
const unsigned char *ptr = (const unsigned char *)(th + 1); const unsigned char *ptr = (const unsigned char *)(th + 1);
int length = (th->doff * 4) - sizeof(struct tcphdr); int length = (th->doff * 4) - sizeof(struct tcphdr);
...@@ -4006,6 +4006,7 @@ static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) ...@@ -4006,6 +4006,7 @@ static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
} }
return mss; return mss;
} }
EXPORT_SYMBOL_GPL(tcp_parse_mss_option);
/* Look for tcp options. Normally only called on SYN and SYNACK packets. /* Look for tcp options. Normally only called on SYN and SYNACK packets.
* But, this can also be called on packets in the established flow when * But, this can also be called on packets in the established flow when
......
...@@ -635,6 +635,8 @@ class PrinterHelpers(Printer): ...@@ -635,6 +635,8 @@ class PrinterHelpers(Printer):
'struct bpf_timer', 'struct bpf_timer',
'struct mptcp_sock', 'struct mptcp_sock',
'struct bpf_dynptr', 'struct bpf_dynptr',
'struct iphdr',
'struct ipv6hdr',
] ]
known_types = { known_types = {
'...', '...',
...@@ -686,6 +688,8 @@ class PrinterHelpers(Printer): ...@@ -686,6 +688,8 @@ class PrinterHelpers(Printer):
'struct bpf_timer', 'struct bpf_timer',
'struct mptcp_sock', 'struct mptcp_sock',
'struct bpf_dynptr', 'struct bpf_dynptr',
'struct iphdr',
'struct ipv6hdr',
} }
mapped_types = { mapped_types = {
'u8': '__u8', 'u8': '__u8',
......
...@@ -3597,10 +3597,11 @@ union bpf_attr { ...@@ -3597,10 +3597,11 @@ union bpf_attr {
* *
* *iph* points to the start of the IPv4 or IPv6 header, while * *iph* points to the start of the IPv4 or IPv6 header, while
* *iph_len* contains **sizeof**\ (**struct iphdr**) or * *iph_len* contains **sizeof**\ (**struct iphdr**) or
* **sizeof**\ (**struct ip6hdr**). * **sizeof**\ (**struct ipv6hdr**).
* *
* *th* points to the start of the TCP header, while *th_len* * *th* points to the start of the TCP header, while *th_len*
* contains **sizeof**\ (**struct tcphdr**). * contains the length of the TCP header (at least
* **sizeof**\ (**struct tcphdr**)).
* Return * Return
* 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
* error otherwise. * error otherwise.
...@@ -3783,10 +3784,11 @@ union bpf_attr { ...@@ -3783,10 +3784,11 @@ union bpf_attr {
* *
* *iph* points to the start of the IPv4 or IPv6 header, while * *iph* points to the start of the IPv4 or IPv6 header, while
* *iph_len* contains **sizeof**\ (**struct iphdr**) or * *iph_len* contains **sizeof**\ (**struct iphdr**) or
* **sizeof**\ (**struct ip6hdr**). * **sizeof**\ (**struct ipv6hdr**).
* *
* *th* points to the start of the TCP header, while *th_len* * *th* points to the start of the TCP header, while *th_len*
* contains the length of the TCP header. * contains the length of the TCP header with options (at least
* **sizeof**\ (**struct tcphdr**)).
* Return * Return
* On success, lower 32 bits hold the generated SYN cookie in * On success, lower 32 bits hold the generated SYN cookie in
* followed by 16 bits which hold the MSS value for that cookie, * followed by 16 bits which hold the MSS value for that cookie,
...@@ -5249,6 +5251,80 @@ union bpf_attr { ...@@ -5249,6 +5251,80 @@ union bpf_attr {
* Pointer to the underlying dynptr data, NULL if the dynptr is * Pointer to the underlying dynptr data, NULL if the dynptr is
* read-only, if the dynptr is invalid, or if the offset and length * read-only, if the dynptr is invalid, or if the offset and length
* is out of bounds. * is out of bounds.
*
* s64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th, u32 th_len)
* Description
* Try to issue a SYN cookie for the packet with corresponding
* IPv4/TCP headers, *iph* and *th*, without depending on a
* listening socket.
*
* *iph* points to the IPv4 header.
*
* *th* points to the start of the TCP header, while *th_len*
* contains the length of the TCP header (at least
* **sizeof**\ (**struct tcphdr**)).
* Return
* On success, the lower 32 bits hold the generated SYN cookie,
* followed by 16 bits which hold the MSS value for that cookie,
* and the top 16 bits are unused.
*
* On failure, the returned value is one of the following:
*
* **-EINVAL** if *th_len* is invalid.
*
* s64 bpf_tcp_raw_gen_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th, u32 th_len)
* Description
* Try to issue a SYN cookie for the packet with corresponding
* IPv6/TCP headers, *iph* and *th*, without depending on a
* listening socket.
*
* *iph* points to the IPv6 header.
*
* *th* points to the start of the TCP header, while *th_len*
* contains the length of the TCP header (at least
* **sizeof**\ (**struct tcphdr**)).
* Return
* On success, the lower 32 bits hold the generated SYN cookie,
* followed by 16 bits which hold the MSS value for that cookie,
* and the top 16 bits are unused.
*
* On failure, the returned value is one of the following:
*
* **-EINVAL** if *th_len* is invalid.
*
* **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
*
* long bpf_tcp_raw_check_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th)
* Description
* Check whether *iph* and *th* contain a valid SYN cookie ACK
* without depending on a listening socket.
*
* *iph* points to the IPv4 header.
*
* *th* points to the TCP header.
* Return
* 0 if *iph* and *th* are a valid SYN cookie ACK.
*
* On failure, the returned value is one of the following:
*
* **-EACCES** if the SYN cookie is not valid.
*
* long bpf_tcp_raw_check_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th)
* Description
* Check whether *iph* and *th* contain a valid SYN cookie ACK
* without depending on a listening socket.
*
* *iph* points to the IPv6 header.
*
* *th* points to the TCP header.
* Return
* 0 if *iph* and *th* are a valid SYN cookie ACK.
*
* On failure, the returned value is one of the following:
*
* **-EACCES** if the SYN cookie is not valid.
*
* **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -5455,6 +5531,10 @@ union bpf_attr { ...@@ -5455,6 +5531,10 @@ union bpf_attr {
FN(dynptr_read), \ FN(dynptr_read), \
FN(dynptr_write), \ FN(dynptr_write), \
FN(dynptr_data), \ FN(dynptr_data), \
FN(tcp_raw_gen_syncookie_ipv4), \
FN(tcp_raw_gen_syncookie_ipv6), \
FN(tcp_raw_check_syncookie_ipv4), \
FN(tcp_raw_check_syncookie_ipv6), \
/* */ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
......
...@@ -43,3 +43,4 @@ test_cpp ...@@ -43,3 +43,4 @@ test_cpp
*.tmp *.tmp
xdpxceiver xdpxceiver
xdp_redirect_multi xdp_redirect_multi
xdp_synproxy
...@@ -82,7 +82,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \ ...@@ -82,7 +82,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
xdpxceiver xdp_redirect_multi xdpxceiver xdp_redirect_multi xdp_synproxy
TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
...@@ -504,6 +504,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ ...@@ -504,6 +504,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
cap_helpers.c cap_helpers.c
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
$(OUTPUT)/liburandom_read.so \ $(OUTPUT)/liburandom_read.so \
$(OUTPUT)/xdp_synproxy \
ima_setup.sh \ ima_setup.sh \
$(wildcard progs/btf_dump_test_case_*.c) $(wildcard progs/btf_dump_test_case_*.c)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
......
// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#define _GNU_SOURCE
#include <test_progs.h>
#include <network_helpers.h>
#include <ctype.h>
/* Size in bytes of the buffer that receives command output; the escape
 * buffers used for diagnostics are sized from it as well.
 */
#define CMD_OUT_BUF_SIZE 1023
/* Run a shell command through system() and assert that it returned 0, using
 * the command string itself as the assertion name.
 *
 * On failure control jumps to the `out` label, so any function using this
 * macro must define one. Note that `cmd` is expanded twice.
 */
#define SYS(cmd) ({ \
if (!ASSERT_OK(system(cmd), (cmd))) \
goto out; \
})
/* Format a shell command printf-style into a local 1024-byte buffer, start it
 * with popen() in read mode and evaluate to the resulting FILE *.
 *
 * If ASSERT_OK_PTR() fails on the stream, control jumps to the `out` label,
 * so any function using this macro must define one. The macro never closes
 * the stream; that is left to the code using the returned FILE *.
 *
 * The locals `buf` and `f` live inside the statement expression and shadow
 * identically named variables of the enclosing function for its duration.
 */
#define SYS_OUT(cmd, ...) ({ \
char buf[1024]; \
snprintf(buf, sizeof(buf), (cmd), ##__VA_ARGS__); \
FILE *f = popen(buf, "r"); \
if (!ASSERT_OK_PTR(f, buf)) \
goto out; \
f; \
})
/* Render the first `size` bytes of `in` as a printable, NUL-terminated string.
 *
 * Printable bytes are copied verbatim, except for backslash and single quote;
 * those two and every non-printable byte are emitted as a four-character
 * `\xHH` escape with upper-case hex digits.
 *
 * @out:  destination; must be at least `size * 4 + 1` bytes long
 * @in:   source bytes (need not be NUL-terminated)
 * @size: number of bytes to read from `in`
 */
static void escape_str(char *out, const char *in, size_t size)
{
	static const char hex[] = "0123456789ABCDEF";
	size_t i;

	for (i = 0; i < size; i++) {
		/* <ctype.h> classifiers are only defined for values
		 * representable as unsigned char (or EOF); a plain char may
		 * be negative, so convert before calling isprint().
		 */
		unsigned char c = (unsigned char)in[i];

		if (isprint(c) && c != '\\' && c != '\'') {
			*out++ = (char)c;
		} else {
			*out++ = '\\';
			*out++ = 'x';
			*out++ = hex[c >> 4];
			*out++ = hex[c & 0xf];
		}
	}
	*out = '\0';
}
/* Check that the `size` bytes at `buf` are exactly the C string `str`.
 *
 * On mismatch, both strings are escaped and reported through CHECK() under
 * the test name `name`.
 *
 * @buf:  actual bytes (need not be NUL-terminated)
 * @size: number of valid bytes in `buf`
 * @str:  expected NUL-terminated string
 * @name: label used in the failure message
 *
 * Returns true when the contents match, false otherwise.
 */
static bool expect_str(char *buf, size_t size, const char *str, const char *name)
{
	/* escape_str() may emit 4 bytes per input byte plus a terminating NUL,
	 * so the buffers need the extra byte to hold a full command buffer.
	 */
	static char escbuf_expected[CMD_OUT_BUF_SIZE * 4 + 1];
	static char escbuf_actual[CMD_OUT_BUF_SIZE * 4 + 1];
	/* Not used directly here; presumably consumed by the CHECK() macro. */
	static int duration = 0;
	size_t expected_len = strlen(str);
	bool ok;

	ok = size == expected_len && !memcmp(buf, str, size);

	if (!ok) {
		/* Never escape more input than the buffers were sized for. */
		size_t esc_expected = expected_len < CMD_OUT_BUF_SIZE ?
				      expected_len : CMD_OUT_BUF_SIZE;
		size_t esc_actual = size < CMD_OUT_BUF_SIZE ?
				    size : CMD_OUT_BUF_SIZE;

		escape_str(escbuf_expected, str, esc_expected);
		escape_str(escbuf_actual, buf, esc_actual);
	}
	CHECK(!ok, name, "unexpected %s: actual '%s' != expected '%s'\n",
	      name, escbuf_actual, escbuf_expected);

	return ok;
}
/* End-to-end synproxy test over a veth pair.
 *
 * Builds a "synproxy" network namespace connected to the current one through
 * tmp0 (198.18.0.1/24, outside) and tmp1 (198.18.0.2/24, inside), configures
 * the iptables SYNPROXY rules for TCP port 8080 inside the namespace, runs the
 * ./xdp_synproxy control program, opens one TCP connection from outside and
 * checks that the "Total SYNACKs generated" counter printed by the control
 * program goes from 0 to 1.
 *
 * @xdp: true to drive the control program in its default mode and query it by
 *       interface; false to pass --tc and query it by the program id parsed
 *       from `tc filter show`.
 *
 * Every failure jumps to `out`, which closes the sockets, leaves the
 * namespace if one is still open and deletes tmp0 and the namespace.
 */
static void test_synproxy(bool xdp)
{
int server_fd = -1, client_fd = -1, accept_fd = -1;
/* Only assigned and used on the tc (!xdp) path; both point into buf. */
char *prog_id, *prog_id_end;
struct nstoken *ns = NULL;
FILE *ctrl_file = NULL;
char buf[CMD_OUT_BUF_SIZE];
size_t size;
/* Topology: namespace plus veth pair, outer end configured here. */
SYS("ip netns add synproxy");
SYS("ip link add tmp0 type veth peer name tmp1");
SYS("ip link set tmp1 netns synproxy");
SYS("ip link set tmp0 up");
SYS("ip addr replace 198.18.0.1/24 dev tmp0");
/* When checksum offload is enabled, the XDP program sees wrong
 * checksums and drops packets.
 */
SYS("ethtool -K tmp0 tx off");
if (xdp)
/* Workaround required for veth. */
SYS("ip link set tmp0 xdp object xdp_dummy.o section xdp 2> /dev/null");
/* The commands below are issued after open_netns("synproxy") succeeds,
 * presumably inside that namespace - confirm against open_netns().
 */
ns = open_netns("synproxy");
if (!ASSERT_OK_PTR(ns, "setns"))
goto out;
SYS("ip link set lo up");
SYS("ip link set tmp1 up");
SYS("ip addr replace 198.18.0.2/24 dev tmp1");
SYS("sysctl -w net.ipv4.tcp_syncookies=2");
SYS("sysctl -w net.ipv4.tcp_timestamps=1");
SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0");
SYS("iptables -t raw -I PREROUTING \
-i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack");
SYS("iptables -t filter -A INPUT \
-i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \
-j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460");
SYS("iptables -t filter -A INPUT \
-i tmp1 -m state --state INVALID -j DROP");
/* Run the control program; " --tc" is appended when not in XDP mode. */
ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \
--single --mss4 1460 --mss6 1440 \
--wscale 7 --ttl 64%s", xdp ? "" : " --tc");
size = fread(buf, 1, sizeof(buf), ctrl_file);
pclose(ctrl_file);
/* No connection has been made yet, so the counter must read 0. */
if (!expect_str(buf, size, "Total SYNACKs generated: 0\n",
"initial SYNACKs"))
goto out;
if (!xdp) {
/* Extract the decimal program id that follows " id " in the tc
 * output and NUL-terminate it in place inside buf.
 */
ctrl_file = SYS_OUT("tc filter show dev tmp1 ingress");
size = fread(buf, 1, sizeof(buf), ctrl_file);
pclose(ctrl_file);
prog_id = memmem(buf, size, " id ", 4);
if (!ASSERT_OK_PTR(prog_id, "find prog id"))
goto out;
prog_id += 4;
if (!ASSERT_LT(prog_id, buf + size, "find prog id begin"))
goto out;
prog_id_end = prog_id;
while (prog_id_end < buf + size && *prog_id_end >= '0' &&
*prog_id_end <= '9')
prog_id_end++;
/* The digits must be followed by at least one more byte of output,
 * which is the byte overwritten with the terminator below.
 */
if (!ASSERT_LT(prog_id_end, buf + size, "find prog id end"))
goto out;
*prog_id_end = '\0';
}
/* Start the listener before close_netns(); the client connects after it. */
server_fd = start_server(AF_INET, SOCK_STREAM, "198.18.0.2", 8080, 0);
if (!ASSERT_GE(server_fd, 0, "start_server"))
goto out;
close_netns(ns);
ns = NULL;
client_fd = connect_to_fd(server_fd, 10000);
if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
goto out;
accept_fd = accept(server_fd, NULL, NULL);
if (!ASSERT_GE(accept_fd, 0, "accept"))
goto out;
/* Re-open the namespace before querying the counter again. */
ns = open_netns("synproxy");
if (!ASSERT_OK_PTR(ns, "setns"))
goto out;
if (xdp)
ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --single");
else
ctrl_file = SYS_OUT("./xdp_synproxy --prog %s --single",
prog_id);
size = fread(buf, 1, sizeof(buf), ctrl_file);
pclose(ctrl_file);
/* Exactly one connection was made, so exactly one SYNACK is expected. */
if (!expect_str(buf, size, "Total SYNACKs generated: 1\n",
"SYNACKs after connection"))
goto out;
out:
/* Best-effort cleanup; the return values of system() are ignored. */
if (accept_fd >= 0)
close(accept_fd);
if (client_fd >= 0)
close(client_fd);
if (server_fd >= 0)
close(server_fd);
if (ns)
close_netns(ns);
system("ip link del tmp0");
system("ip netns del synproxy");
}
/* Test entry point: runs the synproxy scenario once per attachment mode,
 * as the "xdp" and "tc" subtests, in that order. A subtest body only runs
 * when test__start_subtest() returns true for its name.
 */
void test_xdp_synproxy(void)
{
	static const struct {
		const char *name;
		bool xdp;
	} modes[] = {
		{ "xdp", true },
		{ "tc", false },
	};
	size_t i;

	for (i = 0; i < sizeof(modes) / sizeof(modes[0]); i++) {
		if (test__start_subtest(modes[i].name))
			test_synproxy(modes[i].xdp);
	}
}
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment