Commit 4429bdc4 authored by Alexei Starovoitov

Merge branch 'New BPF helpers to accelerate synproxy'

Maxim Mikityanskiy says:

====================

The first patch of this series is a documentation fix.

The second patch allows BPF helpers to accept memory regions of fixed
size without doing runtime size checks.

The next two patches add new functionality that allows XDP to
accelerate iptables synproxy.

v1 of this series [1] used to include a patch that exposed conntrack
lookup to BPF using stable helpers. It was superseded by series [2] by
Kumar Kartikeya Dwivedi, which implements this functionality using
unstable helpers.

The third patch adds new helpers to issue and check SYN cookies without
binding to a socket, which is useful in the synproxy scenario.

The fourth patch adds a selftest, which includes an XDP program and a
userspace control application. The XDP program uses socketless SYN
cookie helpers and queries conntrack status instead of socket status.
The userspace control application allows tuning the parameters of the
XDP program. This program also serves as a minimal example of usage of
the new functionality.

The last two patches expose the new helpers to TC BPF and extend the
selftest.

The draft of the new functionality was presented at Netdev 0x15 [3].

v2 changes:

Split into two series, submitted bugfixes to bpf, dropped the conntrack
patches, implemented the timestamp cookie in BPF using bpf_loop, dropped
the timestamp cookie patch.

v3 changes:

Moved some patches from bpf to bpf-next, dropped the patch that changed
error codes, split the new helpers into IPv4/IPv6, added verifier
functionality to accept memory regions of fixed size.

v4 changes:

Converted the selftest to the test_progs runner. Replaced some
deprecated functions in xdp_synproxy userspace helper.

v5 changes:

Fixed a bug in the selftest. Added questionable functionality to support
new helpers in TC BPF, added selftests for it.

v6 changes:

Wrap the new helpers themselves into #ifdef CONFIG_SYN_COOKIES, replaced
fclose with pclose and fixed the MSS for IPv6 in the selftest.

v7 changes:

Fixed the off-by-one error in indices, changed the section name to
"xdp", added missing kernel config options to vmtest in CI.

v8 changes:

Properly rebased, dropped the first patch (the same change was applied
by someone else), updated the cover letter.

v9 changes:

Fixed selftests for no_alu32.

v10 changes:

Selftests for s390x were blacklisted due to lack of support of kfunc,
rebased the series, split selftests to separate commits, created
ARG_PTR_TO_FIXED_SIZE_MEM and packed arg_size, addressed the rest of
comments.

[1]: https://lore.kernel.org/bpf/20211020095815.GJ28644@breakpoint.cc/t/
[2]: https://lore.kernel.org/bpf/20220114163953.1455836-1-memxor@gmail.com/
[3]: https://netdevconf.info/0x15/session.html?Accelerating-synproxy-with-XDP
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 88bf1858 784d5dc0
...@@ -401,6 +401,9 @@ enum bpf_type_flag { ...@@ -401,6 +401,9 @@ enum bpf_type_flag {
/* DYNPTR points to a ringbuf record. */ /* DYNPTR points to a ringbuf record. */
DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS), DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS),
/* Size is known at compile time. */
MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS),
__BPF_TYPE_FLAG_MAX, __BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
}; };
...@@ -464,6 +467,8 @@ enum bpf_arg_type { ...@@ -464,6 +467,8 @@ enum bpf_arg_type {
* all bytes or clear them in error case. * all bytes or clear them in error case.
*/ */
ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM, ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM,
/* Pointer to valid memory of size known at compile time. */
ARG_PTR_TO_FIXED_SIZE_MEM = MEM_FIXED_SIZE | ARG_PTR_TO_MEM,
/* This must be the last entry. Its purpose is to ensure the enum is /* This must be the last entry. Its purpose is to ensure the enum is
* wide enough to hold the higher bits reserved for bpf_type_flag. * wide enough to hold the higher bits reserved for bpf_type_flag.
...@@ -529,6 +534,14 @@ struct bpf_func_proto { ...@@ -529,6 +534,14 @@ struct bpf_func_proto {
u32 *arg5_btf_id; u32 *arg5_btf_id;
}; };
u32 *arg_btf_id[5]; u32 *arg_btf_id[5];
struct {
size_t arg1_size;
size_t arg2_size;
size_t arg3_size;
size_t arg4_size;
size_t arg5_size;
};
size_t arg_size[5];
}; };
int *ret_btf_id; /* return value btf_id */ int *ret_btf_id; /* return value btf_id */
bool (*allowed)(const struct bpf_prog *prog); bool (*allowed)(const struct bpf_prog *prog);
......
...@@ -432,6 +432,7 @@ u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph, ...@@ -432,6 +432,7 @@ u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
struct tcphdr *th, u32 *cookie); struct tcphdr *th, u32 *cookie);
u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
struct tcphdr *th, u32 *cookie); struct tcphdr *th, u32 *cookie);
u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss);
u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops, const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct tcphdr *th); struct sock *sk, struct tcphdr *th);
......
...@@ -3597,10 +3597,11 @@ union bpf_attr { ...@@ -3597,10 +3597,11 @@ union bpf_attr {
* *
* *iph* points to the start of the IPv4 or IPv6 header, while * *iph* points to the start of the IPv4 or IPv6 header, while
* *iph_len* contains **sizeof**\ (**struct iphdr**) or * *iph_len* contains **sizeof**\ (**struct iphdr**) or
* **sizeof**\ (**struct ip6hdr**). * **sizeof**\ (**struct ipv6hdr**).
* *
* *th* points to the start of the TCP header, while *th_len* * *th* points to the start of the TCP header, while *th_len*
* contains **sizeof**\ (**struct tcphdr**). * contains the length of the TCP header (at least
* **sizeof**\ (**struct tcphdr**)).
* Return * Return
* 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
* error otherwise. * error otherwise.
...@@ -3783,10 +3784,11 @@ union bpf_attr { ...@@ -3783,10 +3784,11 @@ union bpf_attr {
* *
* *iph* points to the start of the IPv4 or IPv6 header, while * *iph* points to the start of the IPv4 or IPv6 header, while
* *iph_len* contains **sizeof**\ (**struct iphdr**) or * *iph_len* contains **sizeof**\ (**struct iphdr**) or
* **sizeof**\ (**struct ip6hdr**). * **sizeof**\ (**struct ipv6hdr**).
* *
* *th* points to the start of the TCP header, while *th_len* * *th* points to the start of the TCP header, while *th_len*
* contains the length of the TCP header. * contains the length of the TCP header with options (at least
* **sizeof**\ (**struct tcphdr**)).
* Return * Return
* On success, lower 32 bits hold the generated SYN cookie in * On success, lower 32 bits hold the generated SYN cookie in
* followed by 16 bits which hold the MSS value for that cookie, * followed by 16 bits which hold the MSS value for that cookie,
...@@ -5249,6 +5251,80 @@ union bpf_attr { ...@@ -5249,6 +5251,80 @@ union bpf_attr {
* Pointer to the underlying dynptr data, NULL if the dynptr is * Pointer to the underlying dynptr data, NULL if the dynptr is
* read-only, if the dynptr is invalid, or if the offset and length * read-only, if the dynptr is invalid, or if the offset and length
* is out of bounds. * is out of bounds.
*
* s64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th, u32 th_len)
* Description
* Try to issue a SYN cookie for the packet with corresponding
* IPv4/TCP headers, *iph* and *th*, without depending on a
* listening socket.
*
* *iph* points to the IPv4 header.
*
* *th* points to the start of the TCP header, while *th_len*
* contains the length of the TCP header (at least
* **sizeof**\ (**struct tcphdr**)).
* Return
* On success, lower 32 bits hold the generated SYN cookie,
* followed by 16 bits which hold the MSS value for that cookie,
* and the top 16 bits are unused.
*
* On failure, the returned value is one of the following:
*
* **-EINVAL** if *th_len* is invalid.
*
* s64 bpf_tcp_raw_gen_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th, u32 th_len)
* Description
* Try to issue a SYN cookie for the packet with corresponding
* IPv6/TCP headers, *iph* and *th*, without depending on a
* listening socket.
*
* *iph* points to the IPv6 header.
*
* *th* points to the start of the TCP header, while *th_len*
* contains the length of the TCP header (at least
* **sizeof**\ (**struct tcphdr**)).
* Return
* On success, lower 32 bits hold the generated SYN cookie,
* followed by 16 bits which hold the MSS value for that cookie,
* and the top 16 bits are unused.
*
* On failure, the returned value is one of the following:
*
* **-EINVAL** if *th_len* is invalid.
*
* **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
*
* long bpf_tcp_raw_check_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th)
* Description
* Check whether *iph* and *th* contain a valid SYN cookie ACK
* without depending on a listening socket.
*
* *iph* points to the IPv4 header.
*
* *th* points to the TCP header.
* Return
* 0 if *iph* and *th* are a valid SYN cookie ACK.
*
* On failure, the returned value is one of the following:
*
* **-EACCES** if the SYN cookie is not valid.
*
* long bpf_tcp_raw_check_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th)
* Description
* Check whether *iph* and *th* contain a valid SYN cookie ACK
* without depending on a listening socket.
*
* *iph* points to the IPv6 header.
*
* *th* points to the TCP header.
* Return
* 0 if *iph* and *th* are a valid SYN cookie ACK.
*
* On failure, the returned value is one of the following:
*
* **-EACCES** if the SYN cookie is not valid.
*
* **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -5455,6 +5531,10 @@ union bpf_attr { ...@@ -5455,6 +5531,10 @@ union bpf_attr {
FN(dynptr_read), \ FN(dynptr_read), \
FN(dynptr_write), \ FN(dynptr_write), \
FN(dynptr_data), \ FN(dynptr_data), \
FN(tcp_raw_gen_syncookie_ipv4), \
FN(tcp_raw_gen_syncookie_ipv6), \
FN(tcp_raw_check_syncookie_ipv4), \
FN(tcp_raw_check_syncookie_ipv6), \
/* */ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
......
...@@ -5848,6 +5848,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, ...@@ -5848,6 +5848,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno]; struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
enum bpf_arg_type arg_type = fn->arg_type[arg]; enum bpf_arg_type arg_type = fn->arg_type[arg];
enum bpf_reg_type type = reg->type; enum bpf_reg_type type = reg->type;
u32 *arg_btf_id = NULL;
int err = 0; int err = 0;
if (arg_type == ARG_DONTCARE) if (arg_type == ARG_DONTCARE)
...@@ -5884,7 +5885,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, ...@@ -5884,7 +5885,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
*/ */
goto skip_type_check; goto skip_type_check;
err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg], meta); /* arg_btf_id and arg_size are in a union. */
if (base_type(arg_type) == ARG_PTR_TO_BTF_ID)
arg_btf_id = fn->arg_btf_id[arg];
err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
if (err) if (err)
return err; return err;
...@@ -6011,6 +6016,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, ...@@ -6011,6 +6016,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
* next is_mem_size argument below. * next is_mem_size argument below.
*/ */
meta->raw_mode = arg_type & MEM_UNINIT; meta->raw_mode = arg_type & MEM_UNINIT;
if (arg_type & MEM_FIXED_SIZE) {
err = check_helper_mem_access(env, regno,
fn->arg_size[arg], false,
meta);
}
} else if (arg_type_is_mem_size(arg_type)) { } else if (arg_type_is_mem_size(arg_type)) {
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
...@@ -6400,11 +6410,19 @@ static bool check_raw_mode_ok(const struct bpf_func_proto *fn) ...@@ -6400,11 +6410,19 @@ static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
return count <= 1; return count <= 1;
} }
static bool check_args_pair_invalid(enum bpf_arg_type arg_curr, static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
enum bpf_arg_type arg_next)
{ {
return (base_type(arg_curr) == ARG_PTR_TO_MEM) != bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
arg_type_is_mem_size(arg_next); bool has_size = fn->arg_size[arg] != 0;
bool is_next_size = false;
if (arg + 1 < ARRAY_SIZE(fn->arg_type))
is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
return is_next_size;
return has_size == is_next_size || is_next_size == is_fixed;
} }
static bool check_arg_pair_ok(const struct bpf_func_proto *fn) static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
...@@ -6415,11 +6433,11 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn) ...@@ -6415,11 +6433,11 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
* helper function specification. * helper function specification.
*/ */
if (arg_type_is_mem_size(fn->arg1_type) || if (arg_type_is_mem_size(fn->arg1_type) ||
base_type(fn->arg5_type) == ARG_PTR_TO_MEM || check_args_pair_invalid(fn, 0) ||
check_args_pair_invalid(fn->arg1_type, fn->arg2_type) || check_args_pair_invalid(fn, 1) ||
check_args_pair_invalid(fn->arg2_type, fn->arg3_type) || check_args_pair_invalid(fn, 2) ||
check_args_pair_invalid(fn->arg3_type, fn->arg4_type) || check_args_pair_invalid(fn, 3) ||
check_args_pair_invalid(fn->arg4_type, fn->arg5_type)) check_args_pair_invalid(fn, 4))
return false; return false;
return true; return true;
...@@ -6460,7 +6478,10 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn) ...@@ -6460,7 +6478,10 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn)
if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
return false; return false;
if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i]) if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
/* arg_btf_id and arg_size are in a union. */
(base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
!(fn->arg_type[i] & MEM_FIXED_SIZE)))
return false; return false;
} }
......
...@@ -7444,6 +7444,114 @@ static const struct bpf_func_proto bpf_skb_set_tstamp_proto = { ...@@ -7444,6 +7444,114 @@ static const struct bpf_func_proto bpf_skb_set_tstamp_proto = {
.arg3_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING,
}; };
#ifdef CONFIG_SYN_COOKIES
/*
 * Generate a SYN cookie from raw IPv4/TCP headers, without a socket.
 *
 * @iph:    IPv4 header.
 * @th:     TCP header, followed by its options.
 * @th_len: length of the TCP header in bytes; must be at least
 *          sizeof(*th) and equal to th->doff * 4.
 *
 * Returns the cookie in the low 32 bits with the MSS placed directly above
 * it (shifted left by 32), or -EINVAL when th_len fails the checks above.
 */
BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv4, struct iphdr *, iph,
	   struct tcphdr *, th, u32, th_len)
{
	u64 packed;
	u32 cookie;
	u16 mss;

	/* The size check must come first: it guards the read of th->doff. */
	if (unlikely(th_len < sizeof(*th)))
		return -EINVAL;
	if (unlikely(th_len != th->doff * 4))
		return -EINVAL;

	/* Fall back to the default MSS when the option parser reports 0. */
	mss = tcp_parse_mss_option(th, 0);
	if (!mss)
		mss = TCP_MSS_DEFAULT;

	cookie = __cookie_v4_init_sequence(iph, th, &mss);

	packed = (u64)mss << 32;
	packed |= cookie;

	return packed;
}
/*
 * Helper prototype describing bpf_tcp_raw_gen_syncookie_ipv4() to the
 * verifier: an integer-returning, GPL-only helper taking three arguments.
 */
static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = {
.func = bpf_tcp_raw_gen_syncookie_ipv4,
.gpl_only = true, /* __cookie_v4_init_sequence() is GPL */
.pkt_access = true,
.ret_type = RET_INTEGER,
/* arg1 (iph): memory region whose size is fixed at sizeof(struct iphdr). */
.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
.arg1_size = sizeof(struct iphdr),
/* arg2 (th) is a memory pointer whose length is supplied by arg3 (th_len). */
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
};
/*
 * Generate a SYN cookie from raw IPv6/TCP headers, without a socket.
 *
 * @iph:    IPv6 header.
 * @th:     TCP header, followed by its options.
 * @th_len: length of the TCP header in bytes; must be at least
 *          sizeof(*th) and equal to th->doff * 4.
 *
 * Returns the cookie in the low 32 bits with the MSS placed directly above
 * it (shifted left by 32), -EINVAL when th_len fails the checks above, or
 * -EPROTONOSUPPORT when CONFIG_IPV6 is not built in.
 */
BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph,
	   struct tcphdr *, th, u32, th_len)
{
#if IS_BUILTIN(CONFIG_IPV6)
	/* MSS used when the SYN carries no usable MSS option. */
	const u16 mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
			      sizeof(struct ipv6hdr);
	u64 packed;
	u32 cookie;
	u16 mss;

	/* The size check must come first: it guards the read of th->doff. */
	if (unlikely(th_len < sizeof(*th)))
		return -EINVAL;
	if (unlikely(th_len != th->doff * 4))
		return -EINVAL;

	mss = tcp_parse_mss_option(th, 0);
	if (!mss)
		mss = mss_clamp;

	cookie = __cookie_v6_init_sequence(iph, th, &mss);

	packed = (u64)mss << 32;
	packed |= cookie;

	return packed;
#else
	return -EPROTONOSUPPORT;
#endif
}
/*
 * Helper prototype describing bpf_tcp_raw_gen_syncookie_ipv6() to the
 * verifier: an integer-returning, GPL-only helper taking three arguments.
 */
static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = {
.func = bpf_tcp_raw_gen_syncookie_ipv6,
.gpl_only = true, /* __cookie_v6_init_sequence() is GPL */
.pkt_access = true,
.ret_type = RET_INTEGER,
/* arg1 (iph): memory region whose size is fixed at sizeof(struct ipv6hdr). */
.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
.arg1_size = sizeof(struct ipv6hdr),
/* arg2 (th) is a memory pointer whose length is supplied by arg3 (th_len). */
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
};
/*
 * Validate the SYN cookie carried by an ACK, using raw IPv4/TCP headers
 * and no socket.
 *
 * @iph: IPv4 header.
 * @th:  TCP header; the cookie is taken as ntohl(th->ack_seq) - 1.
 *
 * Returns 0 when __cookie_v4_check() yields a positive result,
 * -EACCES otherwise.
 */
BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph,
	   struct tcphdr *, th)
{
	const u32 cookie = ntohl(th->ack_seq) - 1;
	const bool valid = __cookie_v4_check(iph, th, cookie) > 0;

	return valid ? 0 : -EACCES;
}
/*
 * Helper prototype describing bpf_tcp_raw_check_syncookie_ipv4() to the
 * verifier: an integer-returning, GPL-only helper taking two arguments,
 * both of which are fixed-size memory regions (no separate length argument).
 */
static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv4_proto = {
.func = bpf_tcp_raw_check_syncookie_ipv4,
.gpl_only = true, /* __cookie_v4_check is GPL */
.pkt_access = true,
.ret_type = RET_INTEGER,
/* arg1 (iph): exactly sizeof(struct iphdr) bytes. */
.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
.arg1_size = sizeof(struct iphdr),
/* arg2 (th): exactly sizeof(struct tcphdr) bytes. */
.arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
.arg2_size = sizeof(struct tcphdr),
};
/*
 * Validate the SYN cookie carried by an ACK, using raw IPv6/TCP headers
 * and no socket.
 *
 * @iph: IPv6 header.
 * @th:  TCP header; the cookie is taken as ntohl(th->ack_seq) - 1.
 *
 * Returns 0 when __cookie_v6_check() yields a positive result, -EACCES
 * otherwise, or -EPROTONOSUPPORT when CONFIG_IPV6 is not built in.
 */
BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph,
	   struct tcphdr *, th)
{
#if IS_BUILTIN(CONFIG_IPV6)
	const u32 cookie = ntohl(th->ack_seq) - 1;
	const bool valid = __cookie_v6_check(iph, th, cookie) > 0;

	return valid ? 0 : -EACCES;
#else
	return -EPROTONOSUPPORT;
#endif
}
/*
 * Helper prototype describing bpf_tcp_raw_check_syncookie_ipv6() to the
 * verifier: an integer-returning, GPL-only helper taking two arguments,
 * both of which are fixed-size memory regions (no separate length argument).
 */
static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv6_proto = {
.func = bpf_tcp_raw_check_syncookie_ipv6,
.gpl_only = true, /* __cookie_v6_check is GPL */
.pkt_access = true,
.ret_type = RET_INTEGER,
/* arg1 (iph): exactly sizeof(struct ipv6hdr) bytes. */
.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
.arg1_size = sizeof(struct ipv6hdr),
/* arg2 (th): exactly sizeof(struct tcphdr) bytes. */
.arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
.arg2_size = sizeof(struct tcphdr),
};
#endif /* CONFIG_SYN_COOKIES */
#endif /* CONFIG_INET */ #endif /* CONFIG_INET */
bool bpf_helper_changes_pkt_data(void *func) bool bpf_helper_changes_pkt_data(void *func)
...@@ -7807,6 +7915,16 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ...@@ -7807,6 +7915,16 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_assign_proto; return &bpf_sk_assign_proto;
case BPF_FUNC_skb_set_tstamp: case BPF_FUNC_skb_set_tstamp:
return &bpf_skb_set_tstamp_proto; return &bpf_skb_set_tstamp_proto;
#ifdef CONFIG_SYN_COOKIES
case BPF_FUNC_tcp_raw_gen_syncookie_ipv4:
return &bpf_tcp_raw_gen_syncookie_ipv4_proto;
case BPF_FUNC_tcp_raw_gen_syncookie_ipv6:
return &bpf_tcp_raw_gen_syncookie_ipv6_proto;
case BPF_FUNC_tcp_raw_check_syncookie_ipv4:
return &bpf_tcp_raw_check_syncookie_ipv4_proto;
case BPF_FUNC_tcp_raw_check_syncookie_ipv6:
return &bpf_tcp_raw_check_syncookie_ipv6_proto;
#endif
#endif #endif
default: default:
return bpf_sk_base_func_proto(func_id); return bpf_sk_base_func_proto(func_id);
...@@ -7856,6 +7974,16 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ...@@ -7856,6 +7974,16 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_check_syncookie_proto; return &bpf_tcp_check_syncookie_proto;
case BPF_FUNC_tcp_gen_syncookie: case BPF_FUNC_tcp_gen_syncookie:
return &bpf_tcp_gen_syncookie_proto; return &bpf_tcp_gen_syncookie_proto;
#ifdef CONFIG_SYN_COOKIES
case BPF_FUNC_tcp_raw_gen_syncookie_ipv4:
return &bpf_tcp_raw_gen_syncookie_ipv4_proto;
case BPF_FUNC_tcp_raw_gen_syncookie_ipv6:
return &bpf_tcp_raw_gen_syncookie_ipv6_proto;
case BPF_FUNC_tcp_raw_check_syncookie_ipv4:
return &bpf_tcp_raw_check_syncookie_ipv4_proto;
case BPF_FUNC_tcp_raw_check_syncookie_ipv6:
return &bpf_tcp_raw_check_syncookie_ipv6_proto;
#endif
#endif #endif
default: default:
return bpf_sk_base_func_proto(func_id); return bpf_sk_base_func_proto(func_id);
......
...@@ -3967,7 +3967,7 @@ static bool smc_parse_options(const struct tcphdr *th, ...@@ -3967,7 +3967,7 @@ static bool smc_parse_options(const struct tcphdr *th,
/* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped /* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped
* value on success. * value on success.
*/ */
static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
{ {
const unsigned char *ptr = (const unsigned char *)(th + 1); const unsigned char *ptr = (const unsigned char *)(th + 1);
int length = (th->doff * 4) - sizeof(struct tcphdr); int length = (th->doff * 4) - sizeof(struct tcphdr);
...@@ -4006,6 +4006,7 @@ static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) ...@@ -4006,6 +4006,7 @@ static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
} }
return mss; return mss;
} }
EXPORT_SYMBOL_GPL(tcp_parse_mss_option);
/* Look for tcp options. Normally only called on SYN and SYNACK packets. /* Look for tcp options. Normally only called on SYN and SYNACK packets.
* But, this can also be called on packets in the established flow when * But, this can also be called on packets in the established flow when
......
...@@ -635,6 +635,8 @@ class PrinterHelpers(Printer): ...@@ -635,6 +635,8 @@ class PrinterHelpers(Printer):
'struct bpf_timer', 'struct bpf_timer',
'struct mptcp_sock', 'struct mptcp_sock',
'struct bpf_dynptr', 'struct bpf_dynptr',
'struct iphdr',
'struct ipv6hdr',
] ]
known_types = { known_types = {
'...', '...',
...@@ -686,6 +688,8 @@ class PrinterHelpers(Printer): ...@@ -686,6 +688,8 @@ class PrinterHelpers(Printer):
'struct bpf_timer', 'struct bpf_timer',
'struct mptcp_sock', 'struct mptcp_sock',
'struct bpf_dynptr', 'struct bpf_dynptr',
'struct iphdr',
'struct ipv6hdr',
} }
mapped_types = { mapped_types = {
'u8': '__u8', 'u8': '__u8',
......
...@@ -3597,10 +3597,11 @@ union bpf_attr { ...@@ -3597,10 +3597,11 @@ union bpf_attr {
* *
* *iph* points to the start of the IPv4 or IPv6 header, while * *iph* points to the start of the IPv4 or IPv6 header, while
* *iph_len* contains **sizeof**\ (**struct iphdr**) or * *iph_len* contains **sizeof**\ (**struct iphdr**) or
* **sizeof**\ (**struct ip6hdr**). * **sizeof**\ (**struct ipv6hdr**).
* *
* *th* points to the start of the TCP header, while *th_len* * *th* points to the start of the TCP header, while *th_len*
* contains **sizeof**\ (**struct tcphdr**). * contains the length of the TCP header (at least
* **sizeof**\ (**struct tcphdr**)).
* Return * Return
* 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
* error otherwise. * error otherwise.
...@@ -3783,10 +3784,11 @@ union bpf_attr { ...@@ -3783,10 +3784,11 @@ union bpf_attr {
* *
* *iph* points to the start of the IPv4 or IPv6 header, while * *iph* points to the start of the IPv4 or IPv6 header, while
* *iph_len* contains **sizeof**\ (**struct iphdr**) or * *iph_len* contains **sizeof**\ (**struct iphdr**) or
* **sizeof**\ (**struct ip6hdr**). * **sizeof**\ (**struct ipv6hdr**).
* *
* *th* points to the start of the TCP header, while *th_len* * *th* points to the start of the TCP header, while *th_len*
* contains the length of the TCP header. * contains the length of the TCP header with options (at least
* **sizeof**\ (**struct tcphdr**)).
* Return * Return
* On success, lower 32 bits hold the generated SYN cookie in * On success, lower 32 bits hold the generated SYN cookie in
* followed by 16 bits which hold the MSS value for that cookie, * followed by 16 bits which hold the MSS value for that cookie,
...@@ -5249,6 +5251,80 @@ union bpf_attr { ...@@ -5249,6 +5251,80 @@ union bpf_attr {
* Pointer to the underlying dynptr data, NULL if the dynptr is * Pointer to the underlying dynptr data, NULL if the dynptr is
* read-only, if the dynptr is invalid, or if the offset and length * read-only, if the dynptr is invalid, or if the offset and length
* is out of bounds. * is out of bounds.
*
* s64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th, u32 th_len)
* Description
* Try to issue a SYN cookie for the packet with corresponding
* IPv4/TCP headers, *iph* and *th*, without depending on a
* listening socket.
*
* *iph* points to the IPv4 header.
*
* *th* points to the start of the TCP header, while *th_len*
* contains the length of the TCP header (at least
* **sizeof**\ (**struct tcphdr**)).
* Return
* On success, lower 32 bits hold the generated SYN cookie,
* followed by 16 bits which hold the MSS value for that cookie,
* and the top 16 bits are unused.
*
* On failure, the returned value is one of the following:
*
* **-EINVAL** if *th_len* is invalid.
*
* s64 bpf_tcp_raw_gen_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th, u32 th_len)
* Description
* Try to issue a SYN cookie for the packet with corresponding
* IPv6/TCP headers, *iph* and *th*, without depending on a
* listening socket.
*
* *iph* points to the IPv6 header.
*
* *th* points to the start of the TCP header, while *th_len*
* contains the length of the TCP header (at least
* **sizeof**\ (**struct tcphdr**)).
* Return
* On success, lower 32 bits hold the generated SYN cookie,
* followed by 16 bits which hold the MSS value for that cookie,
* and the top 16 bits are unused.
*
* On failure, the returned value is one of the following:
*
* **-EINVAL** if *th_len* is invalid.
*
* **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
*
* long bpf_tcp_raw_check_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th)
* Description
* Check whether *iph* and *th* contain a valid SYN cookie ACK
* without depending on a listening socket.
*
* *iph* points to the IPv4 header.
*
* *th* points to the TCP header.
* Return
* 0 if *iph* and *th* are a valid SYN cookie ACK.
*
* On failure, the returned value is one of the following:
*
* **-EACCES** if the SYN cookie is not valid.
*
* long bpf_tcp_raw_check_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th)
* Description
* Check whether *iph* and *th* contain a valid SYN cookie ACK
* without depending on a listening socket.
*
* *iph* points to the IPv6 header.
*
* *th* points to the TCP header.
* Return
* 0 if *iph* and *th* are a valid SYN cookie ACK.
*
* On failure, the returned value is one of the following:
*
* **-EACCES** if the SYN cookie is not valid.
*
* **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -5455,6 +5531,10 @@ union bpf_attr { ...@@ -5455,6 +5531,10 @@ union bpf_attr {
FN(dynptr_read), \ FN(dynptr_read), \
FN(dynptr_write), \ FN(dynptr_write), \
FN(dynptr_data), \ FN(dynptr_data), \
FN(tcp_raw_gen_syncookie_ipv4), \
FN(tcp_raw_gen_syncookie_ipv6), \
FN(tcp_raw_check_syncookie_ipv4), \
FN(tcp_raw_check_syncookie_ipv6), \
/* */ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
......
...@@ -43,3 +43,4 @@ test_cpp ...@@ -43,3 +43,4 @@ test_cpp
*.tmp *.tmp
xdpxceiver xdpxceiver
xdp_redirect_multi xdp_redirect_multi
xdp_synproxy
...@@ -82,7 +82,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \ ...@@ -82,7 +82,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
xdpxceiver xdp_redirect_multi xdpxceiver xdp_redirect_multi xdp_synproxy
TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
...@@ -504,6 +504,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ ...@@ -504,6 +504,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
cap_helpers.c cap_helpers.c
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
$(OUTPUT)/liburandom_read.so \ $(OUTPUT)/liburandom_read.so \
$(OUTPUT)/xdp_synproxy \
ima_setup.sh \ ima_setup.sh \
$(wildcard progs/btf_dump_test_case_*.c) $(wildcard progs/btf_dump_test_case_*.c)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
......
// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#define _GNU_SOURCE
#include <test_progs.h>
#include <network_helpers.h>
#include <ctype.h>
#define CMD_OUT_BUF_SIZE 1023
/*
 * Run shell command `cmd` through system() and check the result with
 * ASSERT_OK(), using the command text itself as the assertion name.
 * If the assertion fails, control jumps to the `out` label, which the
 * enclosing function must therefore provide.
 */
#define SYS(cmd) ({ \
if (!ASSERT_OK(system(cmd), (cmd))) \
goto out; \
})
/*
 * Format a shell command (printf-style `cmd` plus optional arguments) into
 * a local 1024-byte buffer and start it with popen() in read mode.
 * The macro evaluates to the resulting FILE *. If popen() does not yield a
 * valid pointer (ASSERT_OK_PTR), control jumps to the `out` label of the
 * enclosing function instead.
 * NOTE(review): the snprintf() result is not checked, so an over-long
 * command would be truncated silently.
 */
#define SYS_OUT(cmd, ...) ({ \
char buf[1024]; \
snprintf(buf, sizeof(buf), (cmd), ##__VA_ARGS__); \
FILE *f = popen(buf, "r"); \
if (!ASSERT_OK_PTR(f, buf)) \
goto out; \
f; \
})
/*
 * Write an escaped, NUL-terminated copy of the first `size` bytes of `in`
 * to `out`.
 *
 * Printable bytes are copied as-is, except for backslash and single quote.
 * Every other byte (including those two) is emitted as a four-character
 * "\xHH" sequence with upper-case hex digits.
 *
 * out must be at least `size * 4 + 1` bytes long
 */
static void escape_str(char *out, const char *in, size_t size)
{
	static const char hex[] = "0123456789ABCDEF";
	size_t i;

	for (i = 0; i < size; i++) {
		/*
		 * <ctype.h> classifiers are only defined for values
		 * representable as unsigned char (or EOF); plain char may be
		 * negative, so convert before classifying.
		 */
		unsigned char c = (unsigned char)in[i];

		if (isprint(c) && c != '\\' && c != '\'') {
			*out++ = (char)c;
		} else {
			*out++ = '\\';
			*out++ = 'x';
			*out++ = hex[c >> 4];
			*out++ = hex[c & 0xf];
		}
	}

	*out = '\0';
}
/*
 * Check that the `size` bytes at `buf` are exactly the NUL-terminated
 * string `str`.
 *
 * @buf:  actual data (not necessarily NUL-terminated).
 * @size: number of valid bytes in `buf`.
 * @str:  expected content.
 * @name: label used in the CHECK() report.
 *
 * On mismatch, both strings are escaped with escape_str() and reported.
 * Returns true when the contents match.
 *
 * Both `size` and strlen(str) must not exceed CMD_OUT_BUF_SIZE, which is
 * what the escape buffers below are dimensioned for.
 */
static bool expect_str(char *buf, size_t size, const char *str, const char *name)
{
	/*
	 * escape_str() may emit four characters per input byte plus a
	 * terminating NUL, hence the "+ 1".
	 */
	static char escbuf_expected[CMD_OUT_BUF_SIZE * 4 + 1];
	static char escbuf_actual[CMD_OUT_BUF_SIZE * 4 + 1];
	/* Not used directly here; presumably referenced by CHECK() - confirm. */
	static int duration = 0;
	bool ok;

	ok = size == strlen(str) && !memcmp(buf, str, size);

	if (!ok) {
		escape_str(escbuf_expected, str, strlen(str));
		escape_str(escbuf_actual, buf, size);
	}
	CHECK(!ok, name, "unexpected %s: actual '%s' != expected '%s'\n",
	      name, escbuf_actual, escbuf_expected);

	return ok;
}
/*
 * End-to-end check of the xdp_synproxy tool.
 *
 * Creates network namespace "synproxy" and a veth pair tmp0/tmp1 (tmp1 is
 * moved into the namespace), installs iptables SYNPROXY rules for TCP port
 * 8080 inside the namespace, starts ./xdp_synproxy on tmp1 and verifies it
 * reports "Total SYNACKs generated: 0". It then opens one TCP connection to
 * 198.18.0.2:8080 and verifies the tool reports a count of 1.
 *
 * @xdp: when true the tool is run without "--tc" and later queried by
 *       interface; when false "--tc" is passed and the tool is later queried
 *       by the program id parsed from `tc filter show`.
 *
 * Any failing step jumps to `out`, where sockets, the namespace token, the
 * veth link and the namespace are released.
 */
static void test_synproxy(bool xdp)
{
int server_fd = -1, client_fd = -1, accept_fd = -1;
char *prog_id, *prog_id_end;
struct nstoken *ns = NULL;
FILE *ctrl_file = NULL;
char buf[CMD_OUT_BUF_SIZE];
size_t size;
/* Topology: tmp0 (198.18.0.1) stays outside, tmp1 goes into the netns. */
SYS("ip netns add synproxy");
SYS("ip link add tmp0 type veth peer name tmp1");
SYS("ip link set tmp1 netns synproxy");
SYS("ip link set tmp0 up");
SYS("ip addr replace 198.18.0.1/24 dev tmp0");
/* When checksum offload is enabled, the XDP program sees wrong
 * checksums and drops packets.
 */
SYS("ethtool -K tmp0 tx off");
if (xdp)
/* Workaround required for veth. */
SYS("ip link set tmp0 xdp object xdp_dummy.o section xdp 2> /dev/null");
/* Everything up to close_netns() below runs inside the "synproxy" netns. */
ns = open_netns("synproxy");
if (!ASSERT_OK_PTR(ns, "setns"))
goto out;
SYS("ip link set lo up");
SYS("ip link set tmp1 up");
SYS("ip addr replace 198.18.0.2/24 dev tmp1");
SYS("sysctl -w net.ipv4.tcp_syncookies=2");
SYS("sysctl -w net.ipv4.tcp_timestamps=1");
SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0");
SYS("iptables -t raw -I PREROUTING \
-i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack");
SYS("iptables -t filter -A INPUT \
-i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \
-j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460");
SYS("iptables -t filter -A INPUT \
-i tmp1 -m state --state INVALID -j DROP");
/* Start the tool and read back its initial counter line. */
ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \
--single --mss4 1460 --mss6 1440 \
--wscale 7 --ttl 64%s", xdp ? "" : " --tc");
size = fread(buf, 1, sizeof(buf), ctrl_file);
pclose(ctrl_file);
if (!expect_str(buf, size, "Total SYNACKs generated: 0\n",
"initial SYNACKs"))
goto out;
if (!xdp) {
/* Extract the decimal program id that follows " id " in the tc output. */
ctrl_file = SYS_OUT("tc filter show dev tmp1 ingress");
size = fread(buf, 1, sizeof(buf), ctrl_file);
pclose(ctrl_file);
prog_id = memmem(buf, size, " id ", 4);
if (!ASSERT_OK_PTR(prog_id, "find prog id"))
goto out;
prog_id += 4;
if (!ASSERT_LT(prog_id, buf + size, "find prog id begin"))
goto out;
prog_id_end = prog_id;
while (prog_id_end < buf + size && *prog_id_end >= '0' &&
*prog_id_end <= '9')
prog_id_end++;
/* The digits must end inside the data read, so the NUL below stays in bounds. */
if (!ASSERT_LT(prog_id_end, buf + size, "find prog id end"))
goto out;
/* Terminate in place; prog_id keeps pointing into buf until its use below. */
*prog_id_end = '\0';
}
server_fd = start_server(AF_INET, SOCK_STREAM, "198.18.0.2", 8080, 0);
if (!ASSERT_GE(server_fd, 0, "start_server"))
goto out;
/* Release the netns token before connecting; presumably the client side
 * then runs in the original namespace - confirm against close_netns().
 */
close_netns(ns);
ns = NULL;
client_fd = connect_to_fd(server_fd, 10000);
if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
goto out;
accept_fd = accept(server_fd, NULL, NULL);
if (!ASSERT_GE(accept_fd, 0, "accept"))
goto out;
/* Re-enter the netns to query the counter after the connection. */
ns = open_netns("synproxy");
if (!ASSERT_OK_PTR(ns, "setns"))
goto out;
if (xdp)
ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --single");
else
ctrl_file = SYS_OUT("./xdp_synproxy --prog %s --single",
prog_id);
size = fread(buf, 1, sizeof(buf), ctrl_file);
pclose(ctrl_file);
if (!expect_str(buf, size, "Total SYNACKs generated: 1\n",
"SYNACKs after connection"))
goto out;
out:
/* Cleanup runs on both success and failure; results are not checked. */
if (accept_fd >= 0)
close(accept_fd);
if (client_fd >= 0)
close(client_fd);
if (server_fd >= 0)
close(server_fd);
if (ns)
close_netns(ns);
system("ip link del tmp0");
system("ip netns del synproxy");
}
/* Runs test_synproxy() once per attachment type, each as a separate subtest:
 * "xdp" with xdp == true and "tc" with xdp == false.
 */
void test_xdp_synproxy(void)
{
	static const struct {
		const char *name;
		bool xdp;
	} variants[] = {
		{ "xdp", true },
		{ "tc", false },
	};
	unsigned int idx;

	for (idx = 0; idx < sizeof(variants) / sizeof(variants[0]); idx++) {
		if (test__start_subtest(variants[idx].name))
			test_synproxy(variants[idx].xdp);
	}
}
// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include <asm/errno.h>
/* Return codes used by the TC program (syncookie_tc). */
#define TC_ACT_OK 0
#define TC_ACT_SHOT 2
/* Nanoseconds per second; used when converting the clock to timestamp ticks. */
#define NSEC_PER_SEC 1000000000L
/* Number of bytes in an Ethernet address, as copied by swap_eth_addr(). */
#define ETH_ALEN 6
/* EtherType values matched in tcp_dissect(). */
#define ETH_P_IP 0x0800
#define ETH_P_IPV6 0x86DD
/* Accesses the fourth 32-bit word of the TCP header (doff and flags). */
#define tcp_flag_word(tp) (((union tcp_word_hdr *)(tp))->words[3])
/* Fields of the IPv4 frag_off word tested in tcp_lookup(). */
#define IP_DF 0x4000
#define IP_MF 0x2000
#define IP_OFFSET 0x1fff
/* IPv6 nexthdr value accepted as TCP in tcp_dissect(). */
#define NEXTHDR_TCP 6
/* TCP option kinds. */
#define TCPOPT_NOP 1
#define TCPOPT_EOL 0
#define TCPOPT_MSS 2
#define TCPOPT_WINDOW 3
#define TCPOPT_SACK_PERM 4
#define TCPOPT_TIMESTAMP 8
/* TCP option lengths in bytes (kind and length bytes included). */
#define TCPOLEN_MSS 4
#define TCPOLEN_WINDOW 3
#define TCPOLEN_SACK_PERM 2
#define TCPOLEN_TIMESTAMP 10
/* Timestamp ticks per second, see tcp_ns_to_ts(). */
#define TCP_TS_HZ 1000
/* Layout of the low TSBITS bits of the timestamp cookie built in
 * tscookie_init(): window scale in bits 0-3, SACK in bit 4, ECN in bit 5.
 */
#define TS_OPT_WSCALE_MASK 0xf
#define TS_OPT_SACK (1 << 4)
#define TS_OPT_ECN (1 << 5)
#define TSBITS 6
#define TSMASK (((__u32)1 << TSBITS) - 1)
/* Upper bound applied to a window scale parsed from the client's options. */
#define TCP_MAX_WSCALE 14U
/* Header sizes in bytes that the packet must cover before checksumming and
 * cookie generation (see syncookie_part1() and syncookie_part2()).
 */
#define IPV4_MAXLEN 60
#define TCP_MAXLEN 60
/* Defaults used by values_get_tcpipopts() when no options are configured. */
#define DEFAULT_MSS4 1460
#define DEFAULT_MSS6 1440
#define DEFAULT_WSCALE 7
#define DEFAULT_TTL 64
/* Number of entries in the allowed_ports map. */
#define MAX_ALLOWED_PORTS 8
/* Exchanges the values of two lvalues; the temporary has the type of a. */
#define swap(a, b) \
do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
/* Reads a value of the given type through a packed-struct pointer, so the
 * pointer does not have to be aligned for that type.
 */
#define __get_unaligned_t(type, ptr) ({ \
const struct { type x; } __attribute__((__packed__)) *__pptr = (typeof(__pptr))(ptr); \
__pptr->x; \
})
/* Unaligned read of *ptr, with the type taken from the pointer. */
#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr))
/* Two 64-bit slots shared with the userspace control program:
 *   key 0 - packed TCP/IP options read by values_get_tcpipopts()
 *           (0 means "not configured, use the defaults");
 *   key 1 - counter of generated SYNACKs, see values_inc_synacks().
 */
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, __u32);
__type(value, __u64);
__uint(max_entries, 2);
} values SEC(".maps");
/* List of TCP destination ports that are answered with SYN cookies. Entries
 * are scanned in index order by check_port_allowed(); a value of 0 ends the
 * list.
 */
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, __u32);
__type(value, __u16);
__uint(max_entries, MAX_ALLOWED_PORTS);
} allowed_ports SEC(".maps");
/* Conntrack lookup functions resolved as kernel symbols (__ksym). The two
 * variants take the same tuple/options arguments and differ in the context
 * type: xdp_md for the XDP program, __sk_buff for the TC program.
 * tcp_lookup() reads opts->error when a lookup returns NULL.
 */
extern struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx,
struct bpf_sock_tuple *bpf_tuple,
__u32 len_tuple,
struct bpf_ct_opts *opts,
__u32 len_opts) __ksym;
extern struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *skb_ctx,
struct bpf_sock_tuple *bpf_tuple,
u32 len_tuple,
struct bpf_ct_opts *opts,
u32 len_opts) __ksym;
/* tcp_lookup() calls this on every non-NULL lookup result once it has read
 * the entry's status.
 */
extern void bpf_ct_release(struct nf_conn *ct) __ksym;
/* Exchanges the ETH_ALEN-byte Ethernet addresses stored at @a and @b. */
static __always_inline void swap_eth_addr(__u8 *a, __u8 *b)
{
	__u8 saved_a[ETH_ALEN];

	__builtin_memcpy(saved_a, a, ETH_ALEN);
	__builtin_memcpy(a, b, ETH_ALEN);
	__builtin_memcpy(b, saved_a, ETH_ALEN);
}
/* Folds a 32-bit checksum accumulator into 16 bits and returns its one's
 * complement. Two folding rounds are done so that the carry produced by the
 * first round is absorbed as well.
 */
static __always_inline __u16 csum_fold(__u32 csum)
{
	__u32 folded;

	folded = (csum >> 16) + (csum & 0xffff);
	folded = (folded >> 16) + (folded & 0xffff);

	return (__u16)~folded;
}
/* Completes a TCP/UDP checksum over IPv4: adds the pseudo-header (source and
 * destination address, protocol and length) to the partial sum @csum and
 * returns the folded, complemented 16-bit result.
 */
static __always_inline __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
					       __u32 len, __u8 proto,
					       __u32 csum)
{
	__u64 acc;

	/* Protocol and length are placed according to the host byte order. */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	acc = proto + len;
#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	acc = (proto + len) << 8;
#else
#error Unknown endian
#endif
	acc += csum;
	acc += (__u32)saddr;
	acc += (__u32)daddr;

	/* Fold the 64-bit accumulator down to 32 bits, carries included. */
	acc = (acc >> 32) + (acc & 0xffffffff);
	acc = (acc >> 32) + (acc & 0xffffffff);

	return csum_fold((__u32)acc);
}
/* Completes a TCP checksum over IPv6: adds the pseudo-header (both 128-bit
 * addresses, length and next-header value) to the partial sum @csum and
 * returns the folded, complemented 16-bit result.
 */
static __always_inline __u16 csum_ipv6_magic(const struct in6_addr *saddr,
					     const struct in6_addr *daddr,
					     __u32 len, __u8 proto, __u32 csum)
{
	__u64 acc = csum;
	int word;

	/* Accumulate the four 32-bit words of each address. */
#pragma unroll
	for (word = 0; word < 4; word++) {
		acc += (__u32)saddr->in6_u.u6_addr32[word];
		acc += (__u32)daddr->in6_u.u6_addr32[word];
	}

	/* Don't combine additions to avoid 32-bit overflow. */
	acc += bpf_htonl(len);
	acc += bpf_htonl(proto);

	/* Fold the 64-bit accumulator down to 32 bits, carries included. */
	acc = (acc >> 32) + (acc & 0xffffffff);
	acc = (acc >> 32) + (acc & 0xffffffff);

	return csum_fold((__u32)acc);
}
/* Returns the current value of bpf_ktime_get_ns(), in nanoseconds. */
static __always_inline __u64 tcp_clock_ns(void)
{
	__u64 now_ns = bpf_ktime_get_ns();

	return now_ns;
}
/* Converts nanoseconds to timestamp ticks (TCP_TS_HZ ticks per second); the
 * result is truncated to 32 bits.
 */
static __always_inline __u32 tcp_ns_to_ts(__u64 ns)
{
	const __u64 ns_per_tick = NSEC_PER_SEC / TCP_TS_HZ;

	return ns / ns_per_tick;
}
/* Returns the current clock expressed in timestamp ticks. */
static __always_inline __u32 tcp_time_stamp_raw(void)
{
	__u64 now_ns = tcp_clock_ns();

	return tcp_ns_to_ts(now_ns);
}
/* State shared between tscookie_init() and the option-parsing callback. */
struct tcpopt_context {
/* Current parse position inside the TCP options. */
__u8 *ptr;
/* End of the TCP header as given by its length field. */
__u8 *end;
/* End of the packet data. */
void *data_end;
/* Where the client's tsval is stored when a timestamp option is found. */
__be32 *tsecr;
/* Window scale; starts as TS_OPT_WSCALE_MASK and is replaced by the parsed
 * value (capped at TCP_MAX_WSCALE) if a window scale option is present.
 */
__u8 wscale;
/* Set when a well-formed timestamp option was seen. */
bool option_timestamp;
/* Set when a SACK-permitted option was seen. */
bool option_sack;
};
/* Parses one TCP option at ctx->ptr and advances ctx->ptr past it.
 *
 * Window scale, timestamp and SACK-permitted options are recorded in @ctx;
 * for a timestamp option the client's tsval is written through ctx->tsecr.
 * Other option kinds are skipped using their length byte.
 *
 * Returns 0 when parsing may continue with the next option, 1 when it has to
 * stop: end of header or packet reached, EOL option, or an option whose
 * length is below 2 or runs past the end of the header.
 */
static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
{
__u8 opcode, opsize;
/* The option kind byte must lie inside both the header and the packet. */
if (ctx->ptr >= ctx->end)
return 1;
if (ctx->ptr >= ctx->data_end)
return 1;
opcode = ctx->ptr[0];
if (opcode == TCPOPT_EOL)
return 1;
/* NOP is a single byte without a length field. */
if (opcode == TCPOPT_NOP) {
++ctx->ptr;
return 0;
}
/* The length byte must lie inside both the header and the packet. */
if (ctx->ptr + 1 >= ctx->end)
return 1;
if (ctx->ptr + 1 >= ctx->data_end)
return 1;
opsize = ctx->ptr[1];
/* The length covers the kind and length bytes, so it can't be below 2. */
if (opsize < 2)
return 1;
if (ctx->ptr + opsize > ctx->end)
return 1;
switch (opcode) {
case TCPOPT_WINDOW:
/* The constant-size check against data_end guards the ctx->ptr[2] read. */
if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end)
ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE;
break;
case TCPOPT_TIMESTAMP:
if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) {
ctx->option_timestamp = true;
/* Client's tsval becomes our tsecr. */
*ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2));
}
break;
case TCPOPT_SACK_PERM:
if (opsize == TCPOLEN_SACK_PERM)
ctx->option_sack = true;
break;
}
ctx->ptr += opsize;
return 0;
}
/* Callback passed to bpf_loop() by tscookie_init(): parses up to 7 TCP
 * options per invocation. @index is unused; @context is the
 * struct tcpopt_context.
 *
 * Returns 1 as soon as tscookie_tcpopt_parse() reports that parsing must
 * stop, 0 otherwise.
 */
static int tscookie_tcpopt_parse_batch(__u32 index, void *context)
{
int i;
for (i = 0; i < 7; i++)
if (tscookie_tcpopt_parse(context))
return 1;
return 0;
}
/* Builds the timestamp cookie for a SYNACK from the client's TCP options.
 *
 * The options of @tcp_header (@tcp_len bytes long) are parsed; if a timestamp
 * option is present, the client's tsval is stored in *@tsecr and *@tsval is
 * set to the current timestamp with its low TSBITS bits replaced by the
 * window scale, SACK and ECN flags, in network byte order.
 *
 * Returns true if the client sent a timestamp option, false otherwise (in
 * which case *@tsval is not written).
 */
static __always_inline bool tscookie_init(struct tcphdr *tcp_header,
__u16 tcp_len, __be32 *tsval,
__be32 *tsecr, void *data_end)
{
struct tcpopt_context loop_ctx = {
.ptr = (__u8 *)(tcp_header + 1),
.end = (__u8 *)tcp_header + tcp_len,
.data_end = data_end,
.tsecr = tsecr,
/* All-ones window scale stands for "no window scale option seen". */
.wscale = TS_OPT_WSCALE_MASK,
.option_timestamp = false,
.option_sack = false,
};
u32 cookie;
/* Up to 6 iterations of 7 options each. */
bpf_loop(6, tscookie_tcpopt_parse_batch, &loop_ctx, 0);
if (!loop_ctx.option_timestamp)
return false;
cookie = tcp_time_stamp_raw() & ~TSMASK;
cookie |= loop_ctx.wscale & TS_OPT_WSCALE_MASK;
if (loop_ctx.option_sack)
cookie |= TS_OPT_SACK;
if (tcp_header->ece && tcp_header->cwr)
cookie |= TS_OPT_ECN;
*tsval = bpf_htonl(cookie);
return true;
}
/* Fetches the MSS, window scale and TTL to put into generated SYNACKs.
 *
 * The settings come from slot 0 of the values map, packed as: bits 0-15
 * IPv4 MSS, bits 16-19 window scale, bits 24-31 TTL, bits 32-47 IPv6 MSS.
 * When the slot is missing or zero, the DEFAULT_* constants are used.
 */
static __always_inline void values_get_tcpipopts(__u16 *mss, __u8 *wscale,
						 __u8 *ttl, bool ipv6)
{
	__u32 opts_key = 0;
	__u64 *packed = bpf_map_lookup_elem(&values, &opts_key);

	if (!packed || *packed == 0) {
		*mss = ipv6 ? DEFAULT_MSS6 : DEFAULT_MSS4;
		*wscale = DEFAULT_WSCALE;
		*ttl = DEFAULT_TTL;
		return;
	}

	*mss = (ipv6 ? *packed >> 32 : *packed) & 0xffff;
	*wscale = (*packed >> 16) & 0xf;
	*ttl = (*packed >> 24) & 0xff;
}
static __always_inline void values_inc_synacks(void)
{
__u32 key = 1;
__u32 *value;
value = bpf_map_lookup_elem(&values, &key);
if (value)
__sync_fetch_and_add(value, 1);
}
/* Returns true if @port (host byte order) is listed in the allowed_ports
 * map. The map is scanned from index 0 until a match, a 0 entry, or the end
 * of the map.
 */
static __always_inline bool check_port_allowed(__u16 port)
{
	__u32 slot;

	for (slot = 0; slot < MAX_ALLOWED_PORTS; slot++) {
		__u32 key = slot;
		__u16 *entry = bpf_map_lookup_elem(&allowed_ports, &key);

		/* A failed lookup or the 0 terminator ends the list. The
		 * terminator is tested before the match so that a forbidden
		 * port == 0 is never reported as allowed.
		 */
		if (!entry || *entry == 0)
			return false;
		if (*entry == port)
			return true;
	}

	return false;
}
/* Pointers into the packet, filled in by tcp_dissect() and refreshed by
 * syncookie_part2(). Exactly one of ipv4 and ipv6 is non-NULL after a
 * successful tcp_dissect().
 */
struct header_pointers {
struct ethhdr *eth;
struct iphdr *ipv4;
struct ipv6hdr *ipv6;
struct tcphdr *tcp;
/* TCP header length in bytes (doff * 4). */
__u16 tcp_len;
};
/* Locates the Ethernet, IPv4/IPv6 and TCP headers of the packet in
 * [@data, @data_end) and stores them in @hdr.
 *
 * Returns:
 *   XDP_TX   - headers found and validated; the caller continues processing
 *              (callers compare the result against XDP_TX for this purpose);
 *   XDP_PASS - not IPv4/IPv6 or not TCP; the packet is left alone;
 *   XDP_DROP - truncated or malformed headers.
 */
static __always_inline int tcp_dissect(void *data, void *data_end,
struct header_pointers *hdr)
{
hdr->eth = data;
if (hdr->eth + 1 > data_end)
return XDP_DROP;
switch (bpf_ntohs(hdr->eth->h_proto)) {
case ETH_P_IP:
hdr->ipv6 = NULL;
hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth);
if (hdr->ipv4 + 1 > data_end)
return XDP_DROP;
/* A header length below the fixed IPv4 header size is invalid. */
if (hdr->ipv4->ihl * 4 < sizeof(*hdr->ipv4))
return XDP_DROP;
if (hdr->ipv4->version != 4)
return XDP_DROP;
if (hdr->ipv4->protocol != IPPROTO_TCP)
return XDP_PASS;
/* TCP starts after the IPv4 header including its options. */
hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4;
break;
case ETH_P_IPV6:
hdr->ipv4 = NULL;
hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth);
if (hdr->ipv6 + 1 > data_end)
return XDP_DROP;
if (hdr->ipv6->version != 6)
return XDP_DROP;
/* XXX: Extension headers are not supported and could circumvent
 * XDP SYN flood protection.
 */
if (hdr->ipv6->nexthdr != NEXTHDR_TCP)
return XDP_PASS;
hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6);
break;
default:
/* XXX: VLANs will circumvent XDP SYN flood protection. */
return XDP_PASS;
}
if (hdr->tcp + 1 > data_end)
return XDP_DROP;
hdr->tcp_len = hdr->tcp->doff * 4;
/* A data offset below the fixed TCP header size is invalid. */
if (hdr->tcp_len < sizeof(*hdr->tcp))
return XDP_DROP;
return XDP_TX;
}
/* Looks up the packet's connection in conntrack.
 *
 * @ctx is the program context (struct xdp_md * when @xdp is true,
 * struct __sk_buff * otherwise); @hdr must have been filled by tcp_dissect().
 *
 * Returns:
 *   XDP_PASS    - the packet belongs to a confirmed conntrack entry;
 *   XDP_TX      - no entry exists, or the entry is not confirmed yet; the
 *                 caller continues with SYN cookie processing;
 *   XDP_DROP    - IPv4 packet that is fragmented or lacks the DF flag;
 *   XDP_ABORTED - the lookup failed for a reason other than "not found".
 */
static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bool xdp)
{
	struct bpf_ct_opts ct_lookup_opts = {
		.netns_id = BPF_F_CURRENT_NETNS,
		.l4proto = IPPROTO_TCP,
	};
	struct bpf_sock_tuple tup = {};
	struct nf_conn *ct;
	__u32 tup_size;

	if (hdr->ipv4) {
		/* TCP doesn't normally use fragments, and XDP can't reassemble
		 * them.
		 */
		if ((hdr->ipv4->frag_off & bpf_htons(IP_DF | IP_MF | IP_OFFSET)) != bpf_htons(IP_DF))
			return XDP_DROP;

		tup.ipv4.saddr = hdr->ipv4->saddr;
		tup.ipv4.daddr = hdr->ipv4->daddr;
		tup.ipv4.sport = hdr->tcp->source;
		tup.ipv4.dport = hdr->tcp->dest;
		tup_size = sizeof(tup.ipv4);
	} else if (hdr->ipv6) {
		__builtin_memcpy(tup.ipv6.saddr, &hdr->ipv6->saddr, sizeof(tup.ipv6.saddr));
		__builtin_memcpy(tup.ipv6.daddr, &hdr->ipv6->daddr, sizeof(tup.ipv6.daddr));
		tup.ipv6.sport = hdr->tcp->source;
		tup.ipv6.dport = hdr->tcp->dest;
		tup_size = sizeof(tup.ipv6);
	} else {
		/* The verifier can't track that either ipv4 or ipv6 is not
		 * NULL.
		 */
		return XDP_ABORTED;
	}

	if (xdp)
		ct = bpf_xdp_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts));
	else
		ct = bpf_skb_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts));

	if (ct) {
		unsigned long status = ct->status;

		bpf_ct_release(ct);
		/* IPS_CONFIRMED is the flag mask (1 << IPS_CONFIRMED_BIT).
		 * Masking with IPS_CONFIRMED_BIT itself would test unrelated
		 * status bits instead.
		 */
		if (status & IPS_CONFIRMED)
			return XDP_PASS;
	} else if (ct_lookup_opts.error != -ENOENT) {
		return XDP_ABORTED;
	}

	/* error == -ENOENT || !(status & IPS_CONFIRMED) */
	return XDP_TX;
}
/* Writes the TCP options of a SYNACK to @buf as 32-bit words.
 *
 * An MSS option is always written. If @tsopt is not NULL (tsopt[0] is our
 * tsval cookie, tsopt[1] the echoed tsecr), a timestamp option follows,
 * preceded by either SACK-permitted or two NOPs, and then a window scale
 * option unless the cookie's window scale field is all ones.
 *
 * Returns the number of 32-bit words written; the caller adds it to doff.
 */
static __always_inline __u8 tcp_mkoptions(__be32 *buf, __be32 *tsopt, __u16 mss,
__u8 wscale)
{
__be32 *start = buf;
*buf++ = bpf_htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
if (!tsopt)
return buf - start;
/* 1 << 4 has the same value as TS_OPT_SACK in the cookie layout. */
if (tsopt[0] & bpf_htonl(1 << 4))
*buf++ = bpf_htonl((TCPOPT_SACK_PERM << 24) |
(TCPOLEN_SACK_PERM << 16) |
(TCPOPT_TIMESTAMP << 8) |
TCPOLEN_TIMESTAMP);
else
*buf++ = bpf_htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
(TCPOPT_TIMESTAMP << 8) |
TCPOLEN_TIMESTAMP);
*buf++ = tsopt[0];
*buf++ = tsopt[1];
/* 0xf has the same value as TS_OPT_WSCALE_MASK; tscookie_init() leaves the
 * field all ones when the client sent no window scale option.
 */
if ((tsopt[0] & bpf_htonl(0xf)) != bpf_htonl(0xf))
*buf++ = bpf_htonl((TCPOPT_NOP << 24) |
(TCPOPT_WINDOW << 16) |
(TCPOLEN_WINDOW << 8) |
wscale);
return buf - start;
}
/* Rewrites the TCP header of a received SYN in place into a SYNACK.
 *
 * Ports are swapped, ack_seq becomes the client's seq + 1, seq becomes
 * @cookie, and the options are regenerated by tcp_mkoptions(). The checksum
 * field is zeroed; the caller computes it afterwards.
 */
static __always_inline void tcp_gen_synack(struct tcphdr *tcp_header,
__u32 cookie, __be32 *tsopt,
__u16 mss, __u8 wscale)
{
void *tcp_options;
/* Overwrites the whole flags word, including doff (set again below). */
tcp_flag_word(tcp_header) = TCP_FLAG_SYN | TCP_FLAG_ACK;
/* 1 << 5 has the same value as TS_OPT_ECN in the cookie layout. */
if (tsopt && (tsopt[0] & bpf_htonl(1 << 5)))
tcp_flag_word(tcp_header) |= TCP_FLAG_ECE;
tcp_header->doff = 5; /* doff is part of tcp_flag_word. */
swap(tcp_header->source, tcp_header->dest);
tcp_header->ack_seq = bpf_htonl(bpf_ntohl(tcp_header->seq) + 1);
tcp_header->seq = bpf_htonl(cookie);
tcp_header->window = 0;
tcp_header->urg_ptr = 0;
tcp_header->check = 0; /* Calculate checksum later. */
tcp_options = (void *)(tcp_header + 1);
/* tcp_mkoptions() returns the option length in 32-bit words. */
tcp_header->doff += tcp_mkoptions(tcp_options, tsopt, mss, wscale);
}
/* Turns the received IPv4 SYN described by @hdr into a SYNACK in place.
 *
 * Ethernet and IP addresses are swapped, the IPv4 header fields are reset,
 * the TCP header is rewritten by tcp_gen_synack(), and hdr->tcp_len and
 * tot_len are updated. Checksums are left for the caller to compute. tot_len
 * assumes an IPv4 header without options.
 */
static __always_inline void tcpv4_gen_synack(struct header_pointers *hdr,
__u32 cookie, __be32 *tsopt)
{
__u8 wscale;
__u16 mss;
__u8 ttl;
values_get_tcpipopts(&mss, &wscale, &ttl, false);
swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest);
swap(hdr->ipv4->saddr, hdr->ipv4->daddr);
hdr->ipv4->check = 0; /* Calculate checksum later. */
hdr->ipv4->tos = 0;
hdr->ipv4->id = 0;
hdr->ipv4->ttl = ttl;
tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale);
hdr->tcp_len = hdr->tcp->doff * 4;
hdr->ipv4->tot_len = bpf_htons(sizeof(*hdr->ipv4) + hdr->tcp_len);
}
/* Turns the received IPv6 SYN described by @hdr into a SYNACK in place.
 *
 * Ethernet and IP addresses are swapped, the first 32-bit word of the IPv6
 * header is reset to version 6 with all other bits zero, the hop limit is
 * set, the TCP header is rewritten by tcp_gen_synack(), and hdr->tcp_len and
 * payload_len are updated. The TCP checksum is left for the caller.
 */
static __always_inline void tcpv6_gen_synack(struct header_pointers *hdr,
__u32 cookie, __be32 *tsopt)
{
__u8 wscale;
__u16 mss;
__u8 ttl;
values_get_tcpipopts(&mss, &wscale, &ttl, true);
swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest);
swap(hdr->ipv6->saddr, hdr->ipv6->daddr);
*(__be32 *)hdr->ipv6 = bpf_htonl(0x60000000);
hdr->ipv6->hop_limit = ttl;
tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale);
hdr->tcp_len = hdr->tcp->doff * 4;
hdr->ipv6->payload_len = bpf_htons(hdr->tcp_len);
}
/* Answers a SYN with a SYN-cookie SYNACK built in place.
 *
 * Steps: reject SYNs with FIN/RST or to a port that is not allowed, verify
 * the IPv4 and TCP checksums, generate the cookie (and the timestamp cookie
 * if the client sent a timestamp option), rewrite the headers into a SYNACK,
 * recompute the checksums, shrink the packet to the new size and bump the
 * SYNACK counter.
 *
 * @hdr must have been refreshed by syncookie_part2(); @ctx is the program
 * context matching @xdp.
 *
 * Returns XDP_TX when the SYNACK is ready to be sent back, XDP_DROP for
 * rejected packets and XDP_ABORTED on helper failures.
 */
static __always_inline int syncookie_handle_syn(struct header_pointers *hdr,
void *ctx,
void *data, void *data_end,
bool xdp)
{
__u32 old_pkt_size, new_pkt_size;
/* Unlike clang 10, clang 11 and 12 generate code that doesn't pass the
 * BPF verifier if tsopt is not volatile. Volatile forces it to store
 * the pointer value and use it directly, otherwise tcp_mkoptions is
 * (mis)compiled like this:
 * if (!tsopt)
 * return buf - start;
 * reg = stored_return_value_of_tscookie_init;
 * if (reg)
 * tsopt = tsopt_buf;
 * else
 * tsopt = NULL;
 * ...
 * *buf++ = tsopt[1];
 * It creates a dead branch where tsopt is assigned NULL, but the
 * verifier can't prove it's dead and blocks the program.
 */
__be32 * volatile tsopt = NULL;
__be32 tsopt_buf[2] = {};
__u16 ip_len;
__u32 cookie;
__s64 value;
/* Checksum is not yet verified, but both checksum failure and TCP
 * header checks return XDP_DROP, so the order doesn't matter.
 */
if (hdr->tcp->fin || hdr->tcp->rst)
return XDP_DROP;
/* Issue SYN cookies on allowed ports, drop SYN packets on blocked
 * ports.
 */
if (!check_port_allowed(bpf_ntohs(hdr->tcp->dest)))
return XDP_DROP;
if (hdr->ipv4) {
/* Check the IPv4 and TCP checksums before creating a SYNACK. */
value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, hdr->ipv4->ihl * 4, 0);
if (value < 0)
return XDP_ABORTED;
if (csum_fold(value) != 0)
return XDP_DROP; /* Bad IPv4 checksum. */
value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0);
if (value < 0)
return XDP_ABORTED;
if (csum_tcpudp_magic(hdr->ipv4->saddr, hdr->ipv4->daddr,
hdr->tcp_len, IPPROTO_TCP, value) != 0)
return XDP_DROP; /* Bad TCP checksum. */
ip_len = sizeof(*hdr->ipv4);
value = bpf_tcp_raw_gen_syncookie_ipv4(hdr->ipv4, hdr->tcp,
hdr->tcp_len);
} else if (hdr->ipv6) {
/* Check the TCP checksum before creating a SYNACK. */
value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0);
if (value < 0)
return XDP_ABORTED;
if (csum_ipv6_magic(&hdr->ipv6->saddr, &hdr->ipv6->daddr,
hdr->tcp_len, IPPROTO_TCP, value) != 0)
return XDP_DROP; /* Bad TCP checksum. */
ip_len = sizeof(*hdr->ipv6);
value = bpf_tcp_raw_gen_syncookie_ipv6(hdr->ipv6, hdr->tcp,
hdr->tcp_len);
} else {
return XDP_ABORTED;
}
/* A negative value from the cookie helper is treated as a failure. */
if (value < 0)
return XDP_ABORTED;
cookie = (__u32)value;
/* tsopt stays NULL when the client sent no timestamp option. */
if (tscookie_init((void *)hdr->tcp, hdr->tcp_len,
&tsopt_buf[0], &tsopt_buf[1], data_end))
tsopt = tsopt_buf;
/* Check that there is enough space for a SYNACK. It also covers
 * the check that the destination of the __builtin_memmove below
 * doesn't overflow.
 */
if (data + sizeof(*hdr->eth) + ip_len + TCP_MAXLEN > data_end)
return XDP_ABORTED;
if (hdr->ipv4) {
/* Drop IPv4 options: move the fixed TCP header right after the fixed
 * IPv4 header and reset ihl accordingly.
 */
if (hdr->ipv4->ihl * 4 > sizeof(*hdr->ipv4)) {
struct tcphdr *new_tcp_header;
new_tcp_header = data + sizeof(*hdr->eth) + sizeof(*hdr->ipv4);
__builtin_memmove(new_tcp_header, hdr->tcp, sizeof(*hdr->tcp));
hdr->tcp = new_tcp_header;
hdr->ipv4->ihl = sizeof(*hdr->ipv4) / 4;
}
tcpv4_gen_synack(hdr, cookie, tsopt);
} else if (hdr->ipv6) {
tcpv6_gen_synack(hdr, cookie, tsopt);
} else {
return XDP_ABORTED;
}
/* Recalculate checksums. */
hdr->tcp->check = 0;
value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0);
if (value < 0)
return XDP_ABORTED;
if (hdr->ipv4) {
hdr->tcp->check = csum_tcpudp_magic(hdr->ipv4->saddr,
hdr->ipv4->daddr,
hdr->tcp_len,
IPPROTO_TCP,
value);
hdr->ipv4->check = 0;
value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, sizeof(*hdr->ipv4), 0);
if (value < 0)
return XDP_ABORTED;
hdr->ipv4->check = csum_fold(value);
} else if (hdr->ipv6) {
hdr->tcp->check = csum_ipv6_magic(&hdr->ipv6->saddr,
&hdr->ipv6->daddr,
hdr->tcp_len,
IPPROTO_TCP,
value);
} else {
return XDP_ABORTED;
}
/* Set the new packet size. */
old_pkt_size = data_end - data;
new_pkt_size = sizeof(*hdr->eth) + ip_len + hdr->tcp->doff * 4;
if (xdp) {
/* The XDP helper takes a delta, the skb helper an absolute length. */
if (bpf_xdp_adjust_tail(ctx, new_pkt_size - old_pkt_size))
return XDP_ABORTED;
} else {
if (bpf_skb_change_tail(ctx, new_pkt_size, 0))
return XDP_ABORTED;
}
values_inc_synacks();
return XDP_TX;
}
/* Validates the SYN cookie carried by an ACK.
 *
 * Returns XDP_PASS when the raw cookie check helper reports success,
 * XDP_DROP for an RST or a failed cookie check, and XDP_ABORTED when @hdr
 * has neither an IPv4 nor an IPv6 header.
 */
static __always_inline int syncookie_handle_ack(struct header_pointers *hdr)
{
	int cookie_err;

	if (hdr->tcp->rst)
		return XDP_DROP;

	if (hdr->ipv4) {
		cookie_err = bpf_tcp_raw_check_syncookie_ipv4(hdr->ipv4, hdr->tcp);
	} else if (hdr->ipv6) {
		cookie_err = bpf_tcp_raw_check_syncookie_ipv6(hdr->ipv6, hdr->tcp);
	} else {
		return XDP_ABORTED;
	}

	return cookie_err ? XDP_DROP : XDP_PASS;
}
/* First half of the SYN proxy logic, run before the packet is resized.
 *
 * Dissects the packet, passes packets of confirmed connections, drops TCP
 * packets that are neither a pure SYN nor a pure ACK, and finally grows the
 * packet so that the TCP header area spans TCP_MAXLEN bytes.
 *
 * Returns XDP_TX when the caller should re-read the packet pointers and
 * continue with syncookie_part2(); any other value is the final verdict.
 */
static __always_inline int syncookie_part1(void *ctx, void *data, void *data_end,
					   struct header_pointers *hdr, bool xdp)
{
	int ret;

	ret = tcp_dissect(data, data_end, hdr);
	if (ret != XDP_TX)
		return ret;

	ret = tcp_lookup(ctx, hdr, xdp);
	if (ret != XDP_TX)
		return ret;

	/* Packet is TCP and doesn't belong to an established connection. */

	/* Exactly one of SYN and ACK must be set. */
	if ((hdr->tcp->syn ^ hdr->tcp->ack) != 1)
		return XDP_DROP;

	/* Grow the TCP header to TCP_MAXLEN to be able to pass any hdr->tcp_len
	 * to bpf_tcp_raw_gen_syncookie_ipv{4,6} and pass the verifier.
	 */
	if (xdp) {
		if (bpf_xdp_adjust_tail(ctx, TCP_MAXLEN - hdr->tcp_len))
			return XDP_ABORTED;
	} else {
		/* Without volatile the verifier throws this error:
		 * R9 32-bit pointer arithmetic prohibited
		 */
		volatile u64 old_len = data_end - data;

		if (bpf_skb_change_tail(ctx, old_len + TCP_MAXLEN - hdr->tcp_len, 0))
			return XDP_ABORTED;
	}

	return XDP_TX;
}
/* Second half of the SYN proxy logic, run after syncookie_part1() resized
 * the packet.
 *
 * Rebuilds the header pointers in @hdr from the fresh @data/@data_end,
 * re-validates the bounds needed later (IPV4_MAXLEN for IPv4, TCP_MAXLEN for
 * TCP), and dispatches to syncookie_handle_syn() or syncookie_handle_ack()
 * depending on the SYN flag.
 *
 * Returns the handler's verdict, or XDP_ABORTED if a bounds check fails.
 */
static __always_inline int syncookie_part2(void *ctx, void *data, void *data_end,
struct header_pointers *hdr, bool xdp)
{
if (hdr->ipv4) {
hdr->eth = data;
hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth);
/* IPV4_MAXLEN is needed when calculating checksum.
 * At least sizeof(struct iphdr) is needed here to access ihl.
 */
if ((void *)hdr->ipv4 + IPV4_MAXLEN > data_end)
return XDP_ABORTED;
hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4;
} else if (hdr->ipv6) {
hdr->eth = data;
hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth);
hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6);
} else {
return XDP_ABORTED;
}
if ((void *)hdr->tcp + TCP_MAXLEN > data_end)
return XDP_ABORTED;
/* We run out of registers, tcp_len gets spilled to the stack, and the
 * verifier forgets its min and max values checked above in tcp_dissect.
 */
hdr->tcp_len = hdr->tcp->doff * 4;
if (hdr->tcp_len < sizeof(*hdr->tcp))
return XDP_ABORTED;
return hdr->tcp->syn ? syncookie_handle_syn(hdr, ctx, data, data_end, xdp) :
syncookie_handle_ack(hdr);
}
/* XDP entry point: runs both halves of the SYN proxy logic and returns the
 * resulting XDP action unchanged.
 */
SEC("xdp")
int syncookie_xdp(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct header_pointers hdr;
int ret;
ret = syncookie_part1(ctx, data, data_end, &hdr, true);
if (ret != XDP_TX)
return ret;
/* syncookie_part1() resized the packet, so the packet pointers are read
 * again from the context before they are used.
 */
data_end = (void *)(long)ctx->data_end;
data = (void *)(long)ctx->data;
return syncookie_part2(ctx, data, data_end, &hdr, true);
}
/* TC entry point: runs both halves of the SYN proxy logic and translates the
 * XDP-style verdict into a TC action: XDP_PASS becomes TC_ACT_OK, XDP_TX
 * becomes a redirect to the receiving interface, and everything else becomes
 * TC_ACT_SHOT.
 */
SEC("tc")
int syncookie_tc(struct __sk_buff *skb)
{
	void *data_end = (void *)(long)skb->data_end;
	void *data = (void *)(long)skb->data;
	struct header_pointers hdr;
	int verdict;

	verdict = syncookie_part1(skb, data, data_end, &hdr, false);
	if (verdict != XDP_TX)
		return verdict == XDP_PASS ? TC_ACT_OK : TC_ACT_SHOT;

	/* syncookie_part1() resized the packet, so the packet pointers are
	 * read again from the context before they are used.
	 */
	data_end = (void *)(long)skb->data_end;
	data = (void *)(long)skb->data;

	verdict = syncookie_part2(skb, data, data_end, &hdr, false);
	if (verdict == XDP_PASS)
		return TC_ACT_OK;
	if (verdict == XDP_TX)
		return bpf_redirect(skb->ifindex, 0);

	return TC_ACT_SHOT;
}
/* License string placed in the "license" section of the BPF object. */
char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#include <stdnoreturn.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <getopt.h>
#include <signal.h>
#include <sys/types.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <net/if.h>
#include <linux/if_link.h>
#include <linux/limits.h>
/* Index of the interface selected with --iface; also read by cleanup(). */
static unsigned int ifindex;
/* ID of the program attached by this process, or 0 if it attached nothing. */
static __u32 attached_prog_id;
/* True when the program was attached through TC instead of XDP. */
static bool attached_tc;
static void noreturn cleanup(int sig)
{
LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
int prog_fd;
int err;
if (attached_prog_id == 0)
exit(0);
if (attached_tc) {
LIBBPF_OPTS(bpf_tc_hook, hook,
.ifindex = ifindex,
.attach_point = BPF_TC_INGRESS);
err = bpf_tc_hook_destroy(&hook);
if (err < 0) {
fprintf(stderr, "Error: bpf_tc_hook_destroy: %s\n", strerror(-err));
fprintf(stderr, "Failed to destroy the TC hook\n");
exit(1);
}
exit(0);
}
prog_fd = bpf_prog_get_fd_by_id(attached_prog_id);
if (prog_fd < 0) {
fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd));
err = bpf_xdp_attach(ifindex, -1, 0, NULL);
if (err < 0) {
fprintf(stderr, "Error: bpf_set_link_xdp_fd: %s\n", strerror(-err));
fprintf(stderr, "Failed to detach XDP program\n");
exit(1);
}
} else {
opts.old_prog_fd = prog_fd;
err = bpf_xdp_attach(ifindex, -1, XDP_FLAGS_REPLACE, &opts);
close(prog_fd);
if (err < 0) {
fprintf(stderr, "Error: bpf_set_link_xdp_fd_opts: %s\n", strerror(-err));
/* Not an error if already replaced by someone else. */
if (err != -EEXIST) {
fprintf(stderr, "Failed to detach XDP program\n");
exit(1);
}
}
}
exit(0);
}
/* Prints the command line synopsis to stderr and exits with status 1. */
static noreturn void usage(const char *progname)
{
	fprintf(stderr,
		"Usage: %s [--iface <iface>|--prog <prog_id>]"
		" [--mss4 <mss ipv4> --mss6 <mss ipv6> --wscale <wscale> --ttl <ttl>]"
		" [--ports <port1>,<port2>,...] [--single] [--tc]\n",
		progname);
	exit(1);
}
/* Parses @arg as a decimal unsigned long not greater than @limit.
 *
 * On any failure (empty string, trailing characters, conversion error or
 * value above @limit) the usage text is printed and the program exits.
 */
static unsigned long parse_arg_ul(const char *progname, const char *arg, unsigned long limit)
{
	unsigned long value;
	char *tail;
	int valid;

	errno = 0;
	value = strtoul(arg, &tail, 10);

	valid = errno == 0 && *tail == '\0' && arg[0] != '\0' && value <= limit;
	if (!valid)
		usage(progname);

	return value;
}
/* Parses the command line into the caller's variables.
 *
 * Every output is given a defined value before parsing starts: *ifindex and
 * *prog_id 0, *tcpipopts 0, *ports NULL, *single and *tc false.
 *
 * Constraints (a violation prints the usage text and exits):
 *   - exactly one of --iface and --prog must be given;
 *   - --mss4, --mss6, --wscale and --ttl must be given all together or not at
 *     all, and none of them may be 0;
 *   - no positional arguments are accepted.
 *
 * *tcpipopts is packed as: bits 0-15 mss4, 16-23 wscale, 24-31 ttl,
 * 32-47 mss6.
 */
static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 *prog_id,
			  __u64 *tcpipopts, char **ports, bool *single, bool *tc)
{
	static struct option long_options[] = {
		{ "help", no_argument, NULL, 'h' },
		{ "iface", required_argument, NULL, 'i' },
		{ "prog", required_argument, NULL, 'x' },
		{ "mss4", required_argument, NULL, 4 },
		{ "mss6", required_argument, NULL, 6 },
		{ "wscale", required_argument, NULL, 'w' },
		{ "ttl", required_argument, NULL, 't' },
		{ "ports", required_argument, NULL, 'p' },
		{ "single", no_argument, NULL, 's' },
		{ "tc", no_argument, NULL, 'c' },
		{ NULL, 0, NULL, 0 },
	};
	unsigned long mss4 = 0, mss6 = 0, wscale = 0, ttl = 0;
	unsigned int tcpipopts_mask = 0;

	if (argc < 2)
		usage(argv[0]);

	*ifindex = 0;
	*prog_id = 0;
	*tcpipopts = 0;
	*ports = NULL;
	*single = false;
	/* Must be set here: without --tc nothing else ever writes it, and the
	 * caller would otherwise read an indeterminate value.
	 */
	*tc = false;

	while (true) {
		int opt;

		opt = getopt_long(argc, argv, "", long_options, NULL);
		if (opt == -1)
			break;

		switch (opt) {
		case 'h':
			usage(argv[0]);
			break;
		case 'i':
			*ifindex = if_nametoindex(optarg);
			if (*ifindex == 0)
				usage(argv[0]);
			break;
		case 'x':
			*prog_id = parse_arg_ul(argv[0], optarg, UINT32_MAX);
			if (*prog_id == 0)
				usage(argv[0]);
			break;
		case 4:
			mss4 = parse_arg_ul(argv[0], optarg, UINT16_MAX);
			tcpipopts_mask |= 1 << 0;
			break;
		case 6:
			mss6 = parse_arg_ul(argv[0], optarg, UINT16_MAX);
			tcpipopts_mask |= 1 << 1;
			break;
		case 'w':
			wscale = parse_arg_ul(argv[0], optarg, 14);
			tcpipopts_mask |= 1 << 2;
			break;
		case 't':
			ttl = parse_arg_ul(argv[0], optarg, UINT8_MAX);
			tcpipopts_mask |= 1 << 3;
			break;
		case 'p':
			*ports = optarg;
			break;
		case 's':
			*single = true;
			break;
		case 'c':
			*tc = true;
			break;
		default:
			usage(argv[0]);
		}
	}
	if (optind < argc)
		usage(argv[0]);

	if (tcpipopts_mask == 0xf) {
		if (mss4 == 0 || mss6 == 0 || wscale == 0 || ttl == 0)
			usage(argv[0]);
		/* Widen before shifting: unsigned long may be only 32 bits. */
		*tcpipopts = ((__u64)mss6 << 32) | ((__u64)ttl << 24) |
			     ((__u64)wscale << 16) | (__u64)mss4;
	} else if (tcpipopts_mask != 0) {
		usage(argv[0]);
	}

	if (*ifindex != 0 && *prog_id != 0)
		usage(argv[0]);
	if (*ifindex == 0 && *prog_id == 0)
		usage(argv[0]);
}
/* Loads "<argv0>_kern.o" and attaches its SYN proxy program to @ifindex.
 *
 * With @tc set, the "syncookie_tc" program is attached to a TC ingress hook
 * created on the interface; otherwise "syncookie_xdp" is attached as the XDP
 * program, provided none is attached yet.
 *
 * On success attached_prog_id and attached_tc describe the attachment and
 * SIGINT/SIGTERM are routed to cleanup(). On failure the signal handlers are
 * restored and attached_prog_id is reset to 0.
 *
 * The BPF object is closed on every path once it has been opened.
 *
 * Returns 0 on success or a negative error code.
 */
static int syncookie_attach(const char *argv0, unsigned int ifindex, bool tc)
{
	struct bpf_prog_info info = {};
	__u32 info_len = sizeof(info);
	char xdp_filename[PATH_MAX];
	struct bpf_program *prog;
	struct bpf_object *obj;
	int prog_fd;
	int err;

	err = snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv0);
	if (err < 0 || (size_t)err >= sizeof(xdp_filename)) {
		fprintf(stderr, "Error: BPF object file name is too long\n");
		return -ENAMETOOLONG;
	}

	obj = bpf_object__open_file(xdp_filename, NULL);
	err = libbpf_get_error(obj);
	if (err < 0) {
		fprintf(stderr, "Error: bpf_object__open_file: %s\n", strerror(-err));
		return err;
	}

	err = bpf_object__load(obj);
	if (err < 0) {
		fprintf(stderr, "Error: bpf_object__load: %s\n", strerror(-err));
		goto out;
	}

	prog = bpf_object__find_program_by_name(obj, tc ? "syncookie_tc" : "syncookie_xdp");
	if (!prog) {
		fprintf(stderr, "Error: bpf_object__find_program_by_name: program was not found\n");
		err = -ENOENT;
		goto out;
	}

	prog_fd = bpf_program__fd(prog);

	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
	if (err < 0) {
		fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err));
		goto out;
	}

	/* Publish the attachment state before installing the handlers, so
	 * that cleanup() knows what to detach if a signal arrives right away.
	 */
	attached_tc = tc;
	attached_prog_id = info.id;
	signal(SIGINT, cleanup);
	signal(SIGTERM, cleanup);

	if (tc) {
		LIBBPF_OPTS(bpf_tc_hook, hook,
			    .ifindex = ifindex,
			    .attach_point = BPF_TC_INGRESS);
		LIBBPF_OPTS(bpf_tc_opts, opts,
			    .handle = 1,
			    .priority = 1,
			    .prog_fd = prog_fd);

		err = bpf_tc_hook_create(&hook);
		if (err < 0) {
			fprintf(stderr, "Error: bpf_tc_hook_create: %s\n",
				strerror(-err));
			goto fail;
		}
		err = bpf_tc_attach(&hook, &opts);
		if (err < 0) {
			fprintf(stderr, "Error: bpf_tc_attach: %s\n",
				strerror(-err));
			goto fail;
		}
	} else {
		err = bpf_xdp_attach(ifindex, prog_fd,
				     XDP_FLAGS_UPDATE_IF_NOEXIST, NULL);
		if (err < 0) {
			fprintf(stderr, "Error: bpf_xdp_attach: %s\n",
				strerror(-err));
			goto fail;
		}
	}
	err = 0;
out:
	bpf_object__close(obj);
	return err;
fail:
	signal(SIGINT, SIG_DFL);
	signal(SIGTERM, SIG_DFL);
	attached_prog_id = 0;
	goto out;
}
/* Opens the "values" and "allowed_ports" maps used by program @prog_id.
 *
 * On success *values_map_fd and *ports_map_fd hold file descriptors owned by
 * the caller. On failure both are set to -1 and nothing is left open.
 *
 * Only the first 8 maps of the program are inspected.
 *
 * Returns 0 on success, -ENOENT if either map was not found, or the negative
 * error code of the failing libbpf call.
 */
static int syncookie_open_bpf_maps(__u32 prog_id, int *values_map_fd, int *ports_map_fd)
{
	struct bpf_prog_info prog_info;
	__u32 map_ids[8];
	const __u32 max_map_ids = sizeof(map_ids) / sizeof(map_ids[0]);
	__u32 nr_map_ids;
	__u32 info_len;
	int prog_fd;
	int err;
	__u32 i;

	*values_map_fd = -1;
	*ports_map_fd = -1;

	prog_fd = bpf_prog_get_fd_by_id(prog_id);
	if (prog_fd < 0) {
		fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd));
		return prog_fd;
	}

	prog_info = (struct bpf_prog_info) {
		.nr_map_ids = max_map_ids,
		/* Go through unsigned long so the cast is also valid where
		 * pointers are narrower than 64 bits.
		 */
		.map_ids = (__u64)(unsigned long)map_ids,
	};
	info_len = sizeof(prog_info);

	err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
	if (err != 0) {
		fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err));
		goto out;
	}

	if (prog_info.nr_map_ids < 2) {
		fprintf(stderr, "Error: Found %u BPF maps, expected at least 2\n",
			prog_info.nr_map_ids);
		err = -ENOENT;
		goto out;
	}

	/* On return nr_map_ids is the number of maps the program uses, which
	 * may exceed the number of slots offered; only that many slots of
	 * map_ids[] were filled in, so never iterate past the array.
	 */
	nr_map_ids = prog_info.nr_map_ids;
	if (nr_map_ids > max_map_ids)
		nr_map_ids = max_map_ids;

	for (i = 0; i < nr_map_ids; i++) {
		struct bpf_map_info map_info = {};
		int map_fd;

		err = bpf_map_get_fd_by_id(map_ids[i]);
		if (err < 0) {
			fprintf(stderr, "Error: bpf_map_get_fd_by_id: %s\n", strerror(-err));
			goto err_close_map_fds;
		}
		map_fd = err;

		info_len = sizeof(map_info);
		err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
		if (err != 0) {
			fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err));
			close(map_fd);
			goto err_close_map_fds;
		}
		if (strcmp(map_info.name, "values") == 0) {
			/* Don't leak an earlier fd if the name shows up twice. */
			if (*values_map_fd != -1)
				close(*values_map_fd);
			*values_map_fd = map_fd;
			continue;
		}
		if (strcmp(map_info.name, "allowed_ports") == 0) {
			if (*ports_map_fd != -1)
				close(*ports_map_fd);
			*ports_map_fd = map_fd;
			continue;
		}
		close(map_fd);
	}

	if (*values_map_fd != -1 && *ports_map_fd != -1) {
		err = 0;
		goto out;
	}

	err = -ENOENT;

err_close_map_fds:
	if (*values_map_fd != -1)
		close(*values_map_fd);
	if (*ports_map_fd != -1)
		close(*ports_map_fd);
	*values_map_fd = -1;
	*ports_map_fd = -1;

out:
	close(prog_fd);
	return err;
}
/* Control program for the SYN proxy BPF program.
 *
 * Depending on the options it attaches the program to an interface (unless
 * an XDP program is already attached or --prog names an existing program),
 * replaces the allowed ports and/or TCP/IP options in the program's maps, and
 * prints the SYNACK counter either once (--single) or once per second.
 *
 * Returns 0 on success and 1 on failure.
 */
int main(int argc, char *argv[])
{
	int values_map_fd, ports_map_fd;
	__u64 tcpipopts;
	bool firstiter;
	__u64 prevcnt;
	__u32 prog_id;
	char *ports;
	bool single;
	int err = 0;
	/* Defined value even if parse_options() never writes it. */
	bool tc = false;

	parse_options(argc, argv, &ifindex, &prog_id, &tcpipopts, &ports,
		      &single, &tc);

	if (prog_id == 0) {
		if (!tc) {
			err = bpf_xdp_query_id(ifindex, 0, &prog_id);
			if (err < 0) {
				fprintf(stderr, "Error: bpf_xdp_query_id: %s\n",
					strerror(-err));
				goto out;
			}
		}
		/* Attach our own program unless the XDP query found one. */
		if (prog_id == 0) {
			err = syncookie_attach(argv[0], ifindex, tc);
			if (err < 0)
				goto out;
			prog_id = attached_prog_id;
		}
	}

	err = syncookie_open_bpf_maps(prog_id, &values_map_fd, &ports_map_fd);
	if (err < 0)
		goto out;

	if (ports) {
		__u16 port_last = 0;
		__u32 port_idx = 0;
		char *p = ports;

		fprintf(stderr, "Replacing allowed ports\n");

		while (p && *p != '\0') {
			char *token = strsep(&p, ",");
			__u16 port;

			port = parse_arg_ul(argv[0], token, UINT16_MAX);
			err = bpf_map_update_elem(ports_map_fd, &port_idx, &port, BPF_ANY);
			if (err != 0) {
				fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err));
				fprintf(stderr, "Failed to add port %u (index %u)\n",
					port, port_idx);
				goto out_close_maps;
			}
			fprintf(stderr, "Added port %u\n", port);
			port_idx++;
		}
		/* NOTE(review): if the list fills every slot of the map, this
		 * terminator write presumably fails because the index is out
		 * of range - confirm whether that case should be tolerated.
		 */
		err = bpf_map_update_elem(ports_map_fd, &port_idx, &port_last, BPF_ANY);
		if (err != 0) {
			fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err));
			fprintf(stderr, "Failed to add the terminator value 0 (index %u)\n",
				port_idx);
			goto out_close_maps;
		}
	}

	if (tcpipopts) {
		__u32 key = 0;

		fprintf(stderr, "Replacing TCP/IP options\n");

		err = bpf_map_update_elem(values_map_fd, &key, &tcpipopts, BPF_ANY);
		if (err != 0) {
			fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err));
			goto out_close_maps;
		}
	}

	/* Pure reconfiguration of an already attached program: nothing to
	 * report unless --single asked for the counter.
	 */
	if ((ports || tcpipopts) && attached_prog_id == 0 && !single)
		goto out_close_maps;

	prevcnt = 0;
	firstiter = true;
	while (true) {
		__u32 key = 1;
		__u64 value;

		err = bpf_map_lookup_elem(values_map_fd, &key, &value);
		if (err != 0) {
			fprintf(stderr, "Error: bpf_map_lookup_elem: %s\n", strerror(-err));
			goto out_close_maps;
		}
		if (firstiter) {
			prevcnt = value;
			firstiter = false;
		}
		/* __u64 isn't unsigned long long on every platform; cast to
		 * match the %llu conversions.
		 */
		if (single) {
			printf("Total SYNACKs generated: %llu\n",
			       (unsigned long long)value);
			break;
		}
		printf("SYNACKs generated: %llu (total %llu)\n",
		       (unsigned long long)(value - prevcnt),
		       (unsigned long long)value);
		prevcnt = value;
		sleep(1);
	}

out_close_maps:
	close(values_map_fd);
	close(ports_map_fd);
out:
	return err == 0 ? 0 : 1;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment