Commit 116e7dbe authored by Alexei Starovoitov

Merge branch 'gen-syn-cookie'

Petar Penkov says:

====================
This patch series introduces a BPF helper function that allows generating SYN
cookies from BPF. Currently, this helper is enabled at both the TC hook and the
XDP hook.

The first two patches in the series add/modify several TCP helper functions to
allow for SKB-less operation, as is the case at the XDP hook.

The third patch introduces the bpf_tcp_gen_syncookie helper function which
generates a SYN cookie for either XDP or TC programs. The return value of
this function contains both the MSS value, encoded in the cookie, and the
cookie itself.

The last three patches sync tools/ and add a test.

Performance evaluation:
I sent 10Mpps to a fixed port on a host with 2 10G bonded Mellanox 4 NICs from
random IPv6 source addresses. Without XDP I observed 7.2Mpps (syn-acks) being
sent out if the IPv6 packets carry 20 bytes of TCP options or 7.6Mpps if they
carry no options. If I attached a simple program that checks if a packet is
IPv6/TCP/SYN, looks up the socket, issues a cookie, and sends it back out after
swapping src/dest, recomputing the checksum, and setting the ACK flag, I
observed 10Mpps being sent back out.

Changes since v1:
1/ Added performance numbers to the cover letter
2/ Patch 2: Refactored a bit to fix compilation issues
3/ Patch 3: Changed ENOTSUPP to EOPNOTSUPP at Toke's suggestion

Changes since RFC:
1/ Cookie is returned in host order at Alexei's suggestion
2/ If cookies are not enabled via a sysctl, the helper function returns
   -ENOENT instead of -EINVAL at Lorenz's suggestion
3/ Fixed documentation to properly reflect that MSS is 16 bits at
   Lorenz's suggestion
4/ BPF helper requires TCP length to match ->doff field, rather than to simply
   be no more than 20 bytes at Eric and Alexei's suggestion
5/ Packet type is looked up from the packet version field, rather than from the
   socket. v4 packets are rejected on v6-only sockets but should work with
   dual stack listeners at Eric's suggestion
6/ Removed unnecessary `net` argument from helper function in patch 2 at
   Lorenz's suggestion
7/ Changed test to only pass MSS option so we can convince the verifier that the
   memory access is not out of bounds

Note that 7/ above illustrates that the verifier might need to be extended to
allow passing a variable tcph->doff to the helper function, as in the following:

__u32 thlen = tcph->doff * 4;
if (thlen < sizeof(*tcph))
	return;
__s64 cookie = bpf_tcp_gen_syncookie(sk, ipv4h, 20, tcph, thlen);
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents d3406913 91bc3578
......@@ -414,6 +414,16 @@ void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
int estab, struct tcp_fastopen_cookie *foc);
const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
/*
* BPF SKB-less helpers
*/
/* Try to generate an IPv4 SYN cookie for the SYN described by @iph/@th on
 * socket @sk. Returns the MSS associated with the cookie, or 0 if no cookie
 * was generated; on a non-zero return the cookie is stored in *@cookie.
 */
u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
struct tcphdr *th, u32 *cookie);
/* IPv6 counterpart of tcp_v4_get_syncookie(); same return convention. */
u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
struct tcphdr *th, u32 *cookie);
/* Returns the MSS to use for SYN cookie generation on @sk for the SYN whose
 * TCP header is @th, or 0 if no cookie should be issued.
 */
u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct tcphdr *th);
/*
* TCP v4 functions exported for the inet6 API
*/
......
......@@ -2714,6 +2714,33 @@ union bpf_attr {
* **-EPERM** if no permission to send the *sig*.
*
* **-EAGAIN** if bpf program can try again.
*
* s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* Description
* Try to issue a SYN cookie for the packet with corresponding
* IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
*
* *iph* points to the start of the IPv4 or IPv6 header, while
* *iph_len* contains **sizeof**\ (**struct iphdr**) or
* **sizeof**\ (**struct ipv6hdr**).
*
* *th* points to the start of the TCP header, while *th_len*
* contains the length of the TCP header.
*
* Return
* On success, the lower 32 bits hold the generated SYN cookie in
* host byte order, followed by 16 bits which hold the MSS value
* for that cookie; the top 16 bits are unused.
*
* On failure, the returned value is one of the following:
*
* **-EINVAL** SYN cookie cannot be issued due to error
*
* **-ENOENT** SYN cookie should not be issued (no SYN flood)
*
* **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
*
* **-EPROTONOSUPPORT** IP packet version is not 4 or 6
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......@@ -2825,7 +2852,8 @@ union bpf_attr {
FN(strtoul), \
FN(sk_storage_get), \
FN(sk_storage_delete), \
FN(send_signal),
FN(send_signal), \
FN(tcp_gen_syncookie),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
......
......@@ -5855,6 +5855,75 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
.arg5_type = ARG_CONST_SIZE,
};
/* bpf_tcp_gen_syncookie() - try to issue a SYN cookie on behalf of a BPF
 * program, without needing an skb.
 *
 * @sk:      socket the SYN is addressed to; must be a TCP socket in TCP_LISTEN
 * @iph:     start of the IPv4 or IPv6 header of the SYN
 * @iph_len: number of bytes available at @iph
 * @th:      start of the TCP header of the SYN
 * @th_len:  number of bytes available at @th; must equal th->doff * 4
 *
 * Return: on success, the cookie in the low 32 bits and the MSS that goes
 * with it in bits 32..47. On failure a negative errno:
 *   -EINVAL          header lengths are inconsistent, @sk is not a TCP
 *                    listener, the segment is not a plain SYN, or the IP
 *                    version does not fit the socket's address family
 *   -ENOENT          the tcp_syncookies sysctl is 0, or the per-family helper
 *                    returned an MSS of 0 (no cookie was generated)
 *   -EPROTONOSUPPORT the IP version is not handled by this build
 *   -EOPNOTSUPP      the kernel was built without CONFIG_SYN_COOKIES
 */
BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
	   struct tcphdr *, th, u32, th_len)
{
#ifdef CONFIG_SYN_COOKIES
	u32 cookie;
	u16 mss;

	/* The caller must hand over exactly the header the packet claims to
	 * have, so that option parsing cannot run past @th_len.
	 */
	if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
		return -EINVAL;

	if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
		return -EINVAL;

	if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
		return -ENOENT;

	/* Only a plain SYN (no ACK/FIN/RST) can be answered with a cookie. */
	if (!th->syn || th->ack || th->fin || th->rst)
		return -EINVAL;

	if (unlikely(iph_len < sizeof(struct iphdr)))
		return -EINVAL;

	/* Both struct iphdr and struct ipv6hdr have the version field at the
	 * same offset so we can cast to the shorter header (struct iphdr).
	 */
	switch (((struct iphdr *)iph)->version) {
	case 4:
		/* v4 packets are fine on dual-stack listeners, but not on
		 * v6-only sockets.
		 */
		if (sk->sk_family == AF_INET6 && sk->sk_ipv6only)
			return -EINVAL;

		mss = tcp_v4_get_syncookie(sk, iph, th, &cookie);
		break;

#if IS_BUILTIN(CONFIG_IPV6)
	case 6:
		if (unlikely(iph_len < sizeof(struct ipv6hdr)))
			return -EINVAL;

		if (sk->sk_family != AF_INET6)
			return -EINVAL;

		mss = tcp_v6_get_syncookie(sk, iph, th, &cookie);
		break;
#endif /* CONFIG_IPV6 */

	default:
		return -EPROTONOSUPPORT;
	}

	/* mss is unsigned: zero is the only value that signals "no cookie",
	 * so test for it explicitly rather than with "<= 0".
	 */
	if (mss == 0)
		return -ENOENT;

	return cookie | ((u64)mss << 32);
#else
	return -EOPNOTSUPP;
#endif /* CONFIG_SYN_COOKIES */
}
/* Prototype descriptor for the bpf_tcp_gen_syncookie() helper. The five
 * argument types follow the helper's parameter order: (sk, iph, iph_len,
 * th, th_len), i.e. each header pointer is immediately followed by its size.
 */
static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
.func = bpf_tcp_gen_syncookie,
.gpl_only = true, /* __cookie_v*_init_sequence() is GPL */
.pkt_access = true, /* NOTE(review): presumably lets iph/th point into packet data - confirm */
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_SOCK_COMMON, /* sk */
.arg2_type = ARG_PTR_TO_MEM, /* iph */
.arg3_type = ARG_CONST_SIZE, /* iph_len */
.arg4_type = ARG_PTR_TO_MEM, /* th */
.arg5_type = ARG_CONST_SIZE, /* th_len */
};
#endif /* CONFIG_INET */
bool bpf_helper_changes_pkt_data(void *func)
......@@ -6144,6 +6213,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_check_syncookie_proto;
case BPF_FUNC_skb_ecn_set_ce:
return &bpf_skb_ecn_set_ce_proto;
case BPF_FUNC_tcp_gen_syncookie:
return &bpf_tcp_gen_syncookie_proto;
#endif
default:
return bpf_base_func_proto(func_id);
......@@ -6183,6 +6254,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_xdp_skc_lookup_tcp_proto;
case BPF_FUNC_tcp_check_syncookie:
return &bpf_tcp_check_syncookie_proto;
case BPF_FUNC_tcp_gen_syncookie:
return &bpf_tcp_gen_syncookie_proto;
#endif
default:
return bpf_base_func_proto(func_id);
......
......@@ -3782,6 +3782,49 @@ static void smc_parse_options(const struct tcphdr *th,
#endif
}
/* Scan the option area that follows the fixed TCP header for an MSS option.
 *
 * @th:       TCP header; th->doff determines how many option bytes are read
 * @user_mss: upper bound applied to the advertised MSS, ignored when 0
 *
 * Returns 0 when no non-zero MSS option was found before the options ended or
 * became malformed; otherwise the last such MSS seen, limited to @user_mss.
 */
static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
{
	const unsigned char *opt = (const unsigned char *)(th + 1);
	int remaining = (th->doff * 4) - sizeof(struct tcphdr);
	u16 mss = 0;

	while (remaining > 0) {
		int kind = *opt++;
		int len;

		if (kind == TCPOPT_EOL)
			break;

		if (kind == TCPOPT_NOP) {	/* Ref: RFC 793 section 3.1 */
			remaining--;
			continue;
		}

		/* Every other option carries a length byte. */
		if (remaining < 2)
			break;

		len = *opt++;
		if (len < 2)		/* "silly options" */
			break;
		if (len > remaining)	/* fail on partial options */
			break;

		if (kind == TCPOPT_MSS && len == TCPOLEN_MSS) {
			u16 in_mss = get_unaligned_be16(opt);

			/* A zero MSS is ignored; a non-zero one is capped. */
			if (in_mss)
				mss = (user_mss && user_mss < in_mss) ?
					user_mss : in_mss;
		}

		/* Kind and length bytes were already consumed above. */
		opt += len - 2;
		remaining -= len;
	}

	return mss;
}
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
* But, this can also be called on packets in the established flow when
* the fast version below fails.
......@@ -6422,9 +6465,7 @@ EXPORT_SYMBOL(inet_reqsk_alloc);
/*
* Return true if a syncookie should be sent
*/
static bool tcp_syn_flood_action(const struct sock *sk,
const struct sk_buff *skb,
const char *proto)
static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
{
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
const char *msg = "Dropping request";
......@@ -6444,7 +6485,7 @@ static bool tcp_syn_flood_action(const struct sock *sk,
net->ipv4.sysctl_tcp_syncookies != 2 &&
xchg(&queue->synflood_warned, 1) == 0)
net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
proto, ntohs(tcp_hdr(skb)->dest), msg);
proto, sk->sk_num, msg);
return want_cookie;
}
......@@ -6466,6 +6507,36 @@ static void tcp_reqsk_record_syn(const struct sock *sk,
}
}
/* If a SYN cookie is required and supported, returns a clamped MSS value to be
* used for SYN cookie generation.
*/
u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct tcphdr *th)
{
struct tcp_sock *tp = tcp_sk(sk);
u16 mss;
if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
!inet_csk_reqsk_queue_is_full(sk))
return 0;
if (!tcp_syn_flood_action(sk, rsk_ops->slab_name))
return 0;
if (sk_acceptq_is_full(sk)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
return 0;
}
mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss);
if (!mss)
mss = af_ops->mss_clamp;
return mss;
}
EXPORT_SYMBOL_GPL(tcp_get_syncookie_mss);
int tcp_conn_request(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct sk_buff *skb)
......@@ -6487,7 +6558,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
*/
if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
inet_csk_reqsk_queue_is_full(sk)) && !isn) {
want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
if (!want_cookie)
goto drop;
}
......
......@@ -1515,6 +1515,21 @@ static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
return sk;
}
/* SKB-less IPv4 SYN cookie generation.
 *
 * Returns the MSS associated with the generated cookie and stores the cookie
 * in *@cookie, or returns 0 (leaving *@cookie untouched) when no cookie was
 * generated or CONFIG_SYN_COOKIES is disabled.
 */
u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
#ifdef CONFIG_SYN_COOKIES
	u16 mss = tcp_get_syncookie_mss(&tcp_request_sock_ops,
					&tcp_request_sock_ipv4_ops, sk, th);

	if (!mss)
		return 0;

	/* mss is passed by pointer, so the callee may adjust it. */
	*cookie = __cookie_v4_init_sequence(iph, th, &mss);
	tcp_synq_overflow(sk);

	return mss;
#else
	return 0;
#endif
}
/* The socket must have it's spinlock held when we get
* here, unless it is a TCP_LISTEN socket.
*
......
......@@ -1063,6 +1063,21 @@ static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
return sk;
}
/* SKB-less IPv6 SYN cookie generation.
 *
 * Returns the MSS associated with the generated cookie and stores the cookie
 * in *@cookie, or returns 0 (leaving *@cookie untouched) when no cookie was
 * generated or CONFIG_SYN_COOKIES is disabled.
 */
u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
#ifdef CONFIG_SYN_COOKIES
	u16 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
					&tcp_request_sock_ipv6_ops, sk, th);

	if (!mss)
		return 0;

	/* mss is passed by pointer, so the callee may adjust it. */
	*cookie = __cookie_v6_init_sequence(iph, th, &mss);
	tcp_synq_overflow(sk);

	return mss;
#else
	return 0;
#endif
}
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
if (skb->protocol == htons(ETH_P_IP))
......
......@@ -1572,8 +1572,11 @@ union bpf_attr {
* but this is only implemented for native XDP (with driver
* support) as of this writing).
*
* All values for *flags* are reserved for future usage, and must
* be left at zero.
* The lower two bits of *flags* are used as the return code if
* the map lookup fails. This is so that the return value can be
* one of the XDP program return codes up to XDP_TX, as chosen by
* the caller. Any higher bits in the *flags* argument must be
* unset.
*
* When used to redirect packets to net devices, this helper
* provides a high performance increase over **bpf_redirect**\ ().
......@@ -2711,6 +2714,33 @@ union bpf_attr {
* **-EPERM** if no permission to send the *sig*.
*
* **-EAGAIN** if bpf program can try again.
*
* s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* Description
* Try to issue a SYN cookie for the packet with corresponding
* IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
*
* *iph* points to the start of the IPv4 or IPv6 header, while
* *iph_len* contains **sizeof**\ (**struct iphdr**) or
* **sizeof**\ (**struct ipv6hdr**).
*
* *th* points to the start of the TCP header, while *th_len*
* contains the length of the TCP header.
*
* Return
* On success, the lower 32 bits hold the generated SYN cookie in
* host byte order, followed by 16 bits which hold the MSS value
* for that cookie; the top 16 bits are unused.
*
* On failure, the returned value is one of the following:
*
* **-EINVAL** SYN cookie cannot be issued due to error
*
* **-ENOENT** SYN cookie should not be issued (no SYN flood)
*
* **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
*
* **-EPROTONOSUPPORT** IP packet version is not 4 or 6
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......@@ -2822,7 +2852,8 @@ union bpf_attr {
FN(strtoul), \
FN(sk_storage_get), \
FN(sk_storage_delete), \
FN(send_signal),
FN(send_signal), \
FN(tcp_gen_syncookie),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
......
......@@ -228,6 +228,9 @@ static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk,
static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) =
(void *)BPF_FUNC_sk_storage_delete;
static int (*bpf_send_signal)(unsigned sig) = (void *)BPF_FUNC_send_signal;
/* Program-side stub for the tcp_gen_syncookie helper: a function pointer
 * whose value is the helper id BPF_FUNC_tcp_gen_syncookie. Takes the socket,
 * the IP header with its length, and the TCP header with its length.
 */
static long long (*bpf_tcp_gen_syncookie)(struct bpf_sock *sk, void *ip,
int ip_len, void *tcp, int tcp_len) =
(void *) BPF_FUNC_tcp_gen_syncookie;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
......
......@@ -19,10 +19,29 @@
struct bpf_map_def SEC("maps") results = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(__u64),
.max_entries = 1,
.value_size = sizeof(__u32),
.max_entries = 3,
};
/* Ask the kernel for a SYN cookie for the packet at @iph/@tcph.
 *
 * Only a SYN without ACK whose TCP header is exactly 24 bytes long and lies
 * fully inside the packet (before @data_end) is passed on. Every other packet
 * yields 0; otherwise the value returned by bpf_tcp_gen_syncookie() is
 * handed back unchanged.
 */
static __always_inline __s64 gen_syncookie(void *data_end, struct bpf_sock *sk,
					   void *iph, __u32 ip_size,
					   struct tcphdr *tcph)
{
	__u32 thlen = tcph->doff * 4;

	if (!tcph->syn || tcph->ack)
		return 0;

	/* 24 bytes = fixed header plus a single MSS option; a fixed length
	 * lets the verifier bound the memory access.
	 */
	if (thlen != 24)
		return 0;

	if ((void *)tcph + thlen > data_end)
		return 0;

	return bpf_tcp_gen_syncookie(sk, iph, ip_size, tcph, thlen);
}
static __always_inline void check_syncookie(void *ctx, void *data,
void *data_end)
{
......@@ -33,8 +52,10 @@ static __always_inline void check_syncookie(void *ctx, void *data,
struct ipv6hdr *ipv6h;
struct tcphdr *tcph;
int ret;
__u32 key_mss = 2;
__u32 key_gen = 1;
__u32 key = 0;
__u64 value = 1;
__s64 seq_mss;
ethh = data;
if (ethh + 1 > data_end)
......@@ -66,6 +87,9 @@ static __always_inline void check_syncookie(void *ctx, void *data,
if (sk->state != BPF_TCP_LISTEN)
goto release;
seq_mss = gen_syncookie(data_end, sk, ipv4h, sizeof(*ipv4h),
tcph);
ret = bpf_tcp_check_syncookie(sk, ipv4h, sizeof(*ipv4h),
tcph, sizeof(*tcph));
break;
......@@ -95,6 +119,9 @@ static __always_inline void check_syncookie(void *ctx, void *data,
if (sk->state != BPF_TCP_LISTEN)
goto release;
seq_mss = gen_syncookie(data_end, sk, ipv6h, sizeof(*ipv6h),
tcph);
ret = bpf_tcp_check_syncookie(sk, ipv6h, sizeof(*ipv6h),
tcph, sizeof(*tcph));
break;
......@@ -103,8 +130,19 @@ static __always_inline void check_syncookie(void *ctx, void *data,
return;
}
if (ret == 0)
bpf_map_update_elem(&results, &key, &value, 0);
if (seq_mss > 0) {
__u32 cookie = (__u32)seq_mss;
__u32 mss = seq_mss >> 32;
bpf_map_update_elem(&results, &key_gen, &cookie, 0);
bpf_map_update_elem(&results, &key_mss, &mss, 0);
}
if (ret == 0) {
__u32 cookie = bpf_ntohl(tcph->ack_seq) - 1;
bpf_map_update_elem(&results, &key, &cookie, 0);
}
release:
bpf_sk_release(sk);
......
......@@ -37,6 +37,9 @@ setup()
ns1_exec ip link set lo up
ns1_exec sysctl -w net.ipv4.tcp_syncookies=2
ns1_exec sysctl -w net.ipv4.tcp_window_scaling=0
ns1_exec sysctl -w net.ipv4.tcp_timestamps=0
ns1_exec sysctl -w net.ipv4.tcp_sack=0
wait_for_ip 127.0.0.1
wait_for_ip ::1
......
......@@ -2,6 +2,7 @@
// Copyright (c) 2018 Facebook
// Copyright (c) 2019 Cloudflare
#include <limits.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
......@@ -77,7 +78,7 @@ static int connect_to_server(int server_fd)
return fd;
}
static int get_map_fd_by_prog_id(int prog_id)
static int get_map_fd_by_prog_id(int prog_id, bool *xdp)
{
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
......@@ -104,6 +105,8 @@ static int get_map_fd_by_prog_id(int prog_id)
goto err;
}
*xdp = info.type == BPF_PROG_TYPE_XDP;
map_fd = bpf_map_get_fd_by_id(map_ids[0]);
if (map_fd < 0)
log_err("Failed to get fd by map id %d", map_ids[0]);
......@@ -113,18 +116,32 @@ static int get_map_fd_by_prog_id(int prog_id)
return map_fd;
}
static int run_test(int server_fd, int results_fd)
static int run_test(int server_fd, int results_fd, bool xdp)
{
int client = -1, srv_client = -1;
int ret = 0;
__u32 key = 0;
__u64 value = 0;
__u32 key_gen = 1;
__u32 key_mss = 2;
__u32 value = 0;
__u32 value_gen = 0;
__u32 value_mss = 0;
if (bpf_map_update_elem(results_fd, &key, &value, 0) < 0) {
log_err("Can't clear results");
goto err;
}
if (bpf_map_update_elem(results_fd, &key_gen, &value_gen, 0) < 0) {
log_err("Can't clear results");
goto err;
}
if (bpf_map_update_elem(results_fd, &key_mss, &value_mss, 0) < 0) {
log_err("Can't clear results");
goto err;
}
client = connect_to_server(server_fd);
if (client == -1)
goto err;
......@@ -140,8 +157,35 @@ static int run_test(int server_fd, int results_fd)
goto err;
}
if (value != 1) {
log_err("Didn't match syncookie: %llu", value);
if (value == 0) {
log_err("Didn't match syncookie: %u", value);
goto err;
}
if (bpf_map_lookup_elem(results_fd, &key_gen, &value_gen) < 0) {
log_err("Can't lookup result");
goto err;
}
if (xdp && value_gen == 0) {
// SYN packets do not get passed through generic XDP, skip the
// rest of the test.
printf("Skipping XDP cookie check\n");
goto out;
}
if (bpf_map_lookup_elem(results_fd, &key_mss, &value_mss) < 0) {
log_err("Can't lookup result");
goto err;
}
if (value != value_gen) {
log_err("BPF generated cookie does not match kernel one");
goto err;
}
if (value_mss < 536 || value_mss > USHRT_MAX) {
log_err("Unexpected MSS retrieved");
goto err;
}
......@@ -163,13 +207,14 @@ int main(int argc, char **argv)
int server_v6 = -1;
int results = -1;
int err = 0;
bool xdp;
if (argc < 2) {
fprintf(stderr, "Usage: %s prog_id\n", argv[0]);
exit(1);
}
results = get_map_fd_by_prog_id(atoi(argv[1]));
results = get_map_fd_by_prog_id(atoi(argv[1]), &xdp);
if (results < 0) {
log_err("Can't get map");
goto err;
......@@ -194,10 +239,10 @@ int main(int argc, char **argv)
if (server_v6 == -1)
goto err;
if (run_test(server, results))
if (run_test(server, results, xdp))
goto err;
if (run_test(server_v6, results))
if (run_test(server_v6, results, xdp))
goto err;
printf("ok\n");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment