Commit 5133a4a8 authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2019-03-26

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) introduce bpf_tcp_check_syncookie() helper for XDP and tc, from Lorenz.

2) allow bpf_skb_ecn_set_ce() in tc, from Peter.

3) numerous bpf tc tunneling improvements, from Willem.

4) and other miscellaneous improvements from Adrian, Alan, Daniel, Ivan, Stanislav.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents fa7e428c b4b6aa83
...@@ -205,6 +205,7 @@ enum bpf_return_type { ...@@ -205,6 +205,7 @@ enum bpf_return_type {
RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */
RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */
RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */
RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */
}; };
/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
......
...@@ -1478,13 +1478,27 @@ union bpf_attr { ...@@ -1478,13 +1478,27 @@ union bpf_attr {
* Grow or shrink the room for data in the packet associated to * Grow or shrink the room for data in the packet associated to
* *skb* by *len_diff*, and according to the selected *mode*. * *skb* by *len_diff*, and according to the selected *mode*.
* *
* There is a single supported mode at this time: * There are two supported modes at this time:
*
* * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
* (room space is added or removed below the layer 2 header).
* *
* * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
* (room space is added or removed below the layer 3 header). * (room space is added or removed below the layer 3 header).
* *
* All values for *flags* are reserved for future usage, and must * The following flags are supported at this time:
* be left at zero. *
* * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size.
* Adjusting mss in this way is not allowed for datagrams.
*
* * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**:
* * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**:
* Any new space is reserved to hold a tunnel header.
* Configure skb offsets and other fields accordingly.
*
* * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**:
* * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**:
* Use with ENCAP_L3 flags to further specify the tunnel type.
* *
* A call to this helper is susceptible to change the underlying * A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers * packet buffer. Therefore, at load time, all checks on pointers
...@@ -2431,6 +2445,38 @@ union bpf_attr { ...@@ -2431,6 +2445,38 @@ union bpf_attr {
* Return * Return
* A **struct bpf_sock** pointer on success, or **NULL** in * A **struct bpf_sock** pointer on success, or **NULL** in
* case of failure. * case of failure.
*
* struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
* Description
* Look for TCP socket matching *tuple*, optionally in a child
* network namespace *netns*. The return value must be checked,
* and if non-**NULL**, released via **bpf_sk_release**\ ().
*
* This function is identical to bpf_sk_lookup_tcp, except that it
* also returns timewait or request sockets. Use bpf_sk_fullsock
* or bpf_tcp_socket to access the full structure.
*
* This helper is available only if the kernel was compiled with
* **CONFIG_NET** configuration option.
* Return
* Pointer to **struct bpf_sock**, or **NULL** in case of failure.
* For sockets with reuseport option, the **struct bpf_sock**
* result is from **reuse->socks**\ [] using the hash of the tuple.
*
* int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* Description
* Check whether iph and th contain a valid SYN cookie ACK for
* the listening socket in sk.
*
* iph points to the start of the IPv4 or IPv6 header, while
* iph_len contains sizeof(struct iphdr) or sizeof(struct ipv6hdr).
*
* th points to the start of the TCP header, while th_len contains
* sizeof(struct tcphdr).
*
* Return
* 0 if iph and th are a valid SYN cookie ACK, or a negative error
* otherwise.
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -2531,7 +2577,9 @@ union bpf_attr { ...@@ -2531,7 +2577,9 @@ union bpf_attr {
FN(sk_fullsock), \ FN(sk_fullsock), \
FN(tcp_sock), \ FN(tcp_sock), \
FN(skb_ecn_set_ce), \ FN(skb_ecn_set_ce), \
FN(get_listener_sock), FN(get_listener_sock), \
FN(skc_lookup_tcp), \
FN(tcp_check_syncookie),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
...@@ -2590,9 +2638,18 @@ enum bpf_func_id { ...@@ -2590,9 +2638,18 @@ enum bpf_func_id {
/* Current network namespace */ /* Current network namespace */
#define BPF_F_CURRENT_NETNS (-1L) #define BPF_F_CURRENT_NETNS (-1L)
/* BPF_FUNC_skb_adjust_room flags. */
#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0)
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1)
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2)
#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3)
#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4)
/* Mode for BPF_FUNC_skb_adjust_room helper. */ /* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode { enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET, BPF_ADJ_ROOM_NET,
BPF_ADJ_ROOM_MAC,
}; };
/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
......
...@@ -369,7 +369,8 @@ static bool is_release_function(enum bpf_func_id func_id) ...@@ -369,7 +369,8 @@ static bool is_release_function(enum bpf_func_id func_id)
static bool is_acquire_function(enum bpf_func_id func_id) static bool is_acquire_function(enum bpf_func_id func_id)
{ {
return func_id == BPF_FUNC_sk_lookup_tcp || return func_id == BPF_FUNC_sk_lookup_tcp ||
func_id == BPF_FUNC_sk_lookup_udp; func_id == BPF_FUNC_sk_lookup_udp ||
func_id == BPF_FUNC_skc_lookup_tcp;
} }
static bool is_ptr_cast_function(enum bpf_func_id func_id) static bool is_ptr_cast_function(enum bpf_func_id func_id)
...@@ -3147,19 +3148,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn ...@@ -3147,19 +3148,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
mark_reg_known_zero(env, regs, BPF_REG_0); mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
if (is_acquire_function(func_id)) {
int id = acquire_reference_state(env, insn_idx);
if (id < 0)
return id;
/* For mark_ptr_or_null_reg() */
regs[BPF_REG_0].id = id;
/* For release_reference() */
regs[BPF_REG_0].ref_obj_id = id;
} else {
/* For mark_ptr_or_null_reg() */
regs[BPF_REG_0].id = ++env->id_gen; regs[BPF_REG_0].id = ++env->id_gen;
} } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
regs[BPF_REG_0].id = ++env->id_gen;
} else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
mark_reg_known_zero(env, regs, BPF_REG_0); mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
...@@ -3170,9 +3163,19 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn ...@@ -3170,9 +3163,19 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return -EINVAL; return -EINVAL;
} }
if (is_ptr_cast_function(func_id)) if (is_ptr_cast_function(func_id)) {
/* For release_reference() */ /* For release_reference() */
regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
} else if (is_acquire_function(func_id)) {
int id = acquire_reference_state(env, insn_idx);
if (id < 0)
return id;
/* For mark_ptr_or_null_reg() */
regs[BPF_REG_0].id = id;
/* For release_reference() */
regs[BPF_REG_0].ref_obj_id = id;
}
do_refine_retval_range(regs, fn->ret_type, func_id, &meta); do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
......
This diff is collapsed.
...@@ -44,5 +44,6 @@ xdp_redirect_cpu ...@@ -44,5 +44,6 @@ xdp_redirect_cpu
xdp_redirect_map xdp_redirect_map
xdp_router_ipv4 xdp_router_ipv4
xdp_rxq_info xdp_rxq_info
xdp_sample_pkts
xdp_tx_iptunnel xdp_tx_iptunnel
xdpsock xdpsock
...@@ -1478,13 +1478,27 @@ union bpf_attr { ...@@ -1478,13 +1478,27 @@ union bpf_attr {
* Grow or shrink the room for data in the packet associated to * Grow or shrink the room for data in the packet associated to
* *skb* by *len_diff*, and according to the selected *mode*. * *skb* by *len_diff*, and according to the selected *mode*.
* *
* There is a single supported mode at this time: * There are two supported modes at this time:
*
* * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
* (room space is added or removed below the layer 2 header).
* *
* * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
* (room space is added or removed below the layer 3 header). * (room space is added or removed below the layer 3 header).
* *
* All values for *flags* are reserved for future usage, and must * The following flags are supported at this time:
* be left at zero. *
* * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size.
* Adjusting mss in this way is not allowed for datagrams.
*
* * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**:
* * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**:
* Any new space is reserved to hold a tunnel header.
* Configure skb offsets and other fields accordingly.
*
* * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**:
* * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**:
* Use with ENCAP_L3 flags to further specify the tunnel type.
* *
* A call to this helper is susceptible to change the underlying * A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers * packet buffer. Therefore, at load time, all checks on pointers
...@@ -2431,6 +2445,38 @@ union bpf_attr { ...@@ -2431,6 +2445,38 @@ union bpf_attr {
* Return * Return
* A **struct bpf_sock** pointer on success, or **NULL** in * A **struct bpf_sock** pointer on success, or **NULL** in
* case of failure. * case of failure.
*
* struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
* Description
* Look for TCP socket matching *tuple*, optionally in a child
* network namespace *netns*. The return value must be checked,
* and if non-**NULL**, released via **bpf_sk_release**\ ().
*
* This function is identical to bpf_sk_lookup_tcp, except that it
* also returns timewait or request sockets. Use bpf_sk_fullsock
* or bpf_tcp_socket to access the full structure.
*
* This helper is available only if the kernel was compiled with
* **CONFIG_NET** configuration option.
* Return
* Pointer to **struct bpf_sock**, or **NULL** in case of failure.
* For sockets with reuseport option, the **struct bpf_sock**
* result is from **reuse->socks**\ [] using the hash of the tuple.
*
* int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* Description
* Check whether iph and th contain a valid SYN cookie ACK for
* the listening socket in sk.
*
* iph points to the start of the IPv4 or IPv6 header, while
* iph_len contains sizeof(struct iphdr) or sizeof(struct ipv6hdr).
*
* th points to the start of the TCP header, while th_len contains
* sizeof(struct tcphdr).
*
* Return
* 0 if iph and th are a valid SYN cookie ACK, or a negative error
* otherwise.
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -2531,7 +2577,9 @@ union bpf_attr { ...@@ -2531,7 +2577,9 @@ union bpf_attr {
FN(sk_fullsock), \ FN(sk_fullsock), \
FN(tcp_sock), \ FN(tcp_sock), \
FN(skb_ecn_set_ce), \ FN(skb_ecn_set_ce), \
FN(get_listener_sock), FN(get_listener_sock), \
FN(skc_lookup_tcp), \
FN(tcp_check_syncookie),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
...@@ -2590,9 +2638,18 @@ enum bpf_func_id { ...@@ -2590,9 +2638,18 @@ enum bpf_func_id {
/* Current network namespace */ /* Current network namespace */
#define BPF_F_CURRENT_NETNS (-1L) #define BPF_F_CURRENT_NETNS (-1L)
/* BPF_FUNC_skb_adjust_room flags. */
#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0)
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1)
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2)
#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3)
#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4)
/* Mode for BPF_FUNC_skb_adjust_room helper. */ /* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode { enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET, BPF_ADJ_ROOM_NET,
BPF_ADJ_ROOM_MAC,
}; };
/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
......
...@@ -30,4 +30,5 @@ test_netcnt ...@@ -30,4 +30,5 @@ test_netcnt
test_section_names test_section_names
test_tcpnotify_user test_tcpnotify_user
test_libbpf test_libbpf
test_tcp_check_syncookie_user
alu32 alu32
...@@ -51,7 +51,10 @@ TEST_PROGS := test_kmod.sh \ ...@@ -51,7 +51,10 @@ TEST_PROGS := test_kmod.sh \
test_skb_cgroup_id.sh \ test_skb_cgroup_id.sh \
test_flow_dissector.sh \ test_flow_dissector.sh \
test_xdp_vlan.sh \ test_xdp_vlan.sh \
test_lwt_ip_encap.sh test_lwt_ip_encap.sh \
test_tcp_check_syncookie.sh \
test_tc_tunnel.sh \
test_tc_edt.sh
TEST_PROGS_EXTENDED := with_addr.sh \ TEST_PROGS_EXTENDED := with_addr.sh \
with_tunnels.sh \ with_tunnels.sh \
...@@ -60,7 +63,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \ ...@@ -60,7 +63,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
# Compile but not part of 'make run_tests' # Compile but not part of 'make run_tests'
TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \ TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user
include ../lib.mk include ../lib.mk
...@@ -69,7 +72,7 @@ TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read ...@@ -69,7 +72,7 @@ TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
all: $(TEST_CUSTOM_PROGS) all: $(TEST_CUSTOM_PROGS)
$(OUTPUT)/urandom_read: $(OUTPUT)/%: %.c $(OUTPUT)/urandom_read: $(OUTPUT)/%: %.c
$(CC) -o $@ -static $< -Wl,--build-id $(CC) -o $@ $< -Wl,--build-id
BPFOBJ := $(OUTPUT)/libbpf.a BPFOBJ := $(OUTPUT)/libbpf.a
......
...@@ -159,6 +159,11 @@ static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, ...@@ -159,6 +159,11 @@ static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx,
int size, unsigned long long netns_id, int size, unsigned long long netns_id,
unsigned long long flags) = unsigned long long flags) =
(void *) BPF_FUNC_sk_lookup_tcp; (void *) BPF_FUNC_sk_lookup_tcp;
static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx,
struct bpf_sock_tuple *tuple,
int size, unsigned long long netns_id,
unsigned long long flags) =
(void *) BPF_FUNC_skc_lookup_tcp;
static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx,
struct bpf_sock_tuple *tuple, struct bpf_sock_tuple *tuple,
int size, unsigned long long netns_id, int size, unsigned long long netns_id,
...@@ -184,6 +189,9 @@ static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) = ...@@ -184,6 +189,9 @@ static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) =
(void *) BPF_FUNC_get_listener_sock; (void *) BPF_FUNC_get_listener_sock;
static int (*bpf_skb_ecn_set_ce)(void *ctx) = static int (*bpf_skb_ecn_set_ce)(void *ctx) =
(void *) BPF_FUNC_skb_ecn_set_ce; (void *) BPF_FUNC_skb_ecn_set_ce;
static int (*bpf_tcp_check_syncookie)(struct bpf_sock *sk,
void *ip, int ip_len, void *tcp, int tcp_len) =
(void *) BPF_FUNC_tcp_check_syncookie;
/* llvm builtin functions that eBPF C program may use to /* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions * emit BPF_LD_ABS and BPF_LD_IND instructions
...@@ -274,6 +282,9 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, ...@@ -274,6 +282,9 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode,
#elif defined(__TARGET_ARCH_s930x) #elif defined(__TARGET_ARCH_s930x)
#define bpf_target_s930x #define bpf_target_s930x
#define bpf_target_defined #define bpf_target_defined
#elif defined(__TARGET_ARCH_arm)
#define bpf_target_arm
#define bpf_target_defined
#elif defined(__TARGET_ARCH_arm64) #elif defined(__TARGET_ARCH_arm64)
#define bpf_target_arm64 #define bpf_target_arm64
#define bpf_target_defined #define bpf_target_defined
...@@ -296,6 +307,8 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, ...@@ -296,6 +307,8 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode,
#define bpf_target_x86 #define bpf_target_x86
#elif defined(__s390x__) #elif defined(__s390x__)
#define bpf_target_s930x #define bpf_target_s930x
#elif defined(__arm__)
#define bpf_target_arm
#elif defined(__aarch64__) #elif defined(__aarch64__)
#define bpf_target_arm64 #define bpf_target_arm64
#elif defined(__mips__) #elif defined(__mips__)
...@@ -333,6 +346,19 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, ...@@ -333,6 +346,19 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode,
#define PT_REGS_SP(x) ((x)->gprs[15]) #define PT_REGS_SP(x) ((x)->gprs[15])
#define PT_REGS_IP(x) ((x)->psw.addr) #define PT_REGS_IP(x) ((x)->psw.addr)
#elif defined(bpf_target_arm)
#define PT_REGS_PARM1(x) ((x)->uregs[0])
#define PT_REGS_PARM2(x) ((x)->uregs[1])
#define PT_REGS_PARM3(x) ((x)->uregs[2])
#define PT_REGS_PARM4(x) ((x)->uregs[3])
#define PT_REGS_PARM5(x) ((x)->uregs[4])
#define PT_REGS_RET(x) ((x)->uregs[14])
#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */
#define PT_REGS_RC(x) ((x)->uregs[0])
#define PT_REGS_SP(x) ((x)->uregs[13])
#define PT_REGS_IP(x) ((x)->uregs[12])
#elif defined(bpf_target_arm64) #elif defined(bpf_target_arm64)
#define PT_REGS_PARM1(x) ((x)->regs[0]) #define PT_REGS_PARM1(x) ((x)->regs[0])
......
...@@ -23,3 +23,5 @@ CONFIG_LWTUNNEL=y ...@@ -23,3 +23,5 @@ CONFIG_LWTUNNEL=y
CONFIG_BPF_STREAM_PARSER=y CONFIG_BPF_STREAM_PARSER=y
CONFIG_XDP_SOCKETS=y CONFIG_XDP_SOCKETS=y
CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE_SYSCALLS=y
CONFIG_IPV6_TUNNEL=y
CONFIG_IPV6_GRE=y
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include <test_progs.h> #include <test_progs.h>
/*
 * Read the perf sampling limit from
 * /proc/sys/kernel/perf_event_max_sample_rate.
 *
 * Returns the parsed value, or a fallback of 5000 when the file cannot be
 * opened or does not start with an unsigned decimal number.
 */
static __u64 read_perf_max_sample_freq(void)
{
	__u64 sample_freq = 5000; /* fallback to 5000 on error */
	unsigned long long parsed;
	FILE *f;

	f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r");
	if (f == NULL)
		return sample_freq;

	/*
	 * Parse into an unsigned long long so the "%llu" conversion matches
	 * its argument even where __u64 is not that exact type, and only
	 * overwrite the fallback when a value was really converted.
	 */
	if (fscanf(f, "%llu", &parsed) == 1)
		sample_freq = parsed;

	fclose(f);
	return sample_freq;
}
void test_stacktrace_build_id_nmi(void) void test_stacktrace_build_id_nmi(void)
{ {
int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
const char *file = "./test_stacktrace_build_id.o"; const char *file = "./test_stacktrace_build_id.o";
int err, pmu_fd, prog_fd; int err, pmu_fd, prog_fd;
struct perf_event_attr attr = { struct perf_event_attr attr = {
.sample_freq = 5000,
.freq = 1, .freq = 1,
.type = PERF_TYPE_HARDWARE, .type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES, .config = PERF_COUNT_HW_CPU_CYCLES,
...@@ -20,6 +32,8 @@ void test_stacktrace_build_id_nmi(void) ...@@ -20,6 +32,8 @@ void test_stacktrace_build_id_nmi(void)
int build_id_matches = 0; int build_id_matches = 0;
int retry = 1; int retry = 1;
attr.sample_freq = read_perf_max_sample_freq();
retry: retry:
err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd); err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
......
// SPDX-License-Identifier: GPL-2.0
#include <stdint.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/pkt_cls.h>
#include <linux/tcp.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
/* the maximum delay we are willing to add (drop packets beyond that) */
#define TIME_HORIZON_NS (2000 * 1000 * 1000)
#define NS_PER_SEC 1000000000
#define ECN_HORIZON_NS 5000000
#define THROTTLE_RATE_BPS (5 * 1000 * 1000)
/* flow_key => last_tstamp timestamp used
 *
 * Hash map limited to one entry: throttle_flow() always looks up and
 * updates key 0, so every throttled packet shares the same stored
 * timestamp.
 */
struct bpf_map_def SEC("maps") flow_map = {
.type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(uint32_t),
.value_size = sizeof(uint64_t),
.max_entries = 1,
};
/*
 * Pace one packet of the throttled flow.
 *
 * The departure time of the previous packet is kept in flow_map under key 0.
 * This packet is scheduled one "packet cost" (skb->len scaled by
 * THROTTLE_RATE_BPS) after it by rewriting skb->tstamp.
 *
 * Returns TC_ACT_OK when the packet may proceed (possibly delayed and/or
 * ECN marked) and TC_ACT_SHOT when it would be queued beyond
 * TIME_HORIZON_NS or when the map cannot be updated.
 */
static inline int throttle_flow(struct __sk_buff *skb)
{
	int slot = 0;
	uint64_t *prev = bpf_map_lookup_elem(&flow_map, &slot);
	uint64_t pkt_cost_ns = ((uint64_t)skb->len) * NS_PER_SEC /
			       THROTTLE_RATE_BPS;
	uint64_t now = bpf_ktime_get_ns();
	uint64_t earliest = skb->tstamp;
	uint64_t departure = prev ? *prev + pkt_cost_ns : 0;
	uint64_t backlog_ns;

	/* a packet can never leave in the past */
	if (earliest < now)
		earliest = now;

	/* no throttling needed: just remember when this packet leaves */
	if (departure <= earliest)
		return bpf_map_update_elem(&flow_map, &slot, &earliest,
					   BPF_ANY) ? TC_ACT_SHOT : TC_ACT_OK;

	backlog_ns = departure - now;

	/* do not queue past the time horizon */
	if (backlog_ns >= TIME_HORIZON_NS)
		return TC_ACT_SHOT;

	/* set ecn bit, if needed */
	if (backlog_ns >= ECN_HORIZON_NS)
		bpf_skb_ecn_set_ce(skb);

	if (bpf_map_update_elem(&flow_map, &slot, &departure, BPF_EXIST))
		return TC_ACT_SHOT;

	skb->tstamp = departure;
	return TC_ACT_OK;
}
/*
 * Inspect a TCP header located inside the packet.
 *
 * Returns TC_ACT_SHOT when the header does not fit before data_end, the
 * verdict of throttle_flow() for destination port 9000, and TC_ACT_OK for
 * every other port.
 */
static inline int handle_tcp(struct __sk_buff *skb, struct tcphdr *tcp)
{
	void *pkt_end = (void *)(long)skb->data_end;

	/* drop malformed packets */
	if ((void *)(tcp + 1) > pkt_end)
		return TC_ACT_SHOT;

	return tcp->dest == bpf_htons(9000) ? throttle_flow(skb) : TC_ACT_OK;
}
/*
 * Validate the Ethernet + IPv4 headers and hand TCP segments to
 * handle_tcp().
 *
 * Returns TC_ACT_SHOT for packets whose headers extend past data_end,
 * TC_ACT_OK for non-TCP payloads, and the handle_tcp() verdict otherwise.
 */
static inline int handle_ipv4(struct __sk_buff *skb)
{
	void *pkt_end = (void *)(long)skb->data_end;
	void *pkt = (void *)(long)skb->data;
	struct iphdr *ip4;
	uint32_t hdr_len;
	void *l4;

	/* drop malformed packets */
	if (pkt + sizeof(struct ethhdr) > pkt_end)
		return TC_ACT_SHOT;

	ip4 = (struct iphdr *)(pkt + sizeof(struct ethhdr));
	if ((void *)(ip4 + 1) > pkt_end)
		return TC_ACT_SHOT;

	/* ihl counts 32-bit words; the transport header follows the options */
	hdr_len = ip4->ihl * 4;
	l4 = ((void *)ip4) + hdr_len;
	if (l4 > pkt_end)
		return TC_ACT_SHOT;

	if (ip4->protocol != IPPROTO_TCP)
		return TC_ACT_OK;

	return handle_tcp(skb, (struct tcphdr *)l4);
}
SEC("cls_test") int tc_prog(struct __sk_buff *skb)
{
	/* anything that is not IPv4 passes through untouched */
	if (skb->protocol != bpf_htons(ETH_P_IP))
		return TC_ACT_OK;

	return handle_ipv4(skb);
}
char __license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
/* In-place tunneling */
#include <stdbool.h>
#include <string.h>
#include <linux/stddef.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/pkt_cls.h>
#include <linux/types.h>
#include "bpf_endian.h"
#include "bpf_helpers.h"
static const int cfg_port = 8000;
/*
 * Outer header written by encap_ipv4(): an IPv4 header followed by two
 * 16-bit GRE fields (flags, protocol). Packed so that sizeof() is the
 * exact number of bytes placed on the wire.
 */
struct grev4hdr {
struct iphdr ip;
/* GRE flags; encap_ipv4() stores 0 here */
__be16 flags;
/* GRE protocol; encap_ipv4() stores htons(ETH_P_IP) here */
__be16 protocol;
} __attribute__((packed));
/*
 * Outer header written by encap_ipv6(): an IPv6 header followed by two
 * 16-bit GRE fields (flags, protocol). Packed so that sizeof() is the
 * exact number of bytes placed on the wire.
 */
struct grev6hdr {
struct ipv6hdr ip;
/* GRE flags; encap_ipv6() stores 0 here */
__be16 flags;
/* GRE protocol; encap_ipv6() stores htons(ETH_P_IPV6) here */
__be16 protocol;
} __attribute__((packed));
/*
 * Compute and store the header checksum of an option-less IPv4 header.
 *
 * The checksum field is zeroed, the sizeof(*iph) / 2 16-bit words of the
 * header are summed, and the one's complement of the folded sum is written
 * back to iph->check.
 */
static __always_inline void set_ipv4_csum(struct iphdr *iph)
{
	__u16 *iph16 = (__u16 *)iph;
	__u32 csum;
	int i;

	iph->check = 0;

#pragma clang loop unroll(full)
	for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
		csum += *iph16++;

	/*
	 * Fold the carries back into the low 16 bits. The first fold can
	 * itself carry out of bit 15 (e.g. a sum of 0x1ffff folds to
	 * 0x10000), so fold a second time to add that end-around carry
	 * instead of losing it when the result is truncated to 16 bits.
	 */
	csum = (csum & 0xffff) + (csum >> 16);
	csum = (csum & 0xffff) + (csum >> 16);

	iph->check = ~csum;
}
/*
 * Encapsulate a matching IPv4 packet in place.
 *
 * Only option-less IPv4/TCP packets whose TCP destination port equals
 * cfg_port are touched. For those, room for an outer IPv4 header (plus the
 * GRE fields of struct grev4hdr when @with_gre is true) is opened right
 * after the Ethernet header, and the outer header is built as a copy of the
 * inner one with adjusted total length, protocol and checksum.
 *
 * Returns TC_ACT_OK for packets that are not selected or that were
 * encapsulated successfully, TC_ACT_SHOT when adjusting room or storing the
 * outer header fails.
 */
static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
{
	struct grev4hdr h_outer;
	struct iphdr iph_inner;
	struct tcphdr tcph;
	__u64 flags;
	int olen;

	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
			       sizeof(iph_inner)) < 0)
		return TC_ACT_OK;

	/* filter only packets we want */
	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
		return TC_ACT_OK;

	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
	if (with_gre) {
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
		olen = sizeof(h_outer);
	} else {
		olen = sizeof(h_outer.ip);
	}

	/* add room between mac and network header */
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	/* prepare new outer network header */
	h_outer.ip = iph_inner;
	/*
	 * tot_len is stored in network byte order: convert it to host order
	 * (ntohs) before adding the outer header length, then back (htons).
	 */
	h_outer.ip.tot_len = bpf_htons(olen +
				       bpf_ntohs(h_outer.ip.tot_len));
	if (with_gre) {
		h_outer.ip.protocol = IPPROTO_GRE;
		h_outer.protocol = bpf_htons(ETH_P_IP);
		h_outer.flags = 0;
	} else {
		h_outer.ip.protocol = IPPROTO_IPIP;
	}

	set_ipv4_csum((void *)&h_outer.ip);

	/* store new outer network header; only the first olen bytes are used */
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}
static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
{
struct ipv6hdr iph_inner;
struct grev6hdr h_outer;
struct tcphdr tcph;
__u64 flags;
int olen;
if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
sizeof(iph_inner)) < 0)
return TC_ACT_OK;
/* filter only packets we want */
if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
&tcph, sizeof(tcph)) < 0)
return TC_ACT_OK;
if (tcph.dest != __bpf_constant_htons(cfg_port))
return TC_ACT_OK;
flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
if (with_gre) {
flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
olen = sizeof(h_outer);
} else {
olen = sizeof(h_outer.ip);
}
/* add room between mac and network header */
if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
return TC_ACT_SHOT;
/* prepare new outer network header */
h_outer.ip = iph_inner;
h_outer.ip.payload_len = bpf_htons(olen +
bpf_ntohs(h_outer.ip.payload_len));
if (with_gre) {
h_outer.ip.nexthdr = IPPROTO_GRE;
h_outer.protocol = bpf_htons(ETH_P_IPV6);
h_outer.flags = 0;
} else {
h_outer.ip.nexthdr = IPPROTO_IPV6;
}
/* store new outer network header */
if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
BPF_F_INVALIDATE_HASH) < 0)
return TC_ACT_SHOT;
return TC_ACT_OK;
}
SEC("encap_ipip")
int __encap_ipip(struct __sk_buff *skb)
{
	/* only IPv4 frames are candidates; no GRE header is added */
	if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
		return TC_ACT_OK;

	return encap_ipv4(skb, false);
}
SEC("encap_gre")
int __encap_gre(struct __sk_buff *skb)
{
	/* only IPv4 frames are candidates; a GRE header is added as well */
	if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
		return TC_ACT_OK;

	return encap_ipv4(skb, true);
}
SEC("encap_ip6tnl")
int __encap_ip6tnl(struct __sk_buff *skb)
{
	/* only IPv6 frames are candidates; no GRE header is added */
	if (skb->protocol != __bpf_constant_htons(ETH_P_IPV6))
		return TC_ACT_OK;

	return encap_ipv6(skb, false);
}
SEC("encap_ip6gre")
int __encap_ip6gre(struct __sk_buff *skb)
{
	/* only IPv6 frames are candidates; a GRE header is added as well */
	if (skb->protocol != __bpf_constant_htons(ETH_P_IPV6))
		return TC_ACT_OK;

	return encap_ipv6(skb, true);
}
/*
 * Strip the outer tunnel header from a packet.
 *
 * @skb:   packet to modify
 * @off:   offset of the outer network header (currently not used)
 * @len:   length of the outer network header
 * @proto: protocol carried by the outer header
 *
 * For IPPROTO_IPIP and IPPROTO_IPV6 exactly @len bytes are removed; for
 * IPPROTO_GRE four more bytes are removed for the GRE header. Any other
 * protocol leaves the packet untouched.
 *
 * Returns TC_ACT_SHOT when shrinking the packet fails, TC_ACT_OK otherwise.
 */
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{
	int olen;

	switch (proto) {
	case IPPROTO_IPIP:
	case IPPROTO_IPV6:
		olen = len;
		break;
	case IPPROTO_GRE:
		olen = len + 4 /* gre hdr */;
		break;
	default:
		return TC_ACT_OK;
	}

	/* a negative length shrinks the room after the mac header */
	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC,
				BPF_F_ADJ_ROOM_FIXED_GSO))
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}
static int decap_ipv4(struct __sk_buff *skb)
{
struct iphdr iph_outer;
if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
sizeof(iph_outer)) < 0)
return TC_ACT_OK;
if (iph_outer.ihl != 5)
return TC_ACT_OK;
return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
iph_outer.protocol);
}
static int decap_ipv6(struct __sk_buff *skb)
{
struct ipv6hdr iph_outer;
if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
sizeof(iph_outer)) < 0)
return TC_ACT_OK;
return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
iph_outer.nexthdr);
}
SEC("decap")
int decap_f(struct __sk_buff *skb)
{
	__u32 l3_proto = skb->protocol;

	/* dispatch on the frame's network protocol */
	if (l3_proto == __bpf_constant_htons(ETH_P_IP))
		return decap_ipv4(skb);

	if (l3_proto == __bpf_constant_htons(ETH_P_IPV6))
		return decap_ipv6(skb);

	/* does not match, ignore */
	return TC_ACT_OK;
}
char __license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
// Copyright (c) 2019 Cloudflare
#include <string.h>
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <sys/socket.h>
#include <linux/tcp.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
/*
 * Single-slot array used to report the outcome: check_syncookie() writes 1
 * at index 0 when bpf_tcp_check_syncookie() returns 0.
 */
struct bpf_map_def SEC("maps") results = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(__u64),
.max_entries = 1,
};
static __always_inline void check_syncookie(void *ctx, void *data,
void *data_end)
{
struct bpf_sock_tuple tup;
struct bpf_sock *sk;
struct ethhdr *ethh;
struct iphdr *ipv4h;
struct ipv6hdr *ipv6h;
struct tcphdr *tcph;
int ret;
__u32 key = 0;
__u64 value = 1;
ethh = data;
if (ethh + 1 > data_end)
return;
switch (bpf_ntohs(ethh->h_proto)) {
case ETH_P_IP:
ipv4h = data + sizeof(struct ethhdr);
if (ipv4h + 1 > data_end)
return;
if (ipv4h->ihl != 5)
return;
tcph = data + sizeof(struct ethhdr) + sizeof(struct iphdr);
if (tcph + 1 > data_end)
return;
tup.ipv4.saddr = ipv4h->saddr;
tup.ipv4.daddr = ipv4h->daddr;
tup.ipv4.sport = tcph->source;
tup.ipv4.dport = tcph->dest;
sk = bpf_skc_lookup_tcp(ctx, &tup, sizeof(tup.ipv4),
BPF_F_CURRENT_NETNS, 0);
if (!sk)
return;
if (sk->state != BPF_TCP_LISTEN)
goto release;
ret = bpf_tcp_check_syncookie(sk, ipv4h, sizeof(*ipv4h),
tcph, sizeof(*tcph));
break;
case ETH_P_IPV6:
ipv6h = data + sizeof(struct ethhdr);
if (ipv6h + 1 > data_end)
return;
if (ipv6h->nexthdr != IPPROTO_TCP)
return;
tcph = data + sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
if (tcph + 1 > data_end)
return;
memcpy(tup.ipv6.saddr, &ipv6h->saddr, sizeof(tup.ipv6.saddr));
memcpy(tup.ipv6.daddr, &ipv6h->daddr, sizeof(tup.ipv6.daddr));
tup.ipv6.sport = tcph->source;
tup.ipv6.dport = tcph->dest;
sk = bpf_skc_lookup_tcp(ctx, &tup, sizeof(tup.ipv6),
BPF_F_CURRENT_NETNS, 0);
if (!sk)
return;
if (sk->state != BPF_TCP_LISTEN)
goto release;
ret = bpf_tcp_check_syncookie(sk, ipv6h, sizeof(*ipv6h),
tcph, sizeof(*tcph));
break;
default:
return;
}
if (ret == 0)
bpf_map_update_elem(&results, &key, &value, 0);
release:
bpf_sk_release(sk);
}
SEC("clsact/check_syncookie")
int check_syncookie_clsact(struct __sk_buff *skb)
{
	void *pkt_start = (void *)(long)skb->data;
	void *pkt_end = (void *)(long)skb->data_end;

	/* the check only records its result; the packet is always accepted */
	check_syncookie(skb, pkt_start, pkt_end);
	return TC_ACT_OK;
}
SEC("xdp/check_syncookie")
int check_syncookie_xdp(struct xdp_md *ctx)
{
	void *pkt_start = (void *)(long)ctx->data;
	void *pkt_end = (void *)(long)ctx->data_end;

	/* the check only records its result; the packet is always passed on */
	check_syncookie(ctx, pkt_start, pkt_end);
	return XDP_PASS;
}
char _license[] SEC("license") = "GPL";
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This test installs a TC bpf program that throttles a TCP flow
# with dst port = 9000 down to 5MBps. Then it measures actual
# throughput of the flow.
# The test creates network namespaces and attaches tc programs, so it
# needs root.
if [[ $EUID -ne 0 ]]; then
	echo "This script must be run as root"
	echo "FAIL"
	exit 1
fi

# check that nc, dd, and timeout are present; report the tool that is
# actually missing
for tool in nc dd timeout; do
	command -v "${tool}" >/dev/null 2>&1 || \
		{ echo >&2 "${tool} is not available"; exit 1; }
done

# Randomized namespace names so that parallel or stale runs do not collide.
readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"

readonly IP_SRC="172.16.1.100"
readonly IP_DST="172.16.2.100"
# Remove both test namespaces; installed as the EXIT trap handler.
cleanup()
{
	ip netns del "${NS_SRC}"
	ip netns del "${NS_DST}"
}
# Tear the namespaces down on every exit path, including failures below.
trap cleanup EXIT
set -e # exit on error
# Build a veth pair with one end in each namespace.
ip netns add "${NS_SRC}"
ip netns add "${NS_DST}"
ip link add veth_src type veth peer name veth_dst
ip link set veth_src netns ${NS_SRC}
ip link set veth_dst netns ${NS_DST}
ip -netns ${NS_SRC} addr add ${IP_SRC}/24 dev veth_src
ip -netns ${NS_DST} addr add ${IP_DST}/24 dev veth_dst
ip -netns ${NS_SRC} link set dev veth_src up
ip -netns ${NS_DST} link set dev veth_dst up
# The two addresses sit in different /24 networks, so add explicit host
# routes towards the peer in each direction.
ip -netns ${NS_SRC} route add ${IP_DST}/32 dev veth_src
ip -netns ${NS_DST} route add ${IP_SRC}/32 dev veth_dst
# set up TC on TX: fq as root qdisc plus the BPF program under test on
# clsact egress
ip netns exec ${NS_SRC} tc qdisc add dev veth_src root fq
ip netns exec ${NS_SRC} tc qdisc add dev veth_src clsact
ip netns exec ${NS_SRC} tc filter add dev veth_src egress \
bpf da obj test_tc_edt.o sec cls_test
# start the listener
ip netns exec ${NS_DST} bash -c \
"nc -4 -l -s ${IP_DST} -p 9000 >/dev/null &"
# NOTE(review): the '&' is inside the child bash, so $! here is probably
# not the PID of nc; NC_PID is not referenced again in this script.
declare -i NC_PID=$!
# Give the listener time to bind before sending.
sleep 1
declare -ir TIMEOUT=20
declare -ir EXPECTED_BPS=5000000
# run the load, capture RX bytes on DST
declare -ir RX_BYTES_START=$( ip netns exec ${NS_DST} \
cat /sys/class/net/veth_dst/statistics/rx_bytes )
# errexit is lifted around the sender; presumably because dd is expected
# to be cut off by timeout and so to exit non-zero.
set +e
ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero \
bs=1000 count=1000000 > /dev/tcp/${IP_DST}/9000 2>/dev/null"
set -e
declare -ir RX_BYTES_END=$( ip netns exec ${NS_DST} \
cat /sys/class/net/veth_dst/statistics/rx_bytes )
# Average receive rate over the whole timeout window, in bytes per second.
declare -ir ACTUAL_BPS=$(( ($RX_BYTES_END - $RX_BYTES_START) / $TIMEOUT ))
echo $TIMEOUT $ACTUAL_BPS $EXPECTED_BPS | \
awk '{printf "elapsed: %d sec; bps difference: %.2f%%\n",
$1, ($2-$3)*100.0/$3}'
# Pass the test if the actual bps is within 1% of the expected bps.
# The difference is usually about 0.1% on a 20-sec test, and approaches
# zero the longer the test runs.
# RES is "1" when the relative difference exceeds 1%, "0" otherwise.
declare -ir RES=$( echo $ACTUAL_BPS $EXPECTED_BPS | \
awk 'function abs(x){return ((x < 0.0) ? -x : x)}
{if (abs(($1-$2)*100.0/$2) > 1.0) { print "1" }
else { print "0"} }' )
if [ "${RES}" == "0" ] ; then
echo "PASS"
else
echo "FAIL"
exit 1
fi
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# In-place tunneling
# must match the port that the bpf program filters on
readonly port=8000
# Namespace names embed the shell PID ($$) so concurrent runs do not clash.
readonly ns_prefix="ns-$$-"
readonly ns1="${ns_prefix}1"
readonly ns2="${ns_prefix}2"
# Addresses of the client (ns1) and server (ns2) ends of the veth pair.
readonly ns1_v4=192.168.1.1
readonly ns2_v4=192.168.1.2
readonly ns1_v6=fd::1
readonly ns2_v6=fd::2
# Payload sent by the client and the file the server writes what it receives.
readonly infile="$(mktemp)"
readonly outfile="$(mktemp)"
# Create two namespaces joined by a veth pair (veth1 in ns1, veth2 in ns2),
# assign IPv4/IPv6 addresses, and generate the random payload file.
# Reads the globals ns1, ns2, ns*_v4, ns*_v6, infile and datalen.
setup() {
ip netns add "${ns1}"
ip netns add "${ns2}"
ip link add dev veth1 mtu 1500 netns "${ns1}" type veth \
peer name veth2 mtu 1500 netns "${ns2}"
# Turn TCP segmentation offload off on the sending side.
ip netns exec "${ns1}" ethtool -K veth1 tso off
ip -netns "${ns1}" link set veth1 up
ip -netns "${ns2}" link set veth2 up
ip -netns "${ns1}" -4 addr add "${ns1_v4}/24" dev veth1
ip -netns "${ns2}" -4 addr add "${ns2_v4}/24" dev veth2
ip -netns "${ns1}" -6 addr add "${ns1_v6}/64" dev veth1 nodad
ip -netns "${ns2}" -6 addr add "${ns2_v6}/64" dev veth2 nodad
# clamp route to reserve room for tunnel headers
# (1500 - 1476 = 24 bytes for IPv4, 1500 - 1456 = 44 bytes for IPv6)
ip -netns "${ns1}" -4 route flush table main
ip -netns "${ns1}" -6 route flush table main
ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1476 dev veth1
ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1456 dev veth1
sleep 1
# One block of datalen random bytes is the payload for every transfer.
dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
}
# Delete both namespaces, then remove whichever temp files still exist.
cleanup() {
	local tmpfile

	ip netns del "${ns2}"
	ip netns del "${ns1}"

	for tmpfile in "${outfile}" "${infile}"; do
		if [[ -f "${tmpfile}" ]]; then
			rm "${tmpfile}"
		fi
	done
}
# Start a background nc listener in ns2 on ${port}, writing everything it
# receives to ${outfile}. The job's PID is stored in the global server_pid
# so that verify_data can wait for it.
server_listen() {
ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" > "${outfile}" &
server_pid=$!
# Brief pause so the listener is up before a client connects.
sleep 0.2
}
# Send ${infile} from ns1 to ${addr2}:${port} with nc, bounded by a
# 2 second timeout, then print the exit status of that command.
# NOTE(review): the last command is echo, so the function's own return
# status is echo's, not nc's — callers cannot test the connection result
# through the return value.
client_connect() {
ip netns exec "${ns1}" timeout 2 nc "${netcat_opt}" -w 1 "${addr2}" "${port}" < "${infile}"
echo $?
}
# Wait for the background listener to finish, then compare the SHA-1 of
# the payload that was sent (${infile}) with what was received
# (${outfile}). Prints "data mismatch" and exits 1 when they differ.
verify_data() {
	wait "${server_pid}"
	# sha1sum returns two fields [sha1] [filepath]
	# convert to bash array and access first elem
	# File names are quoted so a temp directory containing whitespace
	# does not split the argument.
	insum=($(sha1sum "${infile}"))
	outsum=($(sha1sum "${outfile}"))
	if [[ "${insum[0]}" != "${outsum[0]}" ]]; then
		echo "data mismatch"
		exit 1
	fi
}
set -e
# no arguments: automated test, run all
# Each case re-invokes this script with <family> <tuntype> <data_len>;
# with errexit set, the first failing case aborts the run.
if [[ "$#" -eq "0" ]]; then
echo "ipip"
$0 ipv4 ipip 100
echo "ip6ip6"
$0 ipv6 ip6tnl 100
echo "ip gre"
$0 ipv4 gre 100
echo "ip6 gre"
$0 ipv6 ip6gre 100
echo "ip gre gso"
$0 ipv4 gre 2000
echo "ip6 gre gso"
$0 ipv6 ip6gre 2000
echo "OK. All tests passed"
exit 0
fi
if [[ "$#" -ne "3" ]]; then
echo "Usage: $0"
echo " or: $0 <ipv4|ipv6> <tuntype> <data_len>"
exit 1
fi
# Select addresses and the nc address-family flag from the first argument.
case "$1" in
"ipv4")
readonly addr1="${ns1_v4}"
readonly addr2="${ns2_v4}"
readonly netcat_opt=-4
;;
"ipv6")
readonly addr1="${ns1_v6}"
readonly addr2="${ns2_v6}"
readonly netcat_opt=-6
;;
*)
echo "unknown arg: $1"
exit 1
;;
esac
readonly tuntype=$2
readonly datalen=$3
echo "encap ${addr1} to ${addr2}, type ${tuntype}, len ${datalen}"
trap cleanup EXIT
setup
# basic communication works
echo "test basic connectivity"
server_listen
client_connect
verify_data
# clientside, insert bpf program to encap all TCP to port ${port}
# client can no longer connect
ip netns exec "${ns1}" tc qdisc add dev veth1 clsact
ip netns exec "${ns1}" tc filter add dev veth1 egress \
bpf direct-action object-file ./test_tc_tunnel.o \
section "encap_${tuntype}"
echo "test bpf encap without decap (expect failure)"
server_listen
# NOTE(review): bash's errexit ignores the status of a command prefixed
# with '!', so this line cannot abort the script whether or not the
# connection succeeds — confirm that is intended.
! client_connect
# serverside, insert decap module
# server is still running
# client can connect again
ip netns exec "${ns2}" ip link add dev testtun0 type "${tuntype}" \
remote "${addr1}" local "${addr2}"
# Because packets are decapped by the tunnel they arrive on testtun0 from
# the IP stack perspective. Ensure reverse path filtering is disabled
# otherwise we drop the TCP SYN as arriving on testtun0 instead of the
# expected veth2 (veth2 is where 192.168.1.2 is configured).
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
# rp needs to be disabled for both all and testtun0 as the rp value is
# selected as the max of the "all" and device-specific values.
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.testtun0.rp_filter=0
ip netns exec "${ns2}" ip link set dev testtun0 up
echo "test bpf encap with tunnel device decap"
# No server_listen here: the listener started for the previous step is
# reused.
client_connect
verify_data
# serverside, use BPF for decap
ip netns exec "${ns2}" ip link del dev testtun0
ip netns exec "${ns2}" tc qdisc add dev veth2 clsact
ip netns exec "${ns2}" tc filter add dev veth2 ingress \
bpf direct-action object-file ./test_tc_tunnel.o section decap
server_listen
echo "test bpf encap with bpf decap"
client_connect
verify_data
echo OK
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2018 Facebook
# Copyright (c) 2019 Cloudflare
set -eu
# Ping address $1 from inside ns1 once per second, up to MAX_PING_TRIES
# times. Returns as soon as a ping succeeds; otherwise reports a timeout
# on stderr and exits the script with status 1.
wait_for_ip()
{
	local _attempt=0

	printf "Wait for IP %s to become available " "$1"
	while [ "${_attempt}" -lt "${MAX_PING_TRIES}" ]; do
		_attempt=$((_attempt + 1))
		printf "."
		if ! ns1_exec ping -c 1 -W 1 "$1" >/dev/null 2>&1; then
			sleep 1
			continue
		fi
		echo " OK"
		return
	done

	echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
	exit 1
}
# Filter for stdin: for every input line containing " id ", drop everything
# up to and including the last " id " and print the first remaining field.
# Used to pull the program id out of tc / ip link output.
get_prog_id()
{
awk '/ id / {sub(/.* id /, "", $0); print($1)}'
}
# Run the given command line inside the ns1 network namespace.
ns1_exec()
{
ip netns exec ns1 "$@"
}
# Create the ns1 namespace, bring up its loopback device, set the
# tcp_syncookies sysctl to 2, and wait until both loopback addresses
# answer pings.
setup()
{
ip netns add ns1
ns1_exec ip link set lo up
ns1_exec sysctl -w net.ipv4.tcp_syncookies=2
wait_for_ip 127.0.0.1
wait_for_ip ::1
}
# Delete the ns1 namespace. Errors are silenced and ignored ("|| :") so the
# handler also succeeds when the namespace was never created.
cleanup()
{
ip netns del ns1 2>/dev/null || :
}
# Run the user-space test program twice: once with the BPF object attached
# as a clsact ingress filter and once attached as an XDP program, both on
# TEST_IF inside ns1. The attached program's id is passed to PROG each time.
main()
{
# Clean up on normal exit and on signals 2, 3, 6 and 15.
trap cleanup EXIT 2 3 6 15
setup
printf "Testing clsact..."
ns1_exec tc qdisc add dev "${TEST_IF}" clsact
ns1_exec tc filter add dev "${TEST_IF}" ingress \
bpf obj "${BPF_PROG_OBJ}" sec "${CLSACT_SECTION}" da
BPF_PROG_ID=$(ns1_exec tc filter show dev "${TEST_IF}" ingress | \
get_prog_id)
ns1_exec "${PROG}" "${BPF_PROG_ID}"
# Remove the clsact qdisc (and its filter) before switching to XDP.
ns1_exec tc qdisc del dev "${TEST_IF}" clsact
printf "Testing XDP..."
ns1_exec ip link set "${TEST_IF}" xdp \
object "${BPF_PROG_OBJ}" section "${XDP_SECTION}"
BPF_PROG_ID=$(ns1_exec ip link show "${TEST_IF}" | get_prog_id)
ns1_exec "${PROG}" "${BPF_PROG_ID}"
}
# Locate the BPF object and the user-space helper next to this script.
# "$0" is quoted so an install path containing whitespace still resolves.
DIR=$(dirname "$0")
TEST_IF=lo
MAX_PING_TRIES=5
BPF_PROG_OBJ="${DIR}/test_tcp_check_syncookie_kern.o"
CLSACT_SECTION="clsact/check_syncookie"
XDP_SECTION="xdp/check_syncookie"
BPF_PROG_ID=0
PROG="${DIR}/test_tcp_check_syncookie_user"

main
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
// Copyright (c) 2019 Cloudflare
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
/*
 * Create a TCP socket of addr's family, bind it to addr and start
 * listening with a backlog of 128.
 *
 * Returns the listening descriptor, or -1 after logging the failing step.
 */
static int start_server(const struct sockaddr *addr, socklen_t len)
{
	int sock = socket(addr->sa_family, SOCK_STREAM, 0);

	if (sock == -1) {
		log_err("Failed to create server socket");
		return -1;
	}

	if (bind(sock, addr, len) == -1) {
		log_err("Failed to bind server socket");
		close(sock);
		return -1;
	}

	if (listen(sock, 128) == -1) {
		log_err("Failed to listen on server socket");
		close(sock);
		return -1;
	}

	return sock;
}
/*
 * Open a TCP connection to whatever address server_fd is bound to.
 *
 * The address is obtained with getsockname(), so the caller does not need
 * to know the port that was assigned to the server socket.
 *
 * Returns the connected descriptor, or -1 after logging the failing step.
 */
static int connect_to_server(int server_fd)
{
	struct sockaddr_storage srv_addr;
	socklen_t srv_len = sizeof(srv_addr);
	int sock;

	if (getsockname(server_fd, (struct sockaddr *)&srv_addr, &srv_len)) {
		log_err("Failed to get server addr");
		return -1;
	}

	sock = socket(srv_addr.ss_family, SOCK_STREAM, 0);
	if (sock == -1) {
		log_err("Failed to create client socket");
		return -1;
	}

	if (connect(sock, (const struct sockaddr *)&srv_addr, srv_len) == -1) {
		log_err("Fail to connect to server");
		close(sock);
		return -1;
	}

	return sock;
}
/*
 * Resolve a BPF program id to a descriptor for the first map the program
 * uses.
 *
 * Only one map id is requested from the kernel (nr_map_ids = 1). The
 * temporary program descriptor is closed before returning.
 *
 * Returns the map descriptor, or a negative value after logging the error.
 */
static int get_map_fd_by_prog_id(int prog_id)
{
	struct bpf_prog_info info = {};
	__u32 info_len = sizeof(info);
	__u32 map_ids[1];
	int map_fd = -1;
	int prog_fd;

	prog_fd = bpf_prog_get_fd_by_id(prog_id);
	if (prog_fd < 0) {
		log_err("Failed to get fd by prog id %d", prog_id);
		return map_fd;
	}

	/* Tell the kernel where to store map ids and how many fit. */
	info.nr_map_ids = 1;
	info.map_ids = (__u64)(unsigned long)map_ids;

	if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
		log_err("Failed to get info by prog fd %d", prog_fd);
	} else if (!info.nr_map_ids) {
		log_err("No maps found for prog fd %d", prog_fd);
	} else {
		map_fd = bpf_map_get_fd_by_id(map_ids[0]);
		if (map_fd < 0)
			log_err("Failed to get fd by map id %d", map_ids[0]);
	}

	close(prog_fd);
	return map_fd;
}
/*
 * Run one connect/accept round trip against server_fd and check that the
 * BPF program flagged it.
 *
 * Slot 0 of the results map is cleared to 0, a client connects and is
 * accepted, and the slot is read back; the test passes only when it then
 * holds 1.
 *
 * Returns 0 on success, 1 on any failure (already logged).
 */
static int run_test(int server_fd, int results_fd)
{
	int client = -1, srv_client = -1;
	int ret = 0;
	__u32 key = 0;
	__u64 value = 0;

	if (bpf_map_update_elem(results_fd, &key, &value, 0) < 0) {
		log_err("Can't clear results");
		goto err;
	}

	client = connect_to_server(server_fd);
	if (client == -1)
		goto err;

	/* The peer address is not needed, so both out-parameters are NULL. */
	srv_client = accept(server_fd, NULL, NULL);
	if (srv_client == -1) {
		log_err("Can't accept connection");
		goto err;
	}

	if (bpf_map_lookup_elem(results_fd, &key, &value) < 0) {
		log_err("Can't lookup result");
		goto err;
	}

	if (value != 1) {
		log_err("Didn't match syncookie: %llu", value);
		goto err;
	}

	goto out;

err:
	ret = 1;
out:
	/* Error paths reach here with descriptors still at -1; skip those. */
	if (client != -1)
		close(client);
	if (srv_client != -1)
		close(srv_client);
	return ret;
}
/*
 * Usage: <prog> prog_id
 *
 * Looks up the results map of the BPF program with the given id, starts
 * one IPv4 and one IPv6 loopback server on kernel-assigned ports
 * (port 0), and runs the syncookie check against each.
 *
 * Prints "ok" and returns 0 when both checks pass, returns 1 otherwise.
 */
int main(int argc, char **argv)
{
	struct sockaddr_in addr4;
	struct sockaddr_in6 addr6;
	int server = -1;
	int server_v6 = -1;
	int results = -1;
	int err = 0;

	if (argc < 2) {
		fprintf(stderr, "Usage: %s prog_id\n", argv[0]);
		exit(1);
	}

	results = get_map_fd_by_prog_id(atoi(argv[1]));
	if (results < 0) {
		log_err("Can't get map");
		goto err;
	}

	memset(&addr4, 0, sizeof(addr4));
	addr4.sin_family = AF_INET;
	addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	addr4.sin_port = 0;

	memset(&addr6, 0, sizeof(addr6));
	addr6.sin6_family = AF_INET6;
	addr6.sin6_addr = in6addr_loopback;
	addr6.sin6_port = 0;

	server = start_server((const struct sockaddr *)&addr4, sizeof(addr4));
	if (server == -1)
		goto err;

	server_v6 = start_server((const struct sockaddr *)&addr6,
				 sizeof(addr6));
	if (server_v6 == -1)
		goto err;

	if (run_test(server, results))
		goto err;

	if (run_test(server_v6, results))
		goto err;

	printf("ok\n");
	goto out;
err:
	err = 1;
out:
	/* Only close descriptors that were actually opened. */
	if (server >= 0)
		close(server);
	if (server_v6 >= 0)
		close(server_v6);
	if (results >= 0)
		close(results);
	return err;
}
...@@ -198,7 +198,7 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self) ...@@ -198,7 +198,7 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
} }
/* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */ /* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */
#define BPF_SK_LOOKUP \ #define BPF_SK_LOOKUP(func) \
/* struct bpf_sock_tuple tuple = {} */ \ /* struct bpf_sock_tuple tuple = {} */ \
BPF_MOV64_IMM(BPF_REG_2, 0), \ BPF_MOV64_IMM(BPF_REG_2, 0), \
BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8), \ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8), \
...@@ -207,13 +207,13 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self) ...@@ -207,13 +207,13 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -32), \ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -32), \
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -40), \ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -40), \
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -48), \ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -48), \
/* sk = sk_lookup_tcp(ctx, &tuple, sizeof tuple, 0, 0) */ \ /* sk = func(ctx, &tuple, sizeof tuple, 0, 0) */ \
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), \ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), \
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48), \ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48), \
BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)), \ BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)), \
BPF_MOV64_IMM(BPF_REG_4, 0), \ BPF_MOV64_IMM(BPF_REG_4, 0), \
BPF_MOV64_IMM(BPF_REG_5, 0), \ BPF_MOV64_IMM(BPF_REG_5, 0), \
BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp) BPF_EMIT_CALL(BPF_FUNC_ ## func)
/* BPF_DIRECT_PKT_R2 contains 7 instructions, it initializes default return /* BPF_DIRECT_PKT_R2 contains 7 instructions, it initializes default return
* value into 0 and does necessary preparation for direct packet access * value into 0 and does necessary preparation for direct packet access
......
...@@ -7,11 +7,19 @@ ...@@ -7,11 +7,19 @@
#define BUF_SIZE 256 #define BUF_SIZE 256
/*
 * Issue count reads of BUF_SIZE bytes from fd into a scratch buffer.
 * The data and the read() results are discarded. A count of zero or less
 * performs no reads.
 */
static __attribute__((noinline))
void urandom_read(int fd, int count)
{
	char buf[BUF_SIZE];
	int remaining = count;

	while (remaining > 0) {
		read(fd, buf, BUF_SIZE);
		remaining--;
	}
}
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
int fd = open("/dev/urandom", O_RDONLY); int fd = open("/dev/urandom", O_RDONLY);
int i;
char buf[BUF_SIZE];
int count = 4; int count = 4;
if (fd < 0) if (fd < 0)
...@@ -20,8 +28,7 @@ int main(int argc, char *argv[]) ...@@ -20,8 +28,7 @@ int main(int argc, char *argv[])
if (argc == 2) if (argc == 2)
count = atoi(argv[1]); count = atoi(argv[1]);
for (i = 0; i < count; ++i) urandom_read(fd, count);
read(fd, buf, BUF_SIZE);
close(fd); close(fd);
return 0; return 0;
......
...@@ -242,7 +242,7 @@ ...@@ -242,7 +242,7 @@
.insns = { .insns = {
BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
/* struct bpf_sock *sock = bpf_sock_lookup(...); */ /* struct bpf_sock *sock = bpf_sock_lookup(...); */
BPF_SK_LOOKUP, BPF_SK_LOOKUP(sk_lookup_tcp),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
/* u64 foo; */ /* u64 foo; */
/* void *target = &foo; */ /* void *target = &foo; */
...@@ -276,7 +276,7 @@ ...@@ -276,7 +276,7 @@
.insns = { .insns = {
BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
/* struct bpf_sock *sock = bpf_sock_lookup(...); */ /* struct bpf_sock *sock = bpf_sock_lookup(...); */
BPF_SK_LOOKUP, BPF_SK_LOOKUP(sk_lookup_tcp),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
/* u64 foo; */ /* u64 foo; */
/* void *target = &foo; */ /* void *target = &foo; */
...@@ -307,7 +307,7 @@ ...@@ -307,7 +307,7 @@
.insns = { .insns = {
BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
/* struct bpf_sock *sock = bpf_sock_lookup(...); */ /* struct bpf_sock *sock = bpf_sock_lookup(...); */
BPF_SK_LOOKUP, BPF_SK_LOOKUP(sk_lookup_tcp),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
/* u64 foo; */ /* u64 foo; */
/* void *target = &foo; */ /* void *target = &foo; */
...@@ -339,7 +339,7 @@ ...@@ -339,7 +339,7 @@
.insns = { .insns = {
BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
/* struct bpf_sock *sock = bpf_sock_lookup(...); */ /* struct bpf_sock *sock = bpf_sock_lookup(...); */
BPF_SK_LOOKUP, BPF_SK_LOOKUP(sk_lookup_tcp),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
/* u64 foo; */ /* u64 foo; */
/* void *target = &foo; */ /* void *target = &foo; */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment