Commit b061017f authored by Alexei Starovoitov, committed by Daniel Borkmann

selftests/bpf: add realistic loop tests

Add a bunch of loop tests. Most of them are created by replacing
'#pragma unroll' with '#pragma clang loop unroll(disable)'

Several tests are artificially large:
  /* partial unroll. llvm will unroll loop ~150 times.
   * C loop count -> 600.
   * Asm loop count -> 4.
   * 16k insns in loop body.
   * Total of 5 such loops. Total program size ~82k insns.
   */
  "./pyperf600.o",

  /* no unroll at all.
   * C loop count -> 600.
   * ASM loop count -> 600.
   * ~110 insns in loop body.
   * Total of 5 such loops. Total program size ~1500 insns.
   */
  "./pyperf600_nounroll.o",

  /* partial unroll. 19k insn in a loop.
   * Total program size 20.8k insn.
   * ~350k processed_insns
   */
  "./strobemeta.o",
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parent 0d3679e9
...@@ -5,7 +5,7 @@ static int libbpf_debug_print(enum libbpf_print_level level, ...@@ -5,7 +5,7 @@ static int libbpf_debug_print(enum libbpf_print_level level,
const char *format, va_list args) const char *format, va_list args)
{ {
if (level != LIBBPF_DEBUG) if (level != LIBBPF_DEBUG)
return 0; return vfprintf(stderr, format, args);
if (!strstr(format, "verifier log")) if (!strstr(format, "verifier log"))
return 0; return 0;
...@@ -32,24 +32,69 @@ static int check_load(const char *file, enum bpf_prog_type type) ...@@ -32,24 +32,69 @@ static int check_load(const char *file, enum bpf_prog_type type)
void test_bpf_verif_scale(void) void test_bpf_verif_scale(void)
{ {
const char *scale[] = { const char *sched_cls[] = {
"./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o" "./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o",
}; };
const char *pyperf[] = { const char *raw_tp[] = {
"./pyperf50.o", "./pyperf100.o", "./pyperf180.o" /* full unroll by llvm */
"./pyperf50.o", "./pyperf100.o", "./pyperf180.o",
/* partial unroll. llvm will unroll loop ~150 times.
* C loop count -> 600.
* Asm loop count -> 4.
* 16k insns in loop body.
* Total of 5 such loops. Total program size ~82k insns.
*/
"./pyperf600.o",
/* no unroll at all.
* C loop count -> 600.
* ASM loop count -> 600.
* ~110 insns in loop body.
* Total of 5 such loops. Total program size ~1500 insns.
*/
"./pyperf600_nounroll.o",
"./loop1.o", "./loop2.o",
/* partial unroll. 19k insn in a loop.
* Total program size 20.8k insn.
* ~350k processed_insns
*/
"./strobemeta.o",
/* no unroll, tiny loops */
"./strobemeta_nounroll1.o",
"./strobemeta_nounroll2.o",
};
const char *cg_sysctl[] = {
"./test_sysctl_loop1.o", "./test_sysctl_loop2.o",
}; };
int err, i; int err, i;
if (verifier_stats) if (verifier_stats)
libbpf_set_print(libbpf_debug_print); libbpf_set_print(libbpf_debug_print);
for (i = 0; i < ARRAY_SIZE(scale); i++) { err = check_load("./loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT);
err = check_load(scale[i], BPF_PROG_TYPE_SCHED_CLS); printf("test_scale:loop3:%s\n", err ? (error_cnt--, "OK") : "FAIL");
printf("test_scale:%s:%s\n", scale[i], err ? "FAIL" : "OK");
for (i = 0; i < ARRAY_SIZE(sched_cls); i++) {
err = check_load(sched_cls[i], BPF_PROG_TYPE_SCHED_CLS);
printf("test_scale:%s:%s\n", sched_cls[i], err ? "FAIL" : "OK");
} }
for (i = 0; i < ARRAY_SIZE(pyperf); i++) { for (i = 0; i < ARRAY_SIZE(raw_tp); i++) {
err = check_load(pyperf[i], BPF_PROG_TYPE_RAW_TRACEPOINT); err = check_load(raw_tp[i], BPF_PROG_TYPE_RAW_TRACEPOINT);
printf("test_scale:%s:%s\n", pyperf[i], err ? "FAIL" : "OK"); printf("test_scale:%s:%s\n", raw_tp[i], err ? "FAIL" : "OK");
} }
for (i = 0; i < ARRAY_SIZE(cg_sysctl); i++) {
err = check_load(cg_sysctl[i], BPF_PROG_TYPE_CGROUP_SYSCTL);
printf("test_scale:%s:%s\n", cg_sysctl[i], err ? "FAIL" : "OK");
}
err = check_load("./test_xdp_loop.o", BPF_PROG_TYPE_XDP);
printf("test_scale:test_xdp_loop:%s\n", err ? "FAIL" : "OK");
err = check_load("./test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL);
printf("test_scale:test_seg6_loop:%s\n", err ? "FAIL" : "OK");
} }
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <linux/sched.h>
#include <linux/ptrace.h>
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include <linux/bpf.h>
#include "bpf_helpers.h"
char _license[] SEC("license") = "GPL";
SEC("raw_tracepoint/kfree_skb")
/* Loop test program with two nested, data-independent bounded loops.
 *
 * The outer loop runs j over [0, 300) and the inner loop runs i over [0, j).
 * For odd j the multiplier m is read from ctx->rax, for even j it is j
 * itself. The products i * m are accumulated in sum, which is returned.
 */
int nested_loops(volatile struct pt_regs* ctx)
{
int i, j, sum = 0, m;
for (j = 0; j < 300; j++)
for (i = 0; i < j; i++) {
if (j & 1)
/* ctx is volatile-qualified, so this is a fresh read on every pass */
m = ctx->rax;
else
m = j;
sum += i * m;
}
return sum;
}
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <linux/sched.h>
#include <linux/ptrace.h>
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include <linux/bpf.h>
#include "bpf_helpers.h"
char _license[] SEC("license") = "GPL";
SEC("raw_tracepoint/consume_skb")
/* Loop test program built around an unconditional `while (true)` loop.
 *
 * Each pass adds 3 to i when the low bit of ctx->rax is set and 7 otherwise.
 * The only exit is the `i > 40` check, so the loop ends after a bounded
 * number of passes whichever branch is taken. Returns the final value of i.
 */
int while_true(volatile struct pt_regs* ctx)
{
int i = 0;
while (true) {
/* volatile ctx: the branch condition is re-read on each pass */
if (ctx->rax & 1)
i += 3;
else
i += 7;
if (i > 40)
break;
}
return i;
}
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <linux/sched.h>
#include <linux/ptrace.h>
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include <linux/bpf.h>
#include "bpf_helpers.h"
char _license[] SEC("license") = "GPL";
SEC("raw_tracepoint/consume_skb")
/* Loop test program whose do/while loop only ends once the 64-bit counter i
 * reaches 0x100000000 (2^32).
 *
 * Every pass increments i and adds ctx->rax to sum. The __u64 sum is
 * returned through the int return type, so only its low bits survive.
 *
 * NOTE(review): the trip count is far larger than in the sibling loop tests;
 * presumably this program is meant to be rejected at load time - confirm
 * against the test driver.
 */
int while_true(volatile struct pt_regs* ctx)
{
__u64 i = 0, sum = 0;
do {
i++;
sum += ctx->rax;
} while (i < 0x100000000ULL);
return sum;
}
...@@ -220,7 +220,11 @@ static inline __attribute__((__always_inline__)) int __on_event(struct pt_regs * ...@@ -220,7 +220,11 @@ static inline __attribute__((__always_inline__)) int __on_event(struct pt_regs *
int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym); int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
if (symbol_counter == NULL) if (symbol_counter == NULL)
return 0; return 0;
#pragma unroll #ifdef NO_UNROLL
#pragma clang loop unroll(disable)
#else
#pragma clang loop unroll(full)
#endif
/* Unwind python stack */ /* Unwind python stack */
for (int i = 0; i < STACK_MAX_LEN; ++i) { for (int i = 0; i < STACK_MAX_LEN; ++i) {
if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) { if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
......
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#define STACK_MAX_LEN 600
/* clang will not unroll the loop 600 times.
* Instead it will unroll it to the amount it deemed
* appropriate, but the loop will still execute 600 times.
* Total program size is around 90k insns
*/
#include "pyperf.h"
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#define STACK_MAX_LEN 600
#define NO_UNROLL
/* clang will not unroll at all.
* Total program size is around 2k insns
*/
#include "pyperf.h"
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
// Copyright (c) 2019 Facebook
#define STROBE_MAX_INTS 2
#define STROBE_MAX_STRS 25
#define STROBE_MAX_MAPS 100
#define STROBE_MAX_MAP_ENTRIES 20
/* full unroll by llvm #undef NO_UNROLL */
#include "strobemeta.h"
This diff is collapsed.
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
// Copyright (c) 2019 Facebook
#define STROBE_MAX_INTS 2
#define STROBE_MAX_STRS 25
#define STROBE_MAX_MAPS 13
#define STROBE_MAX_MAP_ENTRIES 20
#define NO_UNROLL
#include "strobemeta.h"
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
// Copyright (c) 2019 Facebook
#define STROBE_MAX_INTS 2
#define STROBE_MAX_STRS 25
#define STROBE_MAX_MAPS 30
#define STROBE_MAX_MAP_ENTRIES 20
#define NO_UNROLL
#include "strobemeta.h"
#include <stddef.h>
#include <inttypes.h>
#include <errno.h>
#include <linux/seg6_local.h>
#include <linux/bpf.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
/* Packet parsing state machine helpers. */
/* cursor_advance(_cursor, _len): evaluates to the value _cursor had on entry
 * and, as a side effect, moves _cursor forward by _len. Implemented as a
 * statement expression, so _cursor must be a modifiable lvalue.
 */
#define cursor_advance(_cursor, _len) \
({ void *_tmp = _cursor; _cursor += _len; _tmp; })
/* Bit 4 of the SRH flags byte; written into the header by __add_egr_x. */
#define SR6_FLAG_ALERT (1 << 4)
/* 64-bit byte-order conversions built from two 32-bit conversions.
 * When bpf_htonl(1) == 1 (the 32-bit conversion is the identity) x is
 * returned unchanged; otherwise each 32-bit half is converted and the halves
 * are swapped. Note that x is evaluated more than once.
 */
#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
/* Applied to the header structs below so they carry no padding. */
#define BPF_PACKET_HEADER __attribute__((packed))
/* Packed view of the fixed IPv6 header used by get_srh() to step over the
 * header and to read next_header. The source and destination addresses are
 * each split into two 64-bit halves.
 */
struct ip6_t {
unsigned int ver:4;
unsigned int priority:8;
unsigned int flow_label:20;
unsigned short payload_len;
unsigned char next_header;
unsigned char hop_limit;
unsigned long long src_hi;
unsigned long long src_lo;
unsigned long long dst_hi;
unsigned long long dst_lo;
} BPF_PACKET_HEADER;
/* A 128-bit address stored as two packed 64-bit halves (hi, then lo). */
struct ip6_addr_t {
unsigned long long hi;
unsigned long long lo;
} BPF_PACKET_HEADER;
/* Packed view of the segment routing header: eight bytes of fixed fields
 * followed by a variable-length list of segment addresses.
 *
 * Elsewhere in this file (hdrlen + 1) << 3 is used as the total header size
 * in bytes, and first_segment + 1 as the number of entries in segments[].
 */
struct ip6_srh_t {
unsigned char nexthdr;
unsigned char hdrlen;
unsigned char type;
unsigned char segments_left;
unsigned char first_segment;
unsigned char flags;
unsigned short tag;
/* zero-length trailing array: contributes nothing to sizeof */
struct ip6_addr_t segments[0];
} BPF_PACKET_HEADER;
/* Packed type-length-value header: a one-byte type, a one-byte length and
 * a variable-length payload. sizeof(struct sr6_tlv_t) covers only the two
 * header bytes; the code in this file treats len as the payload size.
 */
struct sr6_tlv_t {
unsigned char type;
unsigned char len;
unsigned char value[0];
} BPF_PACKET_HEADER;
/* Locate the segment routing header in the packet referenced by skb.
 *
 * Starting at skb->data, the function checks that the high nibble of the
 * first byte is 6, steps over a struct ip6_t whose next_header must be 43,
 * and then expects a struct ip6_srh_t whose type is 4. Every access is
 * preceded by a bounds check against skb->data_end.
 *
 * Returns a pointer to the SRH inside the packet, or NULL if any bounds or
 * value check fails.
 */
static __attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb)
{
void *cursor, *data_end;
struct ip6_srh_t *srh;
struct ip6_t *ip;
uint8_t *ipver;
data_end = (void *)(long)skb->data_end;
cursor = (void *)(long)skb->data;
/* peek at the first byte without advancing the cursor */
ipver = (uint8_t *)cursor;
if ((void *)ipver + sizeof(*ipver) > data_end)
return NULL;
/* IP version lives in the upper four bits of the first byte */
if ((*ipver >> 4) != 6)
return NULL;
ip = cursor_advance(cursor, sizeof(*ip));
if ((void *)ip + sizeof(*ip) > data_end)
return NULL;
/* 43: presumably the routing extension header protocol number */
if (ip->next_header != 43)
return NULL;
srh = cursor_advance(cursor, sizeof(*srh));
if ((void *)srh + sizeof(*srh) > data_end)
return NULL;
/* 4: presumably the segment routing type of routing header */
if (srh->type != 4)
return NULL;
return srh;
}
/* Resize and rewrite the padding TLV located at pad_off.
 *
 * @skb:     packet being modified
 * @new_pad: desired total size of the padding TLV in bytes (0 = no padding)
 * @old_pad: current total size of the padding TLV in bytes
 * @pad_off: offset of the padding TLV
 *
 * When the sizes differ, the SRH is grown or shrunk at pad_off by the
 * difference. When new_pad is non-zero a padding TLV (type SR6_TLV_PADDING,
 * len = new_pad - 2, zero payload) is written at pad_off.
 *
 * Returns 0 on success or the non-zero error returned by a helper.
 */
static __attribute__((always_inline))
int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
uint32_t old_pad, uint32_t pad_off)
{
int err;
if (new_pad != old_pad) {
/* signed delta: negative values shrink the header */
err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
(int) new_pad - (int) old_pad);
if (err)
return err;
}
if (new_pad > 0) {
/* zeroed scratch buffer that the TLV header is overlaid on */
char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0};
struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;
pad_tlv->type = SR6_TLV_PADDING;
/* len excludes the two header bytes (type and len) */
pad_tlv->len = new_pad - 2;
err = bpf_lwt_seg6_store_bytes(skb, pad_off,
(void *)pad_tlv_buf, new_pad);
if (err)
return err;
}
return 0;
}
/* Walk the TLVs that follow the segment list and validate *tlv_off.
 *
 * @skb:      packet being inspected
 * @srh:      pointer to the SRH inside the packet
 * @tlv_off:  in/out: requested insertion offset, or -1 to request the
 *            position where the walk stops
 * @pad_size: out: total size (header + payload) of the padding TLV, written
 *            only if a padding TLV is found
 * @pad_off:  out: offset of the padding TLV, or the offset where the walk
 *            stopped if no padding TLV was found
 *
 * The walk starts right after the last segment and stops at a padding TLV,
 * at an HMAC TLV, at the end of the SRH, or after 100 TLVs. *tlv_off is
 * accepted if it equals the offset of one of the visited TLV boundaries.
 *
 * Returns 0 on success, the error from bpf_skb_load_bytes() if reading a
 * TLV header fails, or -EINVAL if *tlv_off (other than -1) does not fall on
 * a visited boundary.
 */
static __attribute__((always_inline))
int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
uint32_t *tlv_off, uint32_t *pad_size,
uint32_t *pad_off)
{
uint32_t srh_off, cur_off;
int offset_valid = 0;
int err;
srh_off = (char *)srh - (char *)(long)skb->data;
// cur_off = end of segments, start of possible TLVs
cur_off = srh_off + sizeof(*srh) +
sizeof(struct ip6_addr_t) * (srh->first_segment + 1);
*pad_off = 0;
// we can only go as far as ~10 TLVs due to the BPF max stack size
// NOTE(review): the bound below is 100 iterations with unrolling
// disabled, so the "~10 TLVs" remark above looks stale - confirm
#pragma clang loop unroll(disable)
for (int i = 0; i < 100; i++) {
struct sr6_tlv_t tlv;
if (cur_off == *tlv_off)
offset_valid = 1;
// (hdrlen + 1) << 3 is the SRH size in bytes; stop at its end
if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
break;
err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
if (err)
return err;
if (tlv.type == SR6_TLV_PADDING) {
*pad_size = tlv.len + sizeof(tlv);
*pad_off = cur_off;
// a requested offset equal to the SRH start is redirected
// to the padding TLV's position
if (*tlv_off == srh_off) {
*tlv_off = cur_off;
offset_valid = 1;
}
break;
} else if (tlv.type == SR6_TLV_HMAC) {
break;
}
cur_off += sizeof(tlv) + tlv.len;
} // we reached the padding or HMAC TLVs, or the end of the SRH
if (*pad_off == 0)
*pad_off = cur_off;
// -1 converts to UINT32_MAX here: "use the position where we stopped"
if (*tlv_off == -1)
*tlv_off = cur_off;
else if (!offset_valid)
return -EINVAL;
return 0;
}
/* Insert the TLV *itlv into the SRH and fix up the trailing padding TLV.
 *
 * @skb:      packet being modified
 * @srh:      pointer to the SRH inside the packet
 * @tlv_off:  insertion offset relative to the start of the SRH, or -1 to
 *            let is_valid_tlv_boundary() choose the position
 * @itlv:     TLV to insert; padding and HMAC TLVs are rejected
 * @tlv_size: number of bytes of *itlv to store
 *
 * The SRH is grown by sizeof(*itlv) + itlv->len at the insertion point, the
 * TLV bytes are stored there, and the padding is then recomputed so that the
 * length up to the end of the padding is a multiple of 8 bytes.
 *
 * Returns 0 on success, -EINVAL for a rejected TLV type or an invalid
 * offset, or the non-zero error returned by a helper.
 */
static __attribute__((always_inline))
int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
struct sr6_tlv_t *itlv, uint8_t tlv_size)
{
uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
uint8_t len_remaining, new_pad;
uint32_t pad_off = 0;
uint32_t pad_size = 0;
uint32_t partial_srh_len;
int err;
// convert an SRH-relative offset into a packet-relative one; -1 is kept
// as the "pick a position for me" marker
if (tlv_off != -1)
tlv_off += srh_off;
if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
return -EINVAL;
err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
if (err)
return err;
err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
if (err)
return err;
err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
if (err)
return err;
// the following can't be moved inside update_tlv_pad because the
// bpf verifier has some issues with it
// the padding TLV moved back by the size of the TLV just inserted
pad_off += sizeof(*itlv) + itlv->len;
partial_srh_len = pad_off - srh_off;
len_remaining = partial_srh_len % 8;
new_pad = 8 - len_remaining;
if (new_pad == 1) // cannot pad for 1 byte only
new_pad = 9;
else if (new_pad == 8)
// already a multiple of 8: no padding TLV needed
new_pad = 0;
return update_tlv_pad(skb, new_pad, pad_size, pad_off);
}
// Add an Egress TLV fc00::4, add the flag A,
// and apply End.X action to fc42::1
//
// Steps: locate the SRH, insert a 20-byte TLV at offset (hdrlen + 1) << 3
// from the start of the SRH, overwrite the SRH flags byte with
// SR6_FLAG_ALERT, then call the End.X action with the address built from
// hi/lo. Returns BPF_DROP if the SRH is not found or any helper fails,
// BPF_REDIRECT otherwise.
SEC("lwt_seg6local")
int __add_egr_x(struct __sk_buff *skb)
{
unsigned long long hi = 0xfc42000000000000;
unsigned long long lo = 0x1;
struct ip6_srh_t *srh = get_srh(skb);
uint8_t new_flags = SR6_FLAG_ALERT;
struct ip6_addr_t addr;
int err, offset;
if (srh == NULL)
return BPF_DROP;
// TLV bytes: type 2, len 18, then 18 payload bytes
// NOTE(review): the payload's address bytes start with 0xfd while the
// comment above this function says fc00::4 - confirm which is intended
uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};
// (hdrlen + 1) << 3 is the current SRH size, i.e. the offset of its end
err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
(struct sr6_tlv_t *)&tlv, 20);
if (err)
return BPF_DROP;
// the offset of the flags byte assumes the SRH directly follows a
// struct ip6_t at the start of the packet, as get_srh() checked
offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
err = bpf_lwt_seg6_store_bytes(skb, offset,
(void *)&new_flags, sizeof(new_flags));
if (err)
return BPF_DROP;
addr.lo = htonll(lo);
addr.hi = htonll(hi);
err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
(void *)&addr, sizeof(addr));
if (err)
return BPF_DROP;
return BPF_REDIRECT;
}
char __license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <stdint.h>
#include <string.h>
#include <linux/stddef.h>
#include <linux/bpf.h>
#include "bpf_helpers.h"
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
/* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */
#define TCP_MEM_LOOPS 28 /* because 30 doesn't fit into 512 bytes of stack */
/* Maximum number of characters handed to bpf_strtoul() per value. It is also
 * used as a bit mask (7 == 0b111) when the parse offset is advanced.
 */
#define MAX_ULONG_STR_LEN 7
/* Size of the buffer that receives the sysctl's current value. */
#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
/* Return 1 if the name of the sysctl described by ctx equals tcp_mem_name,
 * 0 otherwise.
 *
 * The name is fetched with bpf_sysctl_get_name() into a zeroed 64-byte
 * buffer. The returned length must equal the reference string's length
 * before the two are compared byte by byte (terminating NUL included) in a
 * loop with unrolling disabled.
 */
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
/* volatile: presumably keeps the compiler from folding the byte loop away */
volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
unsigned char i;
char name[64];
int ret;
memset(name, 0, sizeof(name));
ret = bpf_sysctl_get_name(ctx, name, sizeof(name), 0);
/* sizeof(tcp_mem_name) - 1 is the string length without the NUL */
if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
return 0;
#pragma clang loop unroll(disable)
for (i = 0; i < sizeof(tcp_mem_name); ++i)
if (name[i] != tcp_mem_name[i])
return 0;
return 1;
}
SEC("cgroup/sysctl")
/* cgroup/sysctl program: parse the current sysctl value as TCP_MEM_LOOPS
 * unsigned integers inside a loop with unrolling disabled.
 *
 * Returns 0 for write accesses, for sysctls rejected by is_tcp_mem(), when
 * the value cannot be read or fills the buffer, or when any number fails to
 * parse. Otherwise returns 1 if the first three parsed values are strictly
 * increasing and 0 if they are not.
 */
int sysctl_tcp_mem(struct bpf_sysctl *ctx)
{
unsigned long tcp_mem[TCP_MEM_LOOPS] = {};
char value[MAX_VALUE_STR_LEN];
unsigned char i, off = 0;
int ret;
if (ctx->write)
return 0;
if (!is_tcp_mem(ctx))
return 0;
ret = bpf_sysctl_get_current_value(ctx, value, MAX_VALUE_STR_LEN);
if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
return 0;
#pragma clang loop unroll(disable)
for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
tcp_mem + i);
if (ret <= 0 || ret > MAX_ULONG_STR_LEN)
return 0;
/* ret is already within 1..7 here, so the mask does not change it;
 * presumably it makes the bound explicit for the verifier
 */
off += ret & MAX_ULONG_STR_LEN;
}
return tcp_mem[0] < tcp_mem[1] && tcp_mem[1] < tcp_mem[2];
}
char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <stdint.h>
#include <string.h>
#include <linux/stddef.h>
#include <linux/bpf.h>
#include "bpf_helpers.h"
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
/* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */
#define TCP_MEM_LOOPS 20 /* because 30 doesn't fit into 512 bytes of stack */
/* Maximum number of characters handed to bpf_strtoul() per value. It is also
 * used as a bit mask (7 == 0b111) when the parse offset is advanced.
 */
#define MAX_ULONG_STR_LEN 7
/* Size of the buffer that receives the sysctl's current value. */
#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
/* Return 1 if the name of the sysctl described by ctx equals tcp_mem_name,
 * 0 otherwise.
 *
 * Kept out of line (noinline), so it is emitted as a separate function with
 * its own stack frame rather than being merged into its caller.
 *
 * The name is fetched with bpf_sysctl_get_name() into a zeroed buffer. The
 * returned length must equal the reference string's length before the two
 * are compared byte by byte (terminating NUL included) in a loop with
 * unrolling disabled.
 */
static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx)
{
	/* volatile: presumably keeps the compiler from folding the byte loop away */
	volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
	unsigned char i;
	/* Sized from the reference string, which is longer than 64 bytes.
	 * A fixed 64-byte buffer made the loop below index past the end of
	 * name, and also meant the helper could never report a length equal
	 * to sizeof(tcp_mem_name) - 1.
	 */
	char name[sizeof(tcp_mem_name)];
	int ret;

	memset(name, 0, sizeof(name));
	ret = bpf_sysctl_get_name(ctx, name, sizeof(name), 0);
	/* sizeof(tcp_mem_name) - 1 is the string length without the NUL */
	if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
		return 0;

#pragma clang loop unroll(disable)
	for (i = 0; i < sizeof(tcp_mem_name); ++i)
		if (name[i] != tcp_mem_name[i])
			return 0;

	return 1;
}
SEC("cgroup/sysctl")
/* cgroup/sysctl program: parse the current sysctl value as TCP_MEM_LOOPS
 * unsigned integers inside a loop with unrolling disabled.
 *
 * Returns 0 for write accesses, for sysctls rejected by is_tcp_mem(), when
 * the value cannot be read or fills the buffer, or when any number fails to
 * parse. Otherwise returns 1 if the first three parsed values are strictly
 * increasing and 0 if they are not.
 */
int sysctl_tcp_mem(struct bpf_sysctl *ctx)
{
unsigned long tcp_mem[TCP_MEM_LOOPS] = {};
char value[MAX_VALUE_STR_LEN];
unsigned char i, off = 0;
int ret;
if (ctx->write)
return 0;
if (!is_tcp_mem(ctx))
return 0;
ret = bpf_sysctl_get_current_value(ctx, value, MAX_VALUE_STR_LEN);
if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
return 0;
#pragma clang loop unroll(disable)
for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
tcp_mem + i);
if (ret <= 0 || ret > MAX_ULONG_STR_LEN)
return 0;
/* ret is already within 1..7 here, so the mask does not change it;
 * presumably it makes the bound explicit for the verifier
 */
off += ret & MAX_ULONG_STR_LEN;
}
return tcp_mem[0] < tcp_mem[1] && tcp_mem[1] < tcp_mem[2];
}
char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <stddef.h>
#include <string.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/udp.h>
#include <linux/tcp.h>
#include <linux/pkt_cls.h>
#include <sys/socket.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
#include "test_iptunnel_common.h"
int _version SEC("version") = 1;
/* Per-CPU array of 256 __u64 counters keyed by a __u32 index.
 * count_tx() uses the protocol number as the key.
 */
struct bpf_map_def SEC("maps") rxcnt = {
.type = BPF_MAP_TYPE_PERCPU_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(__u64),
.max_entries = 256,
};
/* Hash map from a struct vip key to the struct iptnl_info describing the
 * tunnel to encapsulate into; holds up to MAX_IPTNL_ENTRIES entries.
 */
struct bpf_map_def SEC("maps") vip2tnl = {
.type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(struct vip),
.value_size = sizeof(struct iptnl_info),
.max_entries = MAX_IPTNL_ENTRIES,
};
/* Increment the rxcnt counter stored under key `protocol`.
 * Nothing happens when the lookup yields no entry.
 */
static __always_inline void count_tx(__u32 protocol)
{
	__u64 *counter = bpf_map_lookup_elem(&rxcnt, &protocol);

	if (!counter)
		return;
	*counter += 1;
}
/* Fetch the destination port from a TCP or UDP header.
 *
 * @trans_data: start of the transport header
 * @data_end:   first byte past the end of the packet
 * @protocol:   transport protocol number
 *
 * Returns the `dest` field exactly as stored in the header for TCP and UDP,
 * -1 if the header does not fit before data_end, and 0 for every other
 * protocol.
 */
static __always_inline int get_dport(void *trans_data, void *data_end,
				     __u8 protocol)
{
	if (protocol == IPPROTO_TCP) {
		struct tcphdr *tcp = (struct tcphdr *)trans_data;

		/* the whole header must lie inside the packet */
		if (tcp + 1 > data_end)
			return -1;
		return tcp->dest;
	}

	if (protocol == IPPROTO_UDP) {
		struct udphdr *udp = (struct udphdr *)trans_data;

		if (udp + 1 > data_end)
			return -1;
		return udp->dest;
	}

	return 0;
}
/* Fill in the Ethernet header of the encapsulated frame.
 *
 * The new source MAC is the old frame's destination MAC, the new
 * destination MAC comes from tnl->dmac, and the EtherType is h_proto.
 */
static __always_inline void set_ethhdr(struct ethhdr *new_eth,
				       const struct ethhdr *old_eth,
				       const struct iptnl_info *tnl,
				       __be16 h_proto)
{
	unsigned char *src_mac = new_eth->h_source;
	unsigned char *dst_mac = new_eth->h_dest;

	/* same write order as before: source, destination, then EtherType */
	memcpy(src_mac, old_eth->h_dest, sizeof(new_eth->h_source));
	memcpy(dst_mac, tnl->dmac, sizeof(new_eth->h_dest));
	new_eth->h_proto = h_proto;
}
/* Encapsulate an IPv4 packet in an outer IPv4 header (IPIP) and send it
 * back out.
 *
 * The destination address, port and protocol form the key into vip2tnl.
 * Packets without a matching IPv4 tunnel entry are passed up unchanged.
 * Otherwise room for one struct iphdr is added at the front, a new Ethernet
 * header and outer IP header are written, the header checksum is computed
 * in a loop with unrolling disabled, and the per-protocol counter is bumped.
 *
 * Returns XDP_DROP on a bounds failure or when the head cannot be adjusted,
 * XDP_PASS when no tunnel applies, XDP_TX after encapsulation.
 */
static __always_inline int handle_ipv4(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
void *data = (void *)(long)xdp->data;
struct iptnl_info *tnl;
struct ethhdr *new_eth;
struct ethhdr *old_eth;
struct iphdr *iph = data + sizeof(struct ethhdr);
__u16 *next_iph;
__u16 payload_len;
struct vip vip = {};
int dport;
__u32 csum = 0;
int i;
if (iph + 1 > data_end)
return XDP_DROP;
/* the transport header is taken to start right after struct iphdr */
dport = get_dport(iph + 1, data_end, iph->protocol);
if (dport == -1)
return XDP_DROP;
vip.protocol = iph->protocol;
vip.family = AF_INET;
vip.daddr.v4 = iph->daddr;
vip.dport = dport;
/* inner total length, saved before the header area is rewritten */
payload_len = bpf_ntohs(iph->tot_len);
tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
/* It only does v4-in-v4 */
if (!tnl || tnl->family != AF_INET)
return XDP_PASS;
/* negative delta: grow the packet at the front by one struct iphdr */
if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
return XDP_DROP;
/* the head moved, so reload the packet pointers from the context */
data = (void *)(long)xdp->data;
data_end = (void *)(long)xdp->data_end;
new_eth = data;
iph = data + sizeof(*new_eth);
/* the original Ethernet header now sits sizeof(*iph) bytes in */
old_eth = data + sizeof(*iph);
if (new_eth + 1 > data_end ||
old_eth + 1 > data_end ||
iph + 1 > data_end)
return XDP_DROP;
set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP));
iph->version = 4;
iph->ihl = sizeof(*iph) >> 2;
iph->frag_off = 0;
iph->protocol = IPPROTO_IPIP;
/* must be zero while the checksum is being summed below */
iph->check = 0;
iph->tos = 0;
iph->tot_len = bpf_htons(payload_len + sizeof(*iph));
iph->daddr = tnl->daddr.v4;
iph->saddr = tnl->saddr.v4;
iph->ttl = 8;
next_iph = (__u16 *)iph;
/* sum the outer header as 16-bit words */
#pragma clang loop unroll(disable)
for (i = 0; i < sizeof(*iph) >> 1; i++)
csum += *next_iph++;
/* fold the carries into the low 16 bits once and complement */
iph->check = ~((csum & 0xffff) + (csum >> 16));
count_tx(vip.protocol);
return XDP_TX;
}
/* Encapsulate an IPv6 packet in an outer IPv6 header and send it back out.
 *
 * The destination address, port and next-header value form the key into
 * vip2tnl. Packets without a matching IPv6 tunnel entry are passed up
 * unchanged. Otherwise room for one struct ipv6hdr is added at the front,
 * a new Ethernet header and outer IPv6 header are written, and the
 * per-protocol counter is bumped.
 *
 * Returns XDP_DROP on a bounds failure or when the head cannot be adjusted,
 * XDP_PASS when no tunnel applies, XDP_TX after encapsulation.
 */
static __always_inline int handle_ipv6(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
void *data = (void *)(long)xdp->data;
struct iptnl_info *tnl;
struct ethhdr *new_eth;
struct ethhdr *old_eth;
struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
__u16 payload_len;
struct vip vip = {};
int dport;
if (ip6h + 1 > data_end)
return XDP_DROP;
/* the transport header is taken to start right after struct ipv6hdr */
dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr);
if (dport == -1)
return XDP_DROP;
vip.protocol = ip6h->nexthdr;
vip.family = AF_INET6;
memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
vip.dport = dport;
/* kept as read from the header; converted with bpf_ntohs() further down */
payload_len = ip6h->payload_len;
tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
/* It only does v6-in-v6 */
if (!tnl || tnl->family != AF_INET6)
return XDP_PASS;
/* negative delta: grow the packet at the front by one struct ipv6hdr */
if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
return XDP_DROP;
/* the head moved, so reload the packet pointers from the context */
data = (void *)(long)xdp->data;
data_end = (void *)(long)xdp->data_end;
new_eth = data;
ip6h = data + sizeof(*new_eth);
/* the original Ethernet header now sits sizeof(*ip6h) bytes in */
old_eth = data + sizeof(*ip6h);
if (new_eth + 1 > data_end || old_eth + 1 > data_end ||
ip6h + 1 > data_end)
return XDP_DROP;
set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6));
ip6h->version = 6;
ip6h->priority = 0;
memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
/* outer payload = inner payload plus the inner IPv6 header */
ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + sizeof(*ip6h));
ip6h->nexthdr = IPPROTO_IPV6;
ip6h->hop_limit = 8;
memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
count_tx(vip.protocol);
return XDP_TX;
}
SEC("xdp_tx_iptunnel")
/* XDP entry point: dispatch on the EtherType of the incoming frame.
 *
 * IPv4 frames go to handle_ipv4() and IPv6 frames to handle_ipv6(); their
 * verdict is returned as is. Frames too short to hold an Ethernet header,
 * and frames of any other EtherType, are dropped.
 */
int _xdp_tx_iptunnel(struct xdp_md *xdp)
{
	void *data_end = (void *)(long)xdp->data_end;
	void *data = (void *)(long)xdp->data;
	struct ethhdr *eth = data;
	__u16 ether_type;

	/* the Ethernet header must lie entirely inside the packet */
	if (eth + 1 > data_end)
		return XDP_DROP;

	ether_type = eth->h_proto;

	if (ether_type == bpf_htons(ETH_P_IP))
		return handle_ipv4(xdp);
	if (ether_type == bpf_htons(ETH_P_IPV6))
		return handle_ipv6(xdp);

	return XDP_DROP;
}
char _license[] SEC("license") = "GPL";
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment