Merge branch 'bpf-jmp-seq-limit'

Alexei Starovoitov says: ==================== Patch 1 - jmp sequence limit Patch 2 - improve existing tests Patch 3 - add pyperf-based realistic bpf program that takes advantage of higher limit and use it as a stress test v1->v2: fixed nit in patch 3. added Andrii's acks ==================== Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>

Merge branch 'bpf-jmp-seq-limit'
Alexei Starovoitov says: ==================== Patch 1 - jmp sequence limit Patch 2 - improve existing tests Patch 3 - add pyperf-based realistic bpf program that takes advantage of higher limit and use it as a stress test v1->v2: fixed nit in patch 3. added Andrii's acks ==================== Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
29c677c8 · Daniel Borkmann · 9efc7794 · 7c944106 · 29c677c8 · 29c677c8
Commit 29c677c8 authored May 23, 2019 by Daniel Borkmann
7 changed files
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -176,7 +176,7 @@ struct bpf_verifier_stack_elem {
 	struct bpf_verifier_stack_elem *next;
 };

-#define BPF_COMPLEXITY_LIMIT_STACK	1024
+#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
 #define BPF_COMPLEXITY_LIMIT_STATES	64

 #define BPF_MAP_PTR_UNPRIV	1UL
@@ -782,8 +782,9 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
 	if (err)
 		goto err;
 	elem->st.speculative |= speculative;
-	if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
-		verbose(env, "BPF program is too complex\n");
+	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
+		verbose(env, "The sequence of %d jumps is too complex.\n",
+			env->stack_size);
 		goto err;
 	}
 	return &elem->st;

--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -12,7 +12,7 @@ static int libbpf_debug_print(enum libbpf_print_level level,
 	return vfprintf(stderr, "%s", args);
 }

-static int check_load(const char *file)
+static int check_load(const char *file, enum bpf_prog_type type)
 {
 	struct bpf_prog_load_attr attr;
 	struct bpf_object *obj = NULL;
@@ -20,7 +20,7 @@ static int check_load(const char *file)

 	memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
 	attr.file = file;
-	attr.prog_type = BPF_PROG_TYPE_SCHED_CLS;
+	attr.prog_type = type;
 	attr.log_level = 4;
 	err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
 	bpf_object__close(obj);
@@ -31,19 +31,24 @@ static int check_load(const char *file)

 void test_bpf_verif_scale(void)
 {
-	const char *file1 = "./test_verif_scale1.o";
-	const char *file2 = "./test_verif_scale2.o";
-	const char *file3 = "./test_verif_scale3.o";
-	int err;
+	const char *scale[] = {
+		"./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o"
+	};
+	const char *pyperf[] = {
+		"./pyperf50.o",	"./pyperf100.o", "./pyperf180.o"
+	};
+	int err, i;

 	if (verifier_stats)
 		libbpf_set_print(libbpf_debug_print);

-	err = check_load(file1);
-	err |= check_load(file2);
-	err |= check_load(file3);
-	if (!err)
-		printf("test_verif_scale:OK\n");
-	else
-		printf("test_verif_scale:FAIL\n");
+	for (i = 0; i < ARRAY_SIZE(scale); i++) {
+		err = check_load(scale[i], BPF_PROG_TYPE_SCHED_CLS);
+		printf("test_scale:%s:%s\n", scale[i], err ? "FAIL" : "OK");
+	}
+
+	for (i = 0; i < ARRAY_SIZE(pyperf); i++) {
+		err = check_load(pyperf[i], BPF_PROG_TYPE_RAW_TRACEPOINT);
+		printf("test_scale:%s:%s\n", pyperf[i], err ? "FAIL" : "OK");
+	}
 }
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+#define FUNCTION_NAME_LEN 64
+#define FILE_NAME_LEN 128
+#define TASK_COMM_LEN 16
+
+typedef struct {
+	int PyThreadState_frame;
+	int PyThreadState_thread;
+	int PyFrameObject_back;
+	int PyFrameObject_code;
+	int PyFrameObject_lineno;
+	int PyCodeObject_filename;
+	int PyCodeObject_name;
+	int String_data;
+	int String_size;
+} OffsetConfig;
+
+typedef struct {
+	uintptr_t current_state_addr;
+	uintptr_t tls_key_addr;
+	OffsetConfig offsets;
+	bool use_tls;
+} PidData;
+
+typedef struct {
+	uint32_t success;
+} Stats;
+
+typedef struct {
+	char name[FUNCTION_NAME_LEN];
+	char file[FILE_NAME_LEN];
+} Symbol;
+
+typedef struct {
+	uint32_t pid;
+	uint32_t tid;
+	char comm[TASK_COMM_LEN];
+	int32_t kernel_stack_id;
+	int32_t user_stack_id;
+	bool thread_current;
+	bool pthread_match;
+	bool stack_complete;
+	int16_t stack_len;
+	int32_t stack[STACK_MAX_LEN];
+
+	int has_meta;
+	int metadata;
+	char dummy_safeguard;
+} Event;
+
+
+struct bpf_elf_map {
+	__u32 type;
+	__u32 size_key;
+	__u32 size_value;
+	__u32 max_elem;
+	__u32 flags;
+};
+
+typedef int pid_t;
+
+typedef struct {
+	void* f_back; // PyFrameObject.f_back, previous frame
+	void* f_code; // PyFrameObject.f_code, pointer to PyCodeObject
+	void* co_filename; // PyCodeObject.co_filename
+	void* co_name; // PyCodeObject.co_name
+} FrameData;
+
+static inline __attribute__((__always_inline__)) void*
+get_thread_state(void* tls_base, PidData* pidData)
+{
+	void* thread_state;
+	int key;
+
+	bpf_probe_read(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
+	bpf_probe_read(&thread_state, sizeof(thread_state),
+		       tls_base + 0x310 + key * 0x10 + 0x08);
+	return thread_state;
+}
+
+static inline __attribute__((__always_inline__)) bool
+get_frame_data(void* frame_ptr, PidData* pidData, FrameData* frame, Symbol* symbol)
+{
+	// read data from PyFrameObject
+	bpf_probe_read(&frame->f_back,
+		       sizeof(frame->f_back),
+		       frame_ptr + pidData->offsets.PyFrameObject_back);
+	bpf_probe_read(&frame->f_code,
+		       sizeof(frame->f_code),
+		       frame_ptr + pidData->offsets.PyFrameObject_code);
+
+	// read data from PyCodeObject
+	if (!frame->f_code)
+		return false;
+	bpf_probe_read(&frame->co_filename,
+		       sizeof(frame->co_filename),
+		       frame->f_code + pidData->offsets.PyCodeObject_filename);
+	bpf_probe_read(&frame->co_name,
+		       sizeof(frame->co_name),
+		       frame->f_code + pidData->offsets.PyCodeObject_name);
+	// read actual names into symbol
+	if (frame->co_filename)
+		bpf_probe_read_str(&symbol->file,
+				   sizeof(symbol->file),
+				   frame->co_filename + pidData->offsets.String_data);
+	if (frame->co_name)
+		bpf_probe_read_str(&symbol->name,
+				   sizeof(symbol->name),
+				   frame->co_name + pidData->offsets.String_data);
+	return true;
+}
+
+struct bpf_elf_map SEC("maps") pidmap = {
+	.type = BPF_MAP_TYPE_HASH,
+	.size_key = sizeof(int),
+	.size_value = sizeof(PidData),
+	.max_elem = 1,
+};
+
+struct bpf_elf_map SEC("maps") eventmap = {
+	.type = BPF_MAP_TYPE_HASH,
+	.size_key = sizeof(int),
+	.size_value = sizeof(Event),
+	.max_elem = 1,
+};
+
+struct bpf_elf_map SEC("maps") symbolmap = {
+	.type = BPF_MAP_TYPE_HASH,
+	.size_key = sizeof(Symbol),
+	.size_value = sizeof(int),
+	.max_elem = 1,
+};
+
+struct bpf_elf_map SEC("maps") statsmap = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.size_key = sizeof(Stats),
+	.size_value = sizeof(int),
+	.max_elem = 1,
+};
+
+struct bpf_elf_map SEC("maps") perfmap = {
+	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.size_key = sizeof(int),
+	.size_value = sizeof(int),
+	.max_elem = 32,
+};
+
+struct bpf_elf_map SEC("maps") stackmap = {
+	.type = BPF_MAP_TYPE_STACK_TRACE,
+	.size_key = sizeof(int),
+	.size_value = sizeof(long long) * 127,
+	.max_elem = 1000,
+};
+
+static inline __attribute__((__always_inline__)) int __on_event(struct pt_regs *ctx)
+{
+	uint64_t pid_tgid = bpf_get_current_pid_tgid();
+	pid_t pid = (pid_t)(pid_tgid >> 32);
+	PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid);
+	if (!pidData)
+		return 0;
+
+	int zero = 0;
+	Event* event = bpf_map_lookup_elem(&eventmap, &zero);
+	if (!event)
+		return 0;
+
+	event->pid = pid;
+
+	event->tid = (pid_t)pid_tgid;
+	bpf_get_current_comm(&event->comm, sizeof(event->comm));
+
+	event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
+	event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);
+
+	void* thread_state_current = (void*)0;
+	bpf_probe_read(&thread_state_current,
+		       sizeof(thread_state_current),
+		       (void*)(long)pidData->current_state_addr);
+
+	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+	void* tls_base = (void*)task;
+
+	void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData)
+		: thread_state_current;
+	event->thread_current = thread_state == thread_state_current;
+
+	if (pidData->use_tls) {
+		uint64_t pthread_created;
+		uint64_t pthread_self;
+		bpf_probe_read(&pthread_self, sizeof(pthread_self), tls_base + 0x10);
+
+		bpf_probe_read(&pthread_created,
+			       sizeof(pthread_created),
+			       thread_state + pidData->offsets.PyThreadState_thread);
+		event->pthread_match = pthread_created == pthread_self;
+	} else {
+		event->pthread_match = 1;
+	}
+
+	if (event->pthread_match || !pidData->use_tls) {
+		void* frame_ptr;
+		FrameData frame;
+		Symbol sym = {};
+		int cur_cpu = bpf_get_smp_processor_id();
+
+		bpf_probe_read(&frame_ptr,
+			       sizeof(frame_ptr),
+			       thread_state + pidData->offsets.PyThreadState_frame);
+
+		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
+		if (symbol_counter == NULL)
+			return 0;
+#pragma unroll
+		/* Unwind python stack */
+		for (int i = 0; i < STACK_MAX_LEN; ++i) {
+			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
+				int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
+				int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
+				if (!symbol_id) {
+					bpf_map_update_elem(&symbolmap, &sym, &zero, 0);
+					symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
+					if (!symbol_id)
+						return 0;
+				}
+				if (*symbol_id == new_symbol_id)
+					(*symbol_counter)++;
+				event->stack[i] = *symbol_id;
+				event->stack_len = i + 1;
+				frame_ptr = frame.f_back;
+			}
+		}
+		event->stack_complete = frame_ptr == NULL;
+	} else {
+		event->stack_complete = 1;
+	}
+
+	Stats* stats = bpf_map_lookup_elem(&statsmap, &zero);
+	if (stats)
+		stats->success++;
+
+	event->has_meta = 0;
+	bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata));
+	return 0;
+}
+
+SEC("raw_tracepoint/kfree_skb")
+int on_event(struct pt_regs* ctx)
+{
+	int i, ret = 0;
+	ret |= __on_event(ctx);
+	ret |= __on_event(ctx);
+	ret |= __on_event(ctx);
+	ret |= __on_event(ctx);
+	ret |= __on_event(ctx);
+	return ret;
+}
+
+char _license[] SEC("license") = "GPL";
--- a/tools/testing/selftests/bpf/progs/pyperf100.c
+++ b/tools/testing/selftests/bpf/progs/pyperf100.c
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#define STACK_MAX_LEN 100
+#include "pyperf.h"
--- a/tools/testing/selftests/bpf/progs/pyperf180.c
+++ b/tools/testing/selftests/bpf/progs/pyperf180.c
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#define STACK_MAX_LEN 180
+#include "pyperf.h"
--- a/tools/testing/selftests/bpf/progs/pyperf50.c
+++ b/tools/testing/selftests/bpf/progs/pyperf50.c
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#define STACK_MAX_LEN 50
+#include "pyperf.h"
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -210,33 +210,35 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
 	self->retval = (uint32_t)res;
 }

-/* test the sequence of 1k jumps */
+#define MAX_JMP_SEQ 8192
+
+/* test the sequence of 8k jumps */
 static void bpf_fill_scale1(struct bpf_test *self)
 {
 	struct bpf_insn *insn = self->fill_insns;
 	int i = 0, k = 0;

 	insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
-	/* test to check that the sequence of 1024 jumps is acceptable */
-	while (k++ < 1024) {
+	/* test to check that the long sequence of jumps is acceptable */
+	while (k++ < MAX_JMP_SEQ) {
 		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 					 BPF_FUNC_get_prandom_u32);
-		insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2);
+		insn[i++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, bpf_semi_rand_get(), 2);
 		insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10);
 		insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
 					-8 * (k % 64 + 1));
 	}
-	/* every jump adds 1024 steps to insn_processed, so to stay exactly
-	 * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT
+	/* every jump adds 1 step to insn_processed, so to stay exactly
+	 * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT
 	 */
-	while (i < MAX_TEST_INSNS - 1025)
+	while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1)
 		insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42);
 	insn[i] = BPF_EXIT_INSN();
 	self->prog_len = i + 1;
 	self->retval = 42;
 }

-/* test the sequence of 1k jumps in inner most function (function depth 8)*/
+/* test the sequence of 8k jumps in inner most function (function depth 8)*/
 static void bpf_fill_scale2(struct bpf_test *self)
 {
 	struct bpf_insn *insn = self->fill_insns;
@@ -248,19 +250,20 @@ static void bpf_fill_scale2(struct bpf_test *self)
 		insn[i++] = BPF_EXIT_INSN();
 	}
 	insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
-	/* test to check that the sequence of 1024 jumps is acceptable */
-	while (k++ < 1024) {
+	/* test to check that the long sequence of jumps is acceptable */
+	k = 0;
+	while (k++ < MAX_JMP_SEQ) {
 		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 					 BPF_FUNC_get_prandom_u32);
-		insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2);
+		insn[i++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, bpf_semi_rand_get(), 2);
 		insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10);
 		insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
 					-8 * (k % (64 - 4 * FUNC_NEST) + 1));
 	}
-	/* every jump adds 1024 steps to insn_processed, so to stay exactly
-	 * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT
+	/* every jump adds 1 step to insn_processed, so to stay exactly
+	 * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT
 	 */
-	while (i < MAX_TEST_INSNS - 1025)
+	while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1)
 		insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42);
 	insn[i] = BPF_EXIT_INSN();
 	self->prog_len = i + 1;