Commit e9b4e606 authored by Jiri Olsa, committed by Alexei Starovoitov

bpf: Allow to resolve bpf trampoline and dispatcher in unwind

When unwinding the stack, we need to identify each address
in order to continue successfully. Add a latch tree that keeps
trampoline and dispatcher images for quick lookup during the unwind.

The patch uses the first 48 bytes of each image page for the latch
tree node, leaving the remaining 4048 bytes of the page for the
trampoline- or dispatcher-generated code.

That is still enough not to affect the maximum counts of trampoline
and dispatcher programs.
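
For illustration only (not part of the patch text), a minimal sketch of the resulting page layout and the size arithmetic, assuming x86-64 with 4096-byte pages where struct latch_tree_node (two struct rb_node) occupies 48 bytes:

#include <linux/rbtree_latch.h>	/* struct latch_tree_node */
#include <asm/page.h>		/* PAGE_SIZE, assumed 4096 here */

struct bpf_image {
	struct latch_tree_node tnode;	/* first 48 bytes: node in the image latch tree */
	unsigned char data[];		/* remaining 4048 bytes: trampoline/dispatcher code */
};

#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))	/* 4096 - 48 = 4048 */

Each half of an image still offers BPF_IMAGE_SIZE / 2 = 2024 bytes, so BPF_MAX_TRAMP_PROGS (40) programs at roughly 50 bytes per call sequence (~2000 bytes) continue to fit, which is why the maximum counts are unchanged.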
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200123161508.915203-3-jolsa@kernel.org
parent 84ad7a7a
@@ -525,7 +525,6 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
 int bpf_trampoline_link_prog(struct bpf_prog *prog);
 int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
-void *bpf_jit_alloc_exec_page(void);
 #define BPF_DISPATCHER_INIT(name) { \
 	.mutex = __MUTEX_INITIALIZER(name.mutex), \
 	.func = &name##func, \
@@ -557,6 +556,13 @@ void *bpf_jit_alloc_exec_page(void);
 #define BPF_DISPATCHER_PTR(name) (&name)
 void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
 				struct bpf_prog *to);
+struct bpf_image {
+	struct latch_tree_node tnode;
+	unsigned char data[];
+};
+#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))
+bool is_bpf_image_address(unsigned long address);
+void *bpf_image_alloc(void);
 #else
 static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
@@ -578,6 +584,10 @@ static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
 static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
 					      struct bpf_prog *from,
 					      struct bpf_prog *to) {}
+static inline bool is_bpf_image_address(unsigned long address)
+{
+	return false;
+}
 #endif

 struct bpf_func_info_aux {
...
@@ -113,7 +113,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
 		noff = 0;
 	} else {
 		old = d->image + d->image_off;
-		noff = d->image_off ^ (PAGE_SIZE / 2);
+		noff = d->image_off ^ (BPF_IMAGE_SIZE / 2);
 	}

 	new = d->num_progs ? d->image + noff : NULL;
@@ -140,7 +140,7 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,

 	mutex_lock(&d->mutex);
 	if (!d->image) {
-		d->image = bpf_jit_alloc_exec_page();
+		d->image = bpf_image_alloc();
 		if (!d->image)
 			goto out;
 	}
...
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/ftrace.h>
+#include <linux/rbtree_latch.h>

 /* dummy _ops. The verifier will operate on target program's ops. */
 const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -16,11 +17,12 @@ const struct bpf_prog_ops bpf_extension_prog_ops = {
 #define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

 static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
+static struct latch_tree_root image_tree __cacheline_aligned;

-/* serializes access to trampoline_table */
+/* serializes access to trampoline_table and image_tree */
 static DEFINE_MUTEX(trampoline_mutex);

-void *bpf_jit_alloc_exec_page(void)
+static void *bpf_jit_alloc_exec_page(void)
 {
 	void *image;

@@ -36,6 +38,64 @@ void *bpf_jit_alloc_exec_page(void)
 	return image;
 }

+static __always_inline bool image_tree_less(struct latch_tree_node *a,
+					    struct latch_tree_node *b)
+{
+	struct bpf_image *ia = container_of(a, struct bpf_image, tnode);
+	struct bpf_image *ib = container_of(b, struct bpf_image, tnode);
+
+	return ia < ib;
+}
+
+static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n)
+{
+	void *image = container_of(n, struct bpf_image, tnode);
+
+	if (addr < image)
+		return -1;
+	if (addr >= image + PAGE_SIZE)
+		return 1;
+
+	return 0;
+}
+
+static const struct latch_tree_ops image_tree_ops = {
+	.less	= image_tree_less,
+	.comp	= image_tree_comp,
+};
+
+static void *__bpf_image_alloc(bool lock)
+{
+	struct bpf_image *image;
+
+	image = bpf_jit_alloc_exec_page();
+	if (!image)
+		return NULL;
+
+	if (lock)
+		mutex_lock(&trampoline_mutex);
+	latch_tree_insert(&image->tnode, &image_tree, &image_tree_ops);
+	if (lock)
+		mutex_unlock(&trampoline_mutex);
+
+	return image->data;
+}
+
+void *bpf_image_alloc(void)
+{
+	return __bpf_image_alloc(true);
+}
+
+bool is_bpf_image_address(unsigned long addr)
+{
+	bool ret;
+
+	rcu_read_lock();
+	ret = latch_tree_find((void *) addr, &image_tree, &image_tree_ops) != NULL;
+	rcu_read_unlock();
+
+	return ret;
+}
+
 struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
 	struct bpf_trampoline *tr;
@@ -56,7 +116,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 		goto out;

 	/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
-	image = bpf_jit_alloc_exec_page();
+	image = __bpf_image_alloc(false);
 	if (!image) {
 		kfree(tr);
 		tr = NULL;
@@ -131,14 +191,14 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
 }

 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
- * bytes on x86. Pick a number to fit into PAGE_SIZE / 2
+ * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
  */
 #define BPF_MAX_TRAMP_PROGS 40

 static int bpf_trampoline_update(struct bpf_trampoline *tr)
 {
-	void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
-	void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
+	void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2;
+	void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2;
 	struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
 	int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
 	int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
@@ -174,7 +234,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
 	 */
 	synchronize_rcu_tasks();

-	err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
+	err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2,
 					  &tr->func.model, flags,
 					  fentry, fentry_cnt,
 					  fexit, fexit_cnt,
@@ -284,6 +344,8 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog)

 void bpf_trampoline_put(struct bpf_trampoline *tr)
 {
+	struct bpf_image *image;
+
 	if (!tr)
 		return;
 	mutex_lock(&trampoline_mutex);
@@ -294,9 +356,11 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
 		goto out;
 	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
 		goto out;
+	image = container_of(tr->image, struct bpf_image, data);
+	latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops);
 	/* wait for tasks to get out of trampoline before freeing it */
 	synchronize_rcu_tasks();
-	bpf_jit_free_exec(tr->image);
+	bpf_jit_free_exec(image);
 	hlist_del(&tr->hlist);
 	kfree(tr);
 out:
...
@@ -131,8 +131,9 @@ int kernel_text_address(unsigned long addr)
 	 * triggers a stack trace, or a WARN() that happens during
 	 * coming back from idle, or cpu on or offlining.
 	 *
-	 * is_module_text_address() as well as the kprobe slots
-	 * and is_bpf_text_address() require RCU to be watching.
+	 * is_module_text_address() as well as the kprobe slots,
+	 * is_bpf_text_address() and is_bpf_image_address require
+	 * RCU to be watching.
 	 */
 	no_rcu = !rcu_is_watching();

@@ -148,6 +149,8 @@ int kernel_text_address(unsigned long addr)
 		goto out;
 	if (is_bpf_text_address(addr))
 		goto out;
+	if (is_bpf_image_address(addr))
+		goto out;
 	ret = 0;
 out:
 	if (no_rcu)
...