Commit c85e5594 authored by Jakub Kicinski

Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2023-12-06

We've added 4 non-merge commits during the last 6 day(s) which contain
a total of 7 files changed, 185 insertions(+), 55 deletions(-).

The main changes are:

1) Fix race found by syzkaller on prog_array_map_poke_run when
   a BPF program's kallsym symbols were still missing, from Jiri Olsa.

2) Fix BPF verifier's branch offset comparison for BPF_JMP32 | BPF_JA,
   from Yonghong Song.

3) Fix xsk's poll handling to only set mask on bound xsk sockets,
   from Yewon Choi.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  selftests/bpf: Add test for early update in prog_array_map_poke_run
  bpf: Fix prog_array_map_poke_run map poke update
  xsk: Skip polling event check for unbound socket
  bpf: Fix a verifier bug due to incorrect branch offset comparison with cpu=v4
====================

Link: https://lore.kernel.org/r/20231206220528.12093-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 0ad722bd ffed24ef
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -3025,3 +3025,49 @@ void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp
 #endif
 	WARN(1, "verification of programs using bpf_throw should have failed\n");
 }
+
+void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+			       struct bpf_prog *new, struct bpf_prog *old)
+{
+	u8 *old_addr, *new_addr, *old_bypass_addr;
+	int ret;
+
+	old_bypass_addr = old ? NULL : poke->bypass_addr;
+	old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
+	new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
+
+	/*
+	 * On program loading or teardown, the program's kallsym entry
+	 * might not be in place, so we use __bpf_arch_text_poke to skip
+	 * the kallsyms check.
+	 */
+	if (new) {
+		ret = __bpf_arch_text_poke(poke->tailcall_target,
+					   BPF_MOD_JUMP,
+					   old_addr, new_addr);
+		BUG_ON(ret < 0);
+		if (!old) {
+			ret = __bpf_arch_text_poke(poke->tailcall_bypass,
+						   BPF_MOD_JUMP,
+						   poke->bypass_addr,
+						   NULL);
+			BUG_ON(ret < 0);
+		}
+	} else {
+		ret = __bpf_arch_text_poke(poke->tailcall_bypass,
+					   BPF_MOD_JUMP,
+					   old_bypass_addr,
+					   poke->bypass_addr);
+		BUG_ON(ret < 0);
+		/* let other CPUs finish the execution of the program
+		 * so that they are not exposed to an invalid nop,
+		 * stack unwind or nop state
+		 */
+		if (!ret)
+			synchronize_rcu();
+		ret = __bpf_arch_text_poke(poke->tailcall_target,
+					   BPF_MOD_JUMP,
+					   old_addr, NULL);
+		BUG_ON(ret < 0);
+	}
+}
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -3175,6 +3175,9 @@ enum bpf_text_poke_type {
 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
 		       void *addr1, void *addr2);
+void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+			       struct bpf_prog *new, struct bpf_prog *old);
+
 void *bpf_arch_text_copy(void *dst, void *src, size_t len);
 int bpf_arch_text_invalidate(void *dst, size_t len);
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -1012,11 +1012,16 @@ static void prog_array_map_poke_untrack(struct bpf_map *map,
 	mutex_unlock(&aux->poke_mutex);
 }
 
+void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+				      struct bpf_prog *new, struct bpf_prog *old)
+{
+	WARN_ON_ONCE(1);
+}
+
 static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
 				    struct bpf_prog *old,
 				    struct bpf_prog *new)
 {
-	u8 *old_addr, *new_addr, *old_bypass_addr;
 	struct prog_poke_elem *elem;
 	struct bpf_array_aux *aux;
@@ -1025,7 +1030,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
 	list_for_each_entry(elem, &aux->poke_progs, list) {
 		struct bpf_jit_poke_descriptor *poke;
-		int i, ret;
+		int i;
 
 		for (i = 0; i < elem->aux->size_poke_tab; i++) {
 			poke = &elem->aux->poke_tab[i];
@@ -1044,21 +1049,10 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
 			 * activated, so tail call updates can arrive from here
 			 * while JIT is still finishing its final fixup for
 			 * non-activated poke entries.
-			 * 3) On program teardown, the program's kallsym entry gets
-			 *    removed out of RCU callback, but we can only untrack
-			 *    from sleepable context, therefore bpf_arch_text_poke()
-			 *    might not see that this is in BPF text section and
-			 *    bails out with -EINVAL. As these are unreachable since
-			 *    RCU grace period already passed, we simply skip them.
-			 * 4) Also programs reaching refcount of zero while patching
+			 * 3) Also programs reaching refcount of zero while patching
 			 *    is in progress is okay since we're protected under
 			 *    poke_mutex and untrack the programs before the JIT
-			 *    buffer is freed. When we're still in the middle of
-			 *    patching and suddenly kallsyms entry of the program
-			 *    gets evicted, we just skip the rest which is fine due
-			 *    to point 3).
-			 * 5) Any other error happening below from bpf_arch_text_poke()
-			 *    is a unexpected bug.
+			 *    buffer is freed.
 			 */
 			if (!READ_ONCE(poke->tailcall_target_stable))
 				continue;
@@ -1068,39 +1062,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
 			    poke->tail_call.key != key)
 				continue;
 
-			old_bypass_addr = old ? NULL : poke->bypass_addr;
-			old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
-			new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
-
-			if (new) {
-				ret = bpf_arch_text_poke(poke->tailcall_target,
-							 BPF_MOD_JUMP,
-							 old_addr, new_addr);
-				BUG_ON(ret < 0 && ret != -EINVAL);
-				if (!old) {
-					ret = bpf_arch_text_poke(poke->tailcall_bypass,
-								 BPF_MOD_JUMP,
-								 poke->bypass_addr,
-								 NULL);
-					BUG_ON(ret < 0 && ret != -EINVAL);
-				}
-			} else {
-				ret = bpf_arch_text_poke(poke->tailcall_bypass,
-							 BPF_MOD_JUMP,
-							 old_bypass_addr,
-							 poke->bypass_addr);
-				BUG_ON(ret < 0 && ret != -EINVAL);
-				/* let other CPUs finish the execution of program
-				 * so that it will not possible to expose them
-				 * to invalid nop, stack unwind, nop state
-				 */
-				if (!ret)
-					synchronize_rcu();
-				ret = bpf_arch_text_poke(poke->tailcall_target,
-							 BPF_MOD_JUMP,
-							 old_addr, NULL);
-				BUG_ON(ret < 0 && ret != -EINVAL);
-			}
+			bpf_arch_poke_desc_update(poke, new, old);
 		}
 	}
 }
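
For readers skimming the hunk above: the fix moves the arch-specific poking into bpf_arch_poke_desc_update(), declared __weak here so that architectures without tail-call JIT poking fall back to a warning, while x86 links in the strong definition shown earlier. Below is a minimal userspace sketch of that weak/strong override pattern, with hypothetical names (poke_update is illustrative only, not a kernel API):

	/* weak.c: generic code with a weak fallback */
	#include <stdio.h>

	__attribute__((weak)) void poke_update(int slot)
	{
		printf("generic: no arch support for slot %d\n", slot);
	}

	int main(void)
	{
		poke_update(0);
		return 0;
	}

	/* arch.c: when compiled and linked in as a second file, this
	 * strong definition replaces the weak fallback at link time:
	 *
	 *	void poke_update(int slot)
	 *	{
	 *		printf("arch: patching slot %d\n", slot);
	 *	}
	 *
	 * cc weak.c          -> prints the generic fallback
	 * cc weak.c arch.c   -> prints the arch override
	 */
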
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -371,14 +371,18 @@ static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old,
 static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old,
 				s32 end_new, s32 curr, const bool probe_pass)
 {
-	const s32 off_min = S16_MIN, off_max = S16_MAX;
+	s64 off_min, off_max, off;
 	s32 delta = end_new - end_old;
-	s32 off;
 
-	if (insn->code == (BPF_JMP32 | BPF_JA))
+	if (insn->code == (BPF_JMP32 | BPF_JA)) {
 		off = insn->imm;
-	else
+		off_min = S32_MIN;
+		off_max = S32_MAX;
+	} else {
 		off = insn->off;
+		off_min = S16_MIN;
+		off_max = S16_MAX;
+	}
 
 	if (curr < pos && curr + off + 1 >= end_old)
 		off += delta;
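
The point of the hunk above: BPF_JMP|BPF_JA keeps its branch offset in the 16-bit insn->off field, while the cpu=v4 BPF_JMP32|BPF_JA variant keeps a 32-bit offset in insn->imm, so checking both against S16_MIN/S16_MAX mishandled valid jmp32 offsets. A standalone sketch of the corrected range selection (simplified stand-in types, not the kernel's struct bpf_insn):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Simplified stand-ins for the two BPF_JA encodings. */
	struct insn {
		bool	jmp32;	/* true for BPF_JMP32 | BPF_JA */
		int16_t	off;	/* 16-bit offset of BPF_JMP | BPF_JA */
		int32_t	imm;	/* 32-bit offset of BPF_JMP32 | BPF_JA */
	};

	/* Returns true if the adjusted branch offset still fits the
	 * instruction's encoding; mirrors the widened check above. */
	static bool adjusted_off_fits(const struct insn *insn, int32_t delta)
	{
		/* 64-bit math so the comparison itself cannot overflow. */
		int64_t off = insn->jmp32 ? insn->imm : insn->off;
		int64_t min = insn->jmp32 ? INT32_MIN : INT16_MIN;
		int64_t max = insn->jmp32 ? INT32_MAX : INT16_MAX;

		return off + delta >= min && off + delta <= max;
	}

	int main(void)
	{
		/* 40000 overflows s16 but is valid for the jmp32 encoding. */
		struct insn wide = { .jmp32 = true, .imm = 40000 };

		printf("fits: %s\n", adjusted_off_fits(&wide, 0) ? "yes" : "no");
		return 0;
	}
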
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -947,7 +947,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
 	rcu_read_lock();
 	if (xsk_check_common(xs))
-		goto skip_tx;
+		goto out;
 
 	pool = xs->pool;
@@ -959,12 +959,11 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
 			xsk_generic_xmit(sk);
 	}
 
-skip_tx:
 	if (xs->rx && !xskq_prod_is_empty(xs->rx))
 		mask |= EPOLLIN | EPOLLRDNORM;
 	if (xs->tx && xsk_tx_writeable(xs))
 		mask |= EPOLLOUT | EPOLLWRNORM;
-
+out:
 	rcu_read_unlock();
 	return mask;
 }
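
The user-visible effect of the xsk change: poll() on an AF_XDP socket that was never bound no longer reports ready events, since the rx/tx mask computation is skipped entirely for unbound sockets. A minimal userspace sketch (assumes AF_XDP support; creating the socket typically requires CAP_NET_RAW):

	#include <poll.h>
	#include <stdio.h>
	#include <sys/socket.h>

	#ifndef AF_XDP
	#define AF_XDP 44	/* assumption: older libcs may lack the define */
	#endif

	int main(void)
	{
		/* Create an AF_XDP socket but never bind it to a device/queue. */
		int fd = socket(AF_XDP, SOCK_RAW, 0);
		struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };

		if (fd < 0) {
			perror("socket(AF_XDP)");
			return 1;
		}

		/* With the fix, the unbound socket reports no ready events
		 * instead of spurious POLLIN/POLLOUT. */
		if (poll(&pfd, 1, 0) < 0)
			perror("poll");
		else
			printf("revents=0x%x\n", (unsigned int)pfd.revents);

		return 0;
	}
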
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
 // SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
 #include <test_progs.h>
 #include <network_helpers.h>
+#include "tailcall_poke.skel.h"
 
 /* test_tailcall_1 checks basic functionality by patching multiple locations
  * in a single program for a single tail call slot with nop->jmp, jmp->nop
@@ -1105,6 +1108,85 @@ static void test_tailcall_bpf2bpf_fentry_entry(void)
 	bpf_object__close(tgt_obj);
 }
 
+#define JMP_TABLE "/sys/fs/bpf/jmp_table"
+
+static int poke_thread_exit;
+
+static void *poke_update(void *arg)
+{
+	__u32 zero = 0, prog1_fd, prog2_fd, map_fd;
+	struct tailcall_poke *call = arg;
+
+	map_fd = bpf_map__fd(call->maps.jmp_table);
+	prog1_fd = bpf_program__fd(call->progs.call1);
+	prog2_fd = bpf_program__fd(call->progs.call2);
+
+	while (!poke_thread_exit) {
+		bpf_map_update_elem(map_fd, &zero, &prog1_fd, BPF_ANY);
+		bpf_map_update_elem(map_fd, &zero, &prog2_fd, BPF_ANY);
+	}
+
+	return NULL;
+}
+
+/*
+ * We are trying to hit a prog array update during another program load
+ * that shares the same prog array map.
+ *
+ * For that we share the jmp_table map between two skeleton instances
+ * by pinning the jmp_table to the same path. The first skeleton instance
+ * then periodically updates jmp_table in the 'poke update' thread while
+ * we load the second skeleton instance in the main thread.
+ */
+static void test_tailcall_poke(void)
+{
+	struct tailcall_poke *call, *test;
+	int err, cnt = 10;
+	pthread_t thread;
+
+	unlink(JMP_TABLE);
+
+	call = tailcall_poke__open_and_load();
+	if (!ASSERT_OK_PTR(call, "tailcall_poke__open"))
+		return;
+
+	err = bpf_map__pin(call->maps.jmp_table, JMP_TABLE);
+	if (!ASSERT_OK(err, "bpf_map__pin"))
+		goto out;
+
+	err = pthread_create(&thread, NULL, poke_update, call);
+	if (!ASSERT_OK(err, "new toggler"))
+		goto out;
+
+	while (cnt--) {
+		test = tailcall_poke__open();
+		if (!ASSERT_OK_PTR(test, "tailcall_poke__open"))
+			break;
+
+		err = bpf_map__set_pin_path(test->maps.jmp_table, JMP_TABLE);
+		if (!ASSERT_OK(err, "bpf_map__set_pin_path")) {
+			tailcall_poke__destroy(test);
+			break;
+		}
+
+		bpf_program__set_autoload(test->progs.test, true);
+		bpf_program__set_autoload(test->progs.call1, false);
+		bpf_program__set_autoload(test->progs.call2, false);
+
+		err = tailcall_poke__load(test);
+		tailcall_poke__destroy(test);
+		if (!ASSERT_OK(err, "tailcall_poke__load"))
+			break;
+	}
+
+	poke_thread_exit = 1;
+	ASSERT_OK(pthread_join(thread, NULL), "pthread_join");
+
+out:
+	bpf_map__unpin(call->maps.jmp_table, JMP_TABLE);
+	tailcall_poke__destroy(call);
+}
+
 void test_tailcalls(void)
 {
 	if (test__start_subtest("tailcall_1"))
@@ -1139,4 +1221,6 @@ void test_tailcalls(void)
 		test_tailcall_bpf2bpf_fentry_fexit();
 	if (test__start_subtest("tailcall_bpf2bpf_fentry_entry"))
 		test_tailcall_bpf2bpf_fentry_entry();
+	if (test__start_subtest("tailcall_poke"))
+		test_tailcall_poke();
 }
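
Once the BPF selftests are built, the new subtest can be exercised on its own with the usual harness invocation, e.g. ./test_progs -t tailcalls/tailcall_poke (test_progs accepts test/subtest selectors).
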
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_poke.c
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+SEC("?fentry/bpf_fentry_test1")
+int BPF_PROG(test, int a)
+{
+	bpf_tail_call_static(ctx, &jmp_table, 0);
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(call1, int a)
+{
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(call2, int a)
+{
+	return 0;
+}
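
Note the '?' prefix in SEC("?fentry/bpf_fentry_test1") above: it marks the program as not auto-loaded by libbpf, which is why the userspace side toggles bpf_program__set_autoload() explicitly so that each skeleton instance loads only the programs it needs.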