Commit fdf45787 authored by Alexei Starovoitov

Merge branch 'bpf: Avoid unnecessary deadlock detection and failure in task storage'

Martin KaFai Lau says:

====================

From: Martin KaFai Lau <martin.lau@kernel.org>

The commit bc235cdb ("bpf: Prevent deadlock from recursive bpf_task_storage_[get|delete]")
added deadlock detection to prevent a tracing program from recursing
into the bpf_task_storage_{get,delete}() helpers.  These helpers acquire
a spin lock, so recursing into them would lead to a deadlock.

This detection is unnecessary for the bpf_lsm and bpf_iter programs,
which do not recurse.  The situation is the same as the existing
bpf_pid_task_storage_{lookup,delete}_elem(), which are
used from the syscall path and also have no deadlock detection.

This set adds new bpf_task_storage_{get,delete}() helper protos
without the deadlock detection.  It also removes the prog->active
check for the bpf_lsm and bpf_iter programs.  Please see the individual
patches for details.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents f3c51fe0 387b5321
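
For context before the diff: the busy-counter primitives that the cover letter's reasoning relies on are not shown below, since only their callers change. A minimal sketch of the per-cpu counter behind bpf_task_storage_lock()/bpf_task_storage_trylock() in kernel/bpf/bpf_task_storage.c, simplified for illustration and not the verbatim upstream source:

/* Kernel-context sketch; relies on the per-cpu helpers from <linux/percpu.h>. */
static DEFINE_PER_CPU(int, bpf_task_storage_busy);

static void bpf_task_storage_lock(void)
{
	migrate_disable();
	this_cpu_inc(bpf_task_storage_busy);
}

static void bpf_task_storage_unlock(void)
{
	this_cpu_dec(bpf_task_storage_busy);
	migrate_enable();
}

/* Used by the *_recur helpers: if the counter is already non-zero on this
 * cpu, a task-storage helper is already running here, so taking the bucket
 * spin lock could deadlock and the caller must bail out with -EBUSY.
 */
static bool bpf_task_storage_trylock(void)
{
	migrate_disable();
	if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
		this_cpu_dec(bpf_task_storage_busy);
		migrate_enable();
		return false;
	}
	return true;
}

The new non-recur protos added by this series can use bpf_task_storage_lock() unconditionally instead of the trylock, because bpf_prog_check_recur() guarantees the calling program type cannot nest inside another task-storage helper on the same cpu.
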
@@ -1649,13 +1649,8 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
struct bpf_prog *p = l->link.prog;
int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
if (p->aux->sleepable) {
enter_prog = (u64)__bpf_prog_enter_sleepable;
exit_prog = (u64)__bpf_prog_exit_sleepable;
} else {
enter_prog = (u64)__bpf_prog_enter;
exit_prog = (u64)__bpf_prog_exit;
}
enter_prog = (u64)bpf_trampoline_enter(p);
exit_prog = (u64)bpf_trampoline_exit(p);
if (l->cookie == 0) {
/* if cookie is zero, one instruction is enough to store it */
@@ -1894,10 +1894,6 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_link *l, int stack_size,
int run_ctx_off, bool save_ret)
{
void (*exit)(struct bpf_prog *prog, u64 start,
struct bpf_tramp_run_ctx *run_ctx) = __bpf_prog_exit;
u64 (*enter)(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx) = __bpf_prog_enter;
u8 *prog = *pprog;
u8 *jmp_insn;
int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
@@ -1916,23 +1912,12 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
*/
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_1, -run_ctx_off + ctx_cookie_off);
if (p->aux->sleepable) {
enter = __bpf_prog_enter_sleepable;
exit = __bpf_prog_exit_sleepable;
} else if (p->type == BPF_PROG_TYPE_STRUCT_OPS) {
enter = __bpf_prog_enter_struct_ops;
exit = __bpf_prog_exit_struct_ops;
} else if (p->expected_attach_type == BPF_LSM_CGROUP) {
enter = __bpf_prog_enter_lsm_cgroup;
exit = __bpf_prog_exit_lsm_cgroup;
}
/* arg1: mov rdi, progs[i] */
emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
/* arg2: lea rsi, [rbp - ctx_cookie_off] */
EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
if (emit_call(&prog, enter, prog))
if (emit_call(&prog, bpf_trampoline_enter(p), prog))
return -EINVAL;
/* remember prog start time returned by __bpf_prog_enter */
emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
@@ -1977,7 +1962,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
/* arg3: lea rdx, [rbp - run_ctx_off] */
EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
if (emit_call(&prog, exit, prog))
if (emit_call(&prog, bpf_trampoline_exit(p), prog))
return -EINVAL;
*pprog = prog;
@@ -854,22 +854,18 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *i
const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks,
void *orig_call);
/* these two functions are called from generated trampoline */
u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx);
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx);
u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx);
void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
struct bpf_tramp_run_ctx *run_ctx);
u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx);
void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
struct bpf_tramp_run_ctx *run_ctx);
u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx);
void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start,
struct bpf_tramp_run_ctx *run_ctx);
u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx);
void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
struct bpf_tramp_run_ctx *run_ctx);
void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
typedef u64 (*bpf_trampoline_enter_t)(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx);
typedef void (*bpf_trampoline_exit_t)(struct bpf_prog *prog, u64 start,
struct bpf_tramp_run_ctx *run_ctx);
bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog);
bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog);
struct bpf_ksym {
unsigned long start;
@@ -2523,7 +2519,9 @@ extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
extern const struct bpf_func_proto bpf_sock_from_file_proto;
extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
extern const struct bpf_func_proto bpf_task_storage_get_recur_proto;
extern const struct bpf_func_proto bpf_task_storage_get_proto;
extern const struct bpf_func_proto bpf_task_storage_delete_recur_proto;
extern const struct bpf_func_proto bpf_task_storage_delete_proto;
extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
@@ -642,10 +642,23 @@ static inline u32 type_flag(u32 type)
}
/* only use after check_attach_btf_id() */
static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog)
{
return prog->type == BPF_PROG_TYPE_EXT ?
prog->aux->dst_prog->type : prog->type;
}
static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
{
switch (resolve_prog_type(prog)) {
case BPF_PROG_TYPE_TRACING:
return prog->expected_attach_type != BPF_TRACE_ITER;
case BPF_PROG_TYPE_STRUCT_OPS:
case BPF_PROG_TYPE_LSM:
return false;
default:
return true;
}
}
#endif /* _LINUX_BPF_VERIFIER_H */
@@ -242,6 +242,7 @@ void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu)
__bpf_selem_unlink_storage(selem, use_trace_rcu);
}
/* If cacheit_lockit is false, this lookup function is lockless */
struct bpf_local_storage_data *
bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
struct bpf_local_storage_map *smap,
@@ -184,7 +184,8 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
return err;
}
static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
static int task_storage_delete(struct task_struct *task, struct bpf_map *map,
bool nobusy)
{
struct bpf_local_storage_data *sdata;
@@ -192,6 +193,9 @@ static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
if (!sdata)
return -ENOENT;
if (!nobusy)
return -EBUSY;
bpf_selem_unlink(SELEM(sdata), true);
return 0;
@@ -220,63 +224,108 @@ static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
}
bpf_task_storage_lock();
err = task_storage_delete(task, map);
err = task_storage_delete(task, map, true);
bpf_task_storage_unlock();
out:
put_pid(pid);
return err;
}
/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
task, void *, value, u64, flags, gfp_t, gfp_flags)
/* Called by bpf_task_storage_get*() helpers */
static void *__bpf_task_storage_get(struct bpf_map *map,
struct task_struct *task, void *value,
u64 flags, gfp_t gfp_flags, bool nobusy)
{
struct bpf_local_storage_data *sdata;
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
return (unsigned long)NULL;
if (!task)
return (unsigned long)NULL;
if (!bpf_task_storage_trylock())
return (unsigned long)NULL;
sdata = task_storage_lookup(task, map, true);
sdata = task_storage_lookup(task, map, nobusy);
if (sdata)
goto unlock;
return sdata->data;
/* only allocate new storage, when the task is refcounted */
if (refcount_read(&task->usage) &&
(flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
(flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) {
sdata = bpf_local_storage_update(
task, (struct bpf_local_storage_map *)map, value,
BPF_NOEXIST, gfp_flags);
return IS_ERR(sdata) ? NULL : sdata->data;
}
return NULL;
}
unlock:
/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct *,
task, void *, value, u64, flags, gfp_t, gfp_flags)
{
bool nobusy;
void *data;
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task)
return (unsigned long)NULL;
nobusy = bpf_task_storage_trylock();
data = __bpf_task_storage_get(map, task, value, flags,
gfp_flags, nobusy);
if (nobusy)
bpf_task_storage_unlock();
return (unsigned long)data;
}
/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
task, void *, value, u64, flags, gfp_t, gfp_flags)
{
void *data;
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task)
return (unsigned long)NULL;
bpf_task_storage_lock();
data = __bpf_task_storage_get(map, task, value, flags,
gfp_flags, true);
bpf_task_storage_unlock();
return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL :
(unsigned long)sdata->data;
return (unsigned long)data;
}
BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_struct *,
task)
{
bool nobusy;
int ret;
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (!task)
return -EINVAL;
if (!bpf_task_storage_trylock())
return -EBUSY;
nobusy = bpf_task_storage_trylock();
/* This helper must only be called from places where the lifetime of the task
* is guaranteed. Either by being refcounted or by being protected
* by an RCU read-side critical section.
*/
ret = task_storage_delete(task, map, nobusy);
if (nobusy)
bpf_task_storage_unlock();
return ret;
}
BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
task)
{
int ret;
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (!task)
return -EINVAL;
bpf_task_storage_lock();
/* This helper must only be called from places where the lifetime of the task
* is guaranteed. Either by being refcounted or by being protected
* by an RCU read-side critical section.
*/
ret = task_storage_delete(task, map);
ret = task_storage_delete(task, map, true);
bpf_task_storage_unlock();
return ret;
}
@@ -322,6 +371,17 @@ const struct bpf_map_ops task_storage_map_ops = {
.map_owner_storage_ptr = task_storage_ptr,
};
const struct bpf_func_proto bpf_task_storage_get_recur_proto = {
.func = bpf_task_storage_get_recur,
.gpl_only = false,
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_BTF_ID,
.arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
.arg4_type = ARG_ANYTHING,
};
const struct bpf_func_proto bpf_task_storage_get_proto = {
.func = bpf_task_storage_get,
.gpl_only = false,
@@ -333,6 +393,15 @@ const struct bpf_func_proto bpf_task_storage_get_proto = {
.arg4_type = ARG_ANYTHING,
};
const struct bpf_func_proto bpf_task_storage_delete_recur_proto = {
.func = bpf_task_storage_delete_recur,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_BTF_ID,
.arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
};
const struct bpf_func_proto bpf_task_storage_delete_proto = {
.func = bpf_task_storage_delete,
.gpl_only = false,
@@ -5133,13 +5133,14 @@ int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
run_ctx.bpf_cookie = 0;
run_ctx.saved_run_ctx = NULL;
if (!__bpf_prog_enter_sleepable(prog, &run_ctx)) {
if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
/* recursion detected */
bpf_prog_put(prog);
return -EBUSY;
}
attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
__bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */, &run_ctx);
__bpf_prog_exit_sleepable_recur(prog, 0 /* bpf_prog_run does runtime stats */,
&run_ctx);
bpf_prog_put(prog);
return 0;
#endif
@@ -864,7 +864,7 @@ static __always_inline u64 notrace bpf_prog_start_time(void)
* [2..MAX_U64] - execute bpf prog and record execution time.
* This is start time.
*/
u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
__acquires(RCU)
{
rcu_read_lock();
@@ -901,7 +901,8 @@ static void notrace update_prog_stats(struct bpf_prog *prog,
}
}
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx)
static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start,
struct bpf_tramp_run_ctx *run_ctx)
__releases(RCU)
{
bpf_reset_run_ctx(run_ctx->saved_run_ctx);
@@ -912,8 +913,8 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_
rcu_read_unlock();
}
u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx)
static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx)
__acquires(RCU)
{
/* Runtime stats are exported via actual BPF_LSM_CGROUP
@@ -927,8 +928,8 @@ u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
return NO_START_TIME;
}
void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
struct bpf_tramp_run_ctx *run_ctx)
static void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
struct bpf_tramp_run_ctx *run_ctx)
__releases(RCU)
{
bpf_reset_run_ctx(run_ctx->saved_run_ctx);
@@ -937,7 +938,8 @@ void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
rcu_read_unlock();
}
u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx)
{
rcu_read_lock_trace();
migrate_disable();
@@ -953,8 +955,8 @@ u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_r
return bpf_prog_start_time();
}
void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
struct bpf_tramp_run_ctx *run_ctx)
void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
struct bpf_tramp_run_ctx *run_ctx)
{
bpf_reset_run_ctx(run_ctx->saved_run_ctx);
@@ -964,8 +966,30 @@ void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
rcu_read_unlock_trace();
}
u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx)
static u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx)
{
rcu_read_lock_trace();
migrate_disable();
might_fault();
run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
return bpf_prog_start_time();
}
static void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
struct bpf_tramp_run_ctx *run_ctx)
{
bpf_reset_run_ctx(run_ctx->saved_run_ctx);
update_prog_stats(prog, start);
migrate_enable();
rcu_read_unlock_trace();
}
static u64 notrace __bpf_prog_enter(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx)
__acquires(RCU)
{
rcu_read_lock();
@@ -976,8 +1000,8 @@ u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog,
return bpf_prog_start_time();
}
void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start,
struct bpf_tramp_run_ctx *run_ctx)
static void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start,
struct bpf_tramp_run_ctx *run_ctx)
__releases(RCU)
{
bpf_reset_run_ctx(run_ctx->saved_run_ctx);
@@ -997,6 +1021,36 @@ void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
percpu_ref_put(&tr->pcref);
}
bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog)
{
bool sleepable = prog->aux->sleepable;
if (bpf_prog_check_recur(prog))
return sleepable ? __bpf_prog_enter_sleepable_recur :
__bpf_prog_enter_recur;
if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
prog->expected_attach_type == BPF_LSM_CGROUP)
return __bpf_prog_enter_lsm_cgroup;
return sleepable ? __bpf_prog_enter_sleepable : __bpf_prog_enter;
}
bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog)
{
bool sleepable = prog->aux->sleepable;
if (bpf_prog_check_recur(prog))
return sleepable ? __bpf_prog_exit_sleepable_recur :
__bpf_prog_exit_recur;
if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
prog->expected_attach_type == BPF_LSM_CGROUP)
return __bpf_prog_exit_lsm_cgroup;
return sleepable ? __bpf_prog_exit_sleepable : __bpf_prog_exit;
}
int __weak
arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
@@ -6,6 +6,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf_perf_event.h>
#include <linux/btf.h>
#include <linux/filter.h>
@@ -1488,8 +1489,12 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_this_cpu_ptr:
return &bpf_this_cpu_ptr_proto;
case BPF_FUNC_task_storage_get:
if (bpf_prog_check_recur(prog))
return &bpf_task_storage_get_recur_proto;
return &bpf_task_storage_get_proto;
case BPF_FUNC_task_storage_delete:
if (bpf_prog_check_recur(prog))
return &bpf_task_storage_delete_recur_proto;
return &bpf_task_storage_delete_proto;
case BPF_FUNC_for_each_map_elem:
return &bpf_for_each_map_elem_proto;
@@ -3,12 +3,16 @@
#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <unistd.h>
#include <sched.h>
#include <pthread.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sys/types.h>
#include <test_progs.h>
#include "task_local_storage_helpers.h"
#include "task_local_storage.skel.h"
#include "task_local_storage_exit_creds.skel.h"
#include "task_ls_recursion.skel.h"
#include "task_storage_nodeadlock.skel.h"
static void test_sys_enter_exit(void)
{
@@ -75,24 +79,160 @@ static void test_exit_creds(void)
static void test_recursion(void)
{
int err, map_fd, prog_fd, task_fd;
struct task_ls_recursion *skel;
int err;
struct bpf_prog_info info;
__u32 info_len = sizeof(info);
long value;
task_fd = sys_pidfd_open(getpid(), 0);
if (!ASSERT_NEQ(task_fd, -1, "sys_pidfd_open"))
return;
skel = task_ls_recursion__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
goto out;
err = task_ls_recursion__attach(skel);
if (!ASSERT_OK(err, "skel_attach"))
goto out;
/* trigger sys_enter, make sure it does not cause deadlock */
skel->bss->test_pid = getpid();
syscall(SYS_gettid);
skel->bss->test_pid = 0;
task_ls_recursion__detach(skel);
/* Refer to the comment in BPF_PROG(on_update) for
* the explanation on the value 201 and 100.
*/
map_fd = bpf_map__fd(skel->maps.map_a);
err = bpf_map_lookup_elem(map_fd, &task_fd, &value);
ASSERT_OK(err, "lookup map_a");
ASSERT_EQ(value, 201, "map_a value");
ASSERT_EQ(skel->bss->nr_del_errs, 1, "bpf_task_storage_delete busy");
map_fd = bpf_map__fd(skel->maps.map_b);
err = bpf_map_lookup_elem(map_fd, &task_fd, &value);
ASSERT_OK(err, "lookup map_b");
ASSERT_EQ(value, 100, "map_b value");
prog_fd = bpf_program__fd(skel->progs.on_lookup);
memset(&info, 0, sizeof(info));
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
ASSERT_OK(err, "get prog info");
ASSERT_GT(info.recursion_misses, 0, "on_lookup prog recursion");
prog_fd = bpf_program__fd(skel->progs.on_update);
memset(&info, 0, sizeof(info));
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
ASSERT_OK(err, "get prog info");
ASSERT_EQ(info.recursion_misses, 0, "on_update prog recursion");
prog_fd = bpf_program__fd(skel->progs.on_enter);
memset(&info, 0, sizeof(info));
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
ASSERT_OK(err, "get prog info");
ASSERT_EQ(info.recursion_misses, 0, "on_enter prog recursion");
out:
close(task_fd);
task_ls_recursion__destroy(skel);
}
static bool stop;
static void waitall(const pthread_t *tids, int nr)
{
int i;
stop = true;
for (i = 0; i < nr; i++)
pthread_join(tids[i], NULL);
}
static void *sock_create_loop(void *arg)
{
struct task_storage_nodeadlock *skel = arg;
int fd;
while (!stop) {
fd = socket(AF_INET, SOCK_STREAM, 0);
close(fd);
if (skel->bss->nr_get_errs || skel->bss->nr_del_errs)
stop = true;
}
return NULL;
}
static void test_nodeadlock(void)
{
struct task_storage_nodeadlock *skel;
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
const int nr_threads = 32;
pthread_t tids[nr_threads];
int i, prog_fd, err;
cpu_set_t old, new;
/* Pin all threads to one cpu to increase the chance of preemption
* in a sleepable bpf prog.
*/
CPU_ZERO(&new);
CPU_SET(0, &new);
err = sched_getaffinity(getpid(), sizeof(old), &old);
if (!ASSERT_OK(err, "getaffinity"))
return;
err = sched_setaffinity(getpid(), sizeof(new), &new);
if (!ASSERT_OK(err, "setaffinity"))
return;
skel = task_storage_nodeadlock__open_and_load();
if (!ASSERT_OK_PTR(skel, "open_and_load"))
goto done;
/* Unnecessary recursion and deadlock detection are reproducible
* in the preemptible kernel.
*/
if (!skel->kconfig->CONFIG_PREEMPT) {
test__skip();
goto done;
}
err = task_storage_nodeadlock__attach(skel);
ASSERT_OK(err, "attach prog");
for (i = 0; i < nr_threads; i++) {
err = pthread_create(&tids[i], NULL, sock_create_loop, skel);
if (err) {
/* Only assert once here to avoid excessive
* PASS printing during test failure.
*/
ASSERT_OK(err, "pthread_create");
waitall(tids, i);
goto done;
}
}
/* With 32 threads, 1s is enough to reproduce the issue */
sleep(1);
waitall(tids, nr_threads);
info_len = sizeof(info);
prog_fd = bpf_program__fd(skel->progs.socket_post_create);
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
ASSERT_OK(err, "get prog info");
ASSERT_EQ(info.recursion_misses, 0, "prog recursion");
ASSERT_EQ(skel->bss->nr_get_errs, 0, "bpf_task_storage_get busy");
ASSERT_EQ(skel->bss->nr_del_errs, 0, "bpf_task_storage_delete busy");
done:
task_storage_nodeadlock__destroy(skel);
sched_setaffinity(getpid(), sizeof(old), &old);
}
void test_task_local_storage(void)
{
if (test__start_subtest("sys_enter_exit"))
@@ -101,4 +241,6 @@ void test_task_local_storage(void)
test_exit_creds();
if (test__start_subtest("recursion"))
test_recursion();
if (test__start_subtest("nodeadlock"))
test_nodeadlock();
}
@@ -5,7 +5,13 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#ifndef EBUSY
#define EBUSY 16
#endif
char _license[] SEC("license") = "GPL";
int nr_del_errs = 0;
int test_pid = 0;
struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
@@ -26,6 +32,13 @@ int BPF_PROG(on_lookup)
{
struct task_struct *task = bpf_get_current_task_btf();
if (!test_pid || task->pid != test_pid)
return 0;
/* The bpf_task_storage_delete will call
* bpf_local_storage_lookup. The prog->active will
* stop the recursion.
*/
bpf_task_storage_delete(&map_a, task);
bpf_task_storage_delete(&map_b, task);
return 0;
@@ -37,11 +50,32 @@ int BPF_PROG(on_update)
struct task_struct *task = bpf_get_current_task_btf();
long *ptr;
if (!test_pid || task->pid != test_pid)
return 0;
ptr = bpf_task_storage_get(&map_a, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
/* ptr will not be NULL when it is called from
* the bpf_task_storage_get(&map_b,...F_CREATE) in
* the BPF_PROG(on_enter) below. It is because
* the value can be found in map_a and the kernel
* does not need to acquire any spin_lock.
*/
if (ptr) {
int err;
*ptr += 1;
err = bpf_task_storage_delete(&map_a, task);
if (err == -EBUSY)
nr_del_errs++;
}
/* This will still fail because map_b is empty and
* this BPF_PROG(on_update) has failed to acquire
* the percpu busy lock => meaning potential
* deadlock is detected and it will fail to create
* new storage.
*/
ptr = bpf_task_storage_get(&map_b, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
@@ -57,14 +91,17 @@ int BPF_PROG(on_enter, struct pt_regs *regs, long id)
long *ptr;
task = bpf_get_current_task_btf();
if (!test_pid || task->pid != test_pid)
return 0;
ptr = bpf_task_storage_get(&map_a, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
if (ptr && !*ptr)
*ptr = 200;
ptr = bpf_task_storage_get(&map_b, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
if (ptr && !*ptr)
*ptr = 100;
return 0;
}
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
#ifndef EBUSY
#define EBUSY 16
#endif
extern bool CONFIG_PREEMPT __kconfig __weak;
int nr_get_errs = 0;
int nr_del_errs = 0;
struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, int);
} task_storage SEC(".maps");
SEC("lsm.s/socket_post_create")
int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
int protocol, int kern)
{
struct task_struct *task;
int ret, zero = 0;
int *value;
if (!CONFIG_PREEMPT)
return 0;
task = bpf_get_current_task_btf();
value = bpf_task_storage_get(&task_storage, task, &zero,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!value)
__sync_fetch_and_add(&nr_get_errs, 1);
ret = bpf_task_storage_delete(&task_storage,
bpf_get_current_task_btf());
if (ret == -EBUSY)
__sync_fetch_and_add(&nr_del_errs, 1);
return 0;
}