Commit 6099754a authored by Alexei Starovoitov's avatar Alexei Starovoitov

Merge branch 'bpf: Add bpf_rcu_read_lock() support'

Yonghong Song says:

====================

Currently, without rcu attribute info in BTF, the verifier treats
rcu tagged pointer as a normal pointer. This might be a problem
for sleepable program where rcu_read_lock()/unlock() is not available.
For example, for a sleepable fentry program, if rcu protected memory
access is interleaved with a sleepable helper/kfunc, it is possible
the memory access after the sleepable helper/kfunc might be invalid
since the object might have been freed then. Even without
a sleepable helper/kfunc, without rcu_read_lock() protection,
it is possible that the rcu protected object might be release
in the middle of bpf program execution which may cause incorrect
result.

To prevent above cases, enable btf_type_tag("rcu") attributes,
introduce new bpf_rcu_read_lock/unlock() kfuncs and add verifier support.

In the rest of patch set, Patch 1 enabled btf_type_tag for __rcu
attribute. Patche 2 added might_sleep in bpf_func_proto. Patch 3 added new
bpf_rcu_read_lock/unlock() kfuncs and verifier support.
Patch 4 added some tests for these two new kfuncs.

Changelogs:
  v9 -> v10:
    . if no rcu tag support in vmlinux btf, using bpf_rcu_read_lock/unlock()
      will cause verification error.
    . at bpf_rcu_read_unlock(), invalidate rcu ptr to PTR_UNTRUSTED
      instead of SCALAR_VALUE.
    . a few other comment changes and other minor changes.
  v8 -> v9:
    . remove sleepable prog check for ld_abs/ind checking in rcu read
      lock region.
    . fix a test failure with gcc-compiled kernel.
    . a couple of other minor fixes.
  v7 -> v8:
    . add might_sleep in bpf_func_proto so we can easily identify whether
      a helper is sleepable or not.
    . do not enforce rcu rules for sleepable, e.g., rcu dereference must
      be in a bpf_rcu_read_lock region. This is to keep old code working
      fine.
    . Mark 'b' in 'b = a->b' (b is tagged with __rcu) as MEM_RCU only if
      'b = a->b' in rcu read region and 'a' is trusted. This adds safety
      guarantee for 'b' inside the rcu read region.
  v6 -> v7:
    . rebase on top of bpf-next.
    . remove the patch which enables sleepable program using
      cgrp_local_storage map. This is orthogonal to this patch set
      and will be addressed separately.
    . mark the rcu pointer dereference result as UNTRUSTED if inside
      a bpf_rcu_read_lock() region.
  v5 -> v6:
    . fix selftest prog miss_unlock which tested nested locking.
    . add comments in selftest prog cgrp_succ to explain how to handle
      nested memory access after rcu memory load.
  v4 -> v5:
    . add new test to aarch64 deny list.
  v3 -> v4:
    . fix selftest failures when built with gcc. gcc doesn't support
      btf_type_tag yet and some tests relies on that. skip these
      tests if vmlinux BTF does not have btf_type_tag("rcu").
  v2 -> v3:
    . went back to MEM_RCU approach with invalidate rcu ptr registers
      at bpf_rcu_read_unlock() place.
    . remove KF_RCU_LOCK/UNLOCK flag and compare btf_id at verification
      time instead.
  v1 -> v2:
    . use kfunc instead of helper for bpf_rcu_read_lock/unlock.
    . not use MEM_RCU bpf_type_flag, instead use active_rcu_lock
      in reg state to identify rcu ptr's.
    . Add more self tests.
    . add new test to s390x deny list.
====================
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents f471748b 48671232
......@@ -572,6 +572,9 @@ enum bpf_type_flag {
*/
PTR_TRUSTED = BIT(12 + BPF_BASE_TYPE_BITS),
/* MEM is tagged with rcu and memory access needs rcu_read_lock protection. */
MEM_RCU = BIT(13 + BPF_BASE_TYPE_BITS),
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
......@@ -682,6 +685,7 @@ struct bpf_func_proto {
u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
bool gpl_only;
bool pkt_access;
bool might_sleep;
enum bpf_return_type ret_type;
union {
struct {
......
......@@ -344,6 +344,7 @@ struct bpf_verifier_state {
u32 id;
} active_lock;
bool speculative;
bool active_rcu_lock;
/* first and last insn idx of this verifier state */
u32 first_insn_idx;
......@@ -445,6 +446,7 @@ struct bpf_insn_aux_data {
u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
bool zext_dst; /* this insn zero extends dst reg */
bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
u8 alu_state; /* used in combination with alu_limit */
/* below fields are initialized once */
......@@ -534,6 +536,7 @@ struct bpf_verifier_env {
bool bypass_spec_v1;
bool bypass_spec_v4;
bool seen_direct_write;
bool rcu_tag_supported;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
const struct bpf_line_info *prev_linfo;
struct bpf_verifier_log log;
......@@ -680,7 +683,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
}
}
#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED)
#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | MEM_RCU | PTR_TRUSTED)
static inline bool bpf_type_has_unsafe_modifiers(u32 type)
{
......
......@@ -49,7 +49,8 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
# endif
# define __iomem
# define __percpu BTF_TYPE_TAG(percpu)
# define __rcu
# define __rcu BTF_TYPE_TAG(rcu)
# define __chk_user_ptr(x) (void)0
# define __chk_io_ptr(x) (void)0
/* context/locking */
......
......@@ -151,6 +151,7 @@ BTF_ID_LIST_SINGLE(bpf_ima_inode_hash_btf_ids, struct, inode)
static const struct bpf_func_proto bpf_ima_inode_hash_proto = {
.func = bpf_ima_inode_hash,
.gpl_only = false,
.might_sleep = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &bpf_ima_inode_hash_btf_ids[0],
......@@ -169,6 +170,7 @@ BTF_ID_LIST_SINGLE(bpf_ima_file_hash_btf_ids, struct, file)
static const struct bpf_func_proto bpf_ima_file_hash_proto = {
.func = bpf_ima_file_hash,
.gpl_only = false,
.might_sleep = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &bpf_ima_file_hash_btf_ids[0],
......@@ -221,9 +223,9 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_bprm_opts_set:
return &bpf_bprm_opts_set_proto;
case BPF_FUNC_ima_inode_hash:
return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL;
return &bpf_ima_inode_hash_proto;
case BPF_FUNC_ima_file_hash:
return prog->aux->sleepable ? &bpf_ima_file_hash_proto : NULL;
return &bpf_ima_file_hash_proto;
case BPF_FUNC_get_attach_cookie:
return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto : NULL;
#ifdef CONFIG_NET
......
......@@ -6238,6 +6238,9 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
/* check __percpu tag */
if (strcmp(tag_value, "percpu") == 0)
tmp_flag = MEM_PERCPU;
/* check __rcu tag */
if (strcmp(tag_value, "rcu") == 0)
tmp_flag = MEM_RCU;
}
stype = btf_type_skip_modifiers(btf, mtype->type, &id);
......
......@@ -661,6 +661,7 @@ BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
const struct bpf_func_proto bpf_copy_from_user_proto = {
.func = bpf_copy_from_user,
.gpl_only = false,
.might_sleep = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
......@@ -691,6 +692,7 @@ BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size,
const struct bpf_func_proto bpf_copy_from_user_task_proto = {
.func = bpf_copy_from_user_task,
.gpl_only = true,
.might_sleep = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
......@@ -1988,6 +1990,16 @@ void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k)
return obj__ign;
}
void bpf_rcu_read_lock(void)
{
rcu_read_lock();
}
void bpf_rcu_read_unlock(void)
{
rcu_read_unlock();
}
__diag_pop();
BTF_SET8_START(generic_btf_ids)
......@@ -2029,6 +2041,8 @@ BTF_ID(func, bpf_cgroup_release)
BTF_SET8_START(common_btf_ids)
BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx)
BTF_ID_FLAGS(func, bpf_rdonly_cast)
BTF_ID_FLAGS(func, bpf_rcu_read_lock)
BTF_ID_FLAGS(func, bpf_rcu_read_unlock)
BTF_SET8_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
......
......@@ -527,6 +527,14 @@ static bool is_callback_calling_function(enum bpf_func_id func_id)
func_id == BPF_FUNC_user_ringbuf_drain;
}
static bool is_storage_get_function(enum bpf_func_id func_id)
{
return func_id == BPF_FUNC_sk_storage_get ||
func_id == BPF_FUNC_inode_storage_get ||
func_id == BPF_FUNC_task_storage_get ||
func_id == BPF_FUNC_cgrp_storage_get;
}
static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
const struct bpf_map *map)
{
......@@ -589,11 +597,12 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
strncpy(postfix, "_or_null", 16);
}
snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s",
snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s",
type & MEM_RDONLY ? "rdonly_" : "",
type & MEM_RINGBUF ? "ringbuf_" : "",
type & MEM_USER ? "user_" : "",
type & MEM_PERCPU ? "percpu_" : "",
type & MEM_RCU ? "rcu_" : "",
type & PTR_UNTRUSTED ? "untrusted_" : "",
type & PTR_TRUSTED ? "trusted_" : ""
);
......@@ -1220,6 +1229,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
dst_state->frame[i] = NULL;
}
dst_state->speculative = src->speculative;
dst_state->active_rcu_lock = src->active_rcu_lock;
dst_state->curframe = src->curframe;
dst_state->active_lock.ptr = src->active_lock.ptr;
dst_state->active_lock.id = src->active_lock.id;
......@@ -4258,6 +4268,25 @@ static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
return reg->type == PTR_TO_FLOW_KEYS;
}
static bool is_trusted_reg(const struct bpf_reg_state *reg)
{
/* A referenced register is always trusted. */
if (reg->ref_obj_id)
return true;
/* If a register is not referenced, it is trusted if it has the
* MEM_ALLOC, MEM_RCU or PTR_TRUSTED type modifiers, and no others. Some of the
* other type modifiers may be safe, but we elect to take an opt-in
* approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
* not.
*
* Eventually, we should make PTR_TRUSTED the single source of truth
* for whether a register is trusted.
*/
return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
!bpf_type_has_unsafe_modifiers(reg->type);
}
static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg,
int off, int size, bool strict)
......@@ -4737,9 +4766,28 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
if (type_flag(reg->type) & PTR_UNTRUSTED)
flag |= PTR_UNTRUSTED;
/* Any pointer obtained from walking a trusted pointer is no longer trusted. */
/* By default any pointer obtained from walking a trusted pointer is
* no longer trusted except the rcu case below.
*/
flag &= ~PTR_TRUSTED;
if (flag & MEM_RCU) {
/* Mark value register as MEM_RCU only if it is protected by
* bpf_rcu_read_lock() and the ptr reg is trusted. MEM_RCU
* itself can already indicate trustedness inside the rcu
* read lock region. Also mark it as PTR_TRUSTED.
*/
if (!env->cur_state->active_rcu_lock || !is_trusted_reg(reg))
flag &= ~MEM_RCU;
else
flag |= PTR_TRUSTED;
} else if (reg->type & MEM_RCU) {
/* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged
* with __rcu. Mark the flag as PTR_UNTRUSTED conservatively.
*/
flag |= PTR_UNTRUSTED;
}
if (atype == BPF_READ && value_regno >= 0)
mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
......@@ -5897,6 +5945,7 @@ static const struct bpf_reg_types btf_ptr_types = {
.types = {
PTR_TO_BTF_ID,
PTR_TO_BTF_ID | PTR_TRUSTED,
PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED,
},
};
static const struct bpf_reg_types percpu_btf_ptr_types = {
......@@ -6075,6 +6124,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
case PTR_TO_BTF_ID:
case PTR_TO_BTF_ID | MEM_ALLOC:
case PTR_TO_BTF_ID | PTR_TRUSTED:
case PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED:
case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
/* When referenced PTR_TO_BTF_ID is passed to release function,
* it's fixed offset must be 0. In the other cases, fixed offset
......@@ -7516,6 +7566,11 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return -EINVAL;
}
if (!env->prog->aux->sleepable && fn->might_sleep) {
verbose(env, "helper call might sleep in a non-sleepable prog\n");
return -EINVAL;
}
/* With LD_ABS/IND some JITs save/restore skb from r1. */
changes_data = bpf_helper_changes_pkt_data(fn->func);
if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
......@@ -7534,6 +7589,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return err;
}
if (env->cur_state->active_rcu_lock) {
if (fn->might_sleep) {
verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
func_id_name(func_id), func_id);
return -EINVAL;
}
if (env->prog->aux->sleepable && is_storage_get_function(func_id))
env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
}
meta.func_id = func_id;
/* check args */
for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
......@@ -7961,25 +8027,6 @@ static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg)
return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET);
}
static bool is_trusted_reg(const struct bpf_reg_state *reg)
{
/* A referenced register is always trusted. */
if (reg->ref_obj_id)
return true;
/* If a register is not referenced, it is trusted if it has either the
* MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
* other type modifiers may be safe, but we elect to take an opt-in
* approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
* not.
*
* Eventually, we should make PTR_TRUSTED the single source of truth
* for whether a register is trusted.
*/
return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
!bpf_type_has_unsafe_modifiers(reg->type);
}
static bool __kfunc_param_match_suffix(const struct btf *btf,
const struct btf_param *arg,
const char *suffix)
......@@ -8158,6 +8205,8 @@ enum special_kfunc_type {
KF_bpf_list_pop_back,
KF_bpf_cast_to_kern_ctx,
KF_bpf_rdonly_cast,
KF_bpf_rcu_read_lock,
KF_bpf_rcu_read_unlock,
};
BTF_SET_START(special_kfunc_set)
......@@ -8180,6 +8229,18 @@ BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rcu_read_lock)
BTF_ID(func, bpf_rcu_read_unlock)
static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
{
return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
}
static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
{
return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
}
static enum kfunc_ptr_arg_type
get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
......@@ -8812,6 +8873,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
const struct btf_type *t, *func, *func_proto, *ptr_type;
struct bpf_reg_state *regs = cur_regs(env);
const char *func_name, *ptr_type_name;
bool sleepable, rcu_lock, rcu_unlock;
struct bpf_kfunc_call_arg_meta meta;
u32 i, nargs, func_id, ptr_type_id;
int err, insn_idx = *insn_idx_p;
......@@ -8853,11 +8915,45 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return -EACCES;
}
if (is_kfunc_sleepable(&meta) && !env->prog->aux->sleepable) {
sleepable = is_kfunc_sleepable(&meta);
if (sleepable && !env->prog->aux->sleepable) {
verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
return -EACCES;
}
rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
if ((rcu_lock || rcu_unlock) && !env->rcu_tag_supported) {
verbose(env, "no vmlinux btf rcu tag support for kfunc %s\n", func_name);
return -EACCES;
}
if (env->cur_state->active_rcu_lock) {
struct bpf_func_state *state;
struct bpf_reg_state *reg;
if (rcu_lock) {
verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
return -EINVAL;
} else if (rcu_unlock) {
bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
if (reg->type & MEM_RCU) {
reg->type &= ~(MEM_RCU | PTR_TRUSTED);
reg->type |= PTR_UNTRUSTED;
}
}));
env->cur_state->active_rcu_lock = false;
} else if (sleepable) {
verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
return -EACCES;
}
} else if (rcu_lock) {
env->cur_state->active_rcu_lock = true;
} else if (rcu_unlock) {
verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
return -EINVAL;
}
/* Check the arguments */
err = check_kfunc_args(env, &meta);
if (err < 0)
......@@ -11749,6 +11845,11 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return -EINVAL;
}
if (env->cur_state->active_rcu_lock) {
verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n");
return -EINVAL;
}
if (regs[ctx_reg].type != PTR_TO_CTX) {
verbose(env,
"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
......@@ -13014,6 +13115,9 @@ static bool states_equal(struct bpf_verifier_env *env,
old->active_lock.id != cur->active_lock.id)
return false;
if (old->active_rcu_lock != cur->active_rcu_lock)
return false;
/* for states to be equal callsites have to be the same
* and all frame states need to be equivalent
*/
......@@ -13701,6 +13805,11 @@ static int do_check(struct bpf_verifier_env *env)
return -EINVAL;
}
if (env->cur_state->active_rcu_lock) {
verbose(env, "bpf_rcu_read_unlock is missing\n");
return -EINVAL;
}
/* We must do check_reference_leak here before
* prepare_func_exit to handle the case when
* state->curframe > 0, it may be a callback
......@@ -15489,14 +15598,12 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
goto patch_call_imm;
}
if (insn->imm == BPF_FUNC_task_storage_get ||
insn->imm == BPF_FUNC_sk_storage_get ||
insn->imm == BPF_FUNC_inode_storage_get ||
insn->imm == BPF_FUNC_cgrp_storage_get) {
if (env->prog->aux->sleepable)
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
else
if (is_storage_get_function(insn->imm)) {
if (!env->prog->aux->sleepable ||
env->insn_aux_data[i + delta].storage_get_func_atomic)
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
else
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
insn_buf[1] = *insn;
cnt = 2;
......@@ -16575,6 +16682,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
env->bypass_spec_v1 = bpf_bypass_spec_v1();
env->bypass_spec_v4 = bpf_bypass_spec_v4();
env->bpf_capable = bpf_capable();
env->rcu_tag_supported = btf_vmlinux &&
btf_find_by_name_kind(btf_vmlinux, "rcu", BTF_KIND_TYPE_TAG) > 0;
if (is_priv)
env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
......
......@@ -1485,9 +1485,9 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_task_stack:
return &bpf_get_task_stack_proto;
case BPF_FUNC_copy_from_user:
return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
return &bpf_copy_from_user_proto;
case BPF_FUNC_copy_from_user_task:
return prog->aux->sleepable ? &bpf_copy_from_user_task_proto : NULL;
return &bpf_copy_from_user_task_proto;
case BPF_FUNC_snprintf_btf:
return &bpf_snprintf_btf_proto;
case BPF_FUNC_per_cpu_ptr:
......
......@@ -45,6 +45,7 @@ modify_return # modify_return__attach failed
module_attach # skel_attach skeleton attach failed: -524
mptcp/base # run_test mptcp unexpected error: -524 (errno 524)
netcnt # packets unexpected packets: actual 10001 != expected 10000
rcu_read_lock # failed to attach: ERROR: strerror_r(-524)=22
recursion # skel_attach unexpected error: -524 (errno 524)
ringbuf # skel_attach skeleton attachment failed: -1
setget_sockopt # attach_cgroup unexpected error: -524
......
......@@ -43,6 +43,7 @@ module_attach # skel_attach skeleton attach failed: -
mptcp
netcnt # failed to load BPF skeleton 'netcnt_prog': -7 (?)
probe_user # check_kprobe_res wrong kprobe res from probe read (?)
rcu_read_lock # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?)
recursion # skel_attach unexpected error: -524 (trampoline)
ringbuf # skel_load skeleton load failed (?)
select_reuseport # intermittently fails on new s390x setup
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.*/
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <test_progs.h>
#include <bpf/btf.h>
#include "rcu_read_lock.skel.h"
#include "cgroup_helpers.h"
static unsigned long long cgroup_id;
static void test_success(void)
{
struct rcu_read_lock *skel;
int err;
skel = rcu_read_lock__open();
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
skel->bss->target_pid = syscall(SYS_gettid);
bpf_program__set_autoload(skel->progs.get_cgroup_id, true);
bpf_program__set_autoload(skel->progs.task_succ, true);
bpf_program__set_autoload(skel->progs.no_lock, true);
bpf_program__set_autoload(skel->progs.two_regions, true);
bpf_program__set_autoload(skel->progs.non_sleepable_1, true);
bpf_program__set_autoload(skel->progs.non_sleepable_2, true);
err = rcu_read_lock__load(skel);
if (!ASSERT_OK(err, "skel_load"))
goto out;
err = rcu_read_lock__attach(skel);
if (!ASSERT_OK(err, "skel_attach"))
goto out;
syscall(SYS_getpgid);
ASSERT_EQ(skel->bss->task_storage_val, 2, "task_storage_val");
ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id");
out:
rcu_read_lock__destroy(skel);
}
static void test_rcuptr_acquire(void)
{
struct rcu_read_lock *skel;
int err;
skel = rcu_read_lock__open();
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
skel->bss->target_pid = syscall(SYS_gettid);
bpf_program__set_autoload(skel->progs.task_acquire, true);
err = rcu_read_lock__load(skel);
if (!ASSERT_OK(err, "skel_load"))
goto out;
err = rcu_read_lock__attach(skel);
ASSERT_OK(err, "skel_attach");
out:
rcu_read_lock__destroy(skel);
}
static const char * const inproper_region_tests[] = {
"miss_lock",
"miss_unlock",
"non_sleepable_rcu_mismatch",
"inproper_sleepable_helper",
"inproper_sleepable_kfunc",
"nested_rcu_region",
};
static void test_inproper_region(void)
{
struct rcu_read_lock *skel;
struct bpf_program *prog;
int i, err;
for (i = 0; i < ARRAY_SIZE(inproper_region_tests); i++) {
skel = rcu_read_lock__open();
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
prog = bpf_object__find_program_by_name(skel->obj, inproper_region_tests[i]);
if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
goto out;
bpf_program__set_autoload(prog, true);
err = rcu_read_lock__load(skel);
ASSERT_ERR(err, "skel_load");
out:
rcu_read_lock__destroy(skel);
}
}
static const char * const rcuptr_misuse_tests[] = {
"task_untrusted_non_rcuptr",
"task_untrusted_rcuptr",
"cross_rcu_region",
};
static void test_rcuptr_misuse(void)
{
struct rcu_read_lock *skel;
struct bpf_program *prog;
int i, err;
for (i = 0; i < ARRAY_SIZE(rcuptr_misuse_tests); i++) {
skel = rcu_read_lock__open();
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
prog = bpf_object__find_program_by_name(skel->obj, rcuptr_misuse_tests[i]);
if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
goto out;
bpf_program__set_autoload(prog, true);
err = rcu_read_lock__load(skel);
ASSERT_ERR(err, "skel_load");
out:
rcu_read_lock__destroy(skel);
}
}
void test_rcu_read_lock(void)
{
struct btf *vmlinux_btf;
int cgroup_fd;
vmlinux_btf = btf__load_vmlinux_btf();
if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF"))
return;
if (btf__find_by_name_kind(vmlinux_btf, "rcu", BTF_KIND_TYPE_TAG) < 0) {
test__skip();
goto out;
}
cgroup_fd = test__join_cgroup("/rcu_read_lock");
if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /rcu_read_lock"))
goto out;
cgroup_id = get_cgroup_id("/rcu_read_lock");
if (test__start_subtest("success"))
test_success();
if (test__start_subtest("rcuptr_acquire"))
test_rcuptr_acquire();
if (test__start_subtest("negative_tests_inproper_region"))
test_inproper_region();
if (test__start_subtest("negative_tests_rcuptr_misuse"))
test_rcuptr_misuse();
close(cgroup_fd);
out:
btf__free(vmlinux_btf);
}
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_tracing_net.h"
#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, long);
} map_a SEC(".maps");
__u32 user_data, key_serial, target_pid;
__u64 flags, task_storage_val, cgroup_id;
struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
void bpf_key_put(struct bpf_key *key) __ksym;
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
void bpf_task_release(struct task_struct *p) __ksym;
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
int get_cgroup_id(void *ctx)
{
struct task_struct *task;
task = bpf_get_current_task_btf();
if (task->pid != target_pid)
return 0;
/* simulate bpf_get_current_cgroup_id() helper */
bpf_rcu_read_lock();
cgroup_id = task->cgroups->dfl_cgrp->kn->id;
bpf_rcu_read_unlock();
return 0;
}
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
int task_succ(void *ctx)
{
struct task_struct *task, *real_parent;
long init_val = 2;
long *ptr;
task = bpf_get_current_task_btf();
if (task->pid != target_pid)
return 0;
bpf_rcu_read_lock();
/* region including helper using rcu ptr real_parent */
real_parent = task->real_parent;
ptr = bpf_task_storage_get(&map_a, real_parent, &init_val,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!ptr)
goto out;
ptr = bpf_task_storage_get(&map_a, real_parent, 0, 0);
if (!ptr)
goto out;
task_storage_val = *ptr;
out:
bpf_rcu_read_unlock();
return 0;
}
SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
int no_lock(void *ctx)
{
struct task_struct *task, *real_parent;
/* no bpf_rcu_read_lock(), old code still works */
task = bpf_get_current_task_btf();
real_parent = task->real_parent;
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
return 0;
}
SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
int two_regions(void *ctx)
{
struct task_struct *task, *real_parent;
/* two regions */
task = bpf_get_current_task_btf();
bpf_rcu_read_lock();
bpf_rcu_read_unlock();
bpf_rcu_read_lock();
real_parent = task->real_parent;
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
bpf_rcu_read_unlock();
return 0;
}
SEC("?fentry/" SYS_PREFIX "sys_getpgid")
int non_sleepable_1(void *ctx)
{
struct task_struct *task, *real_parent;
task = bpf_get_current_task_btf();
bpf_rcu_read_lock();
real_parent = task->real_parent;
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
bpf_rcu_read_unlock();
return 0;
}
SEC("?fentry/" SYS_PREFIX "sys_getpgid")
int non_sleepable_2(void *ctx)
{
struct task_struct *task, *real_parent;
bpf_rcu_read_lock();
task = bpf_get_current_task_btf();
bpf_rcu_read_unlock();
bpf_rcu_read_lock();
real_parent = task->real_parent;
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
bpf_rcu_read_unlock();
return 0;
}
SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
int task_acquire(void *ctx)
{
struct task_struct *task, *real_parent;
task = bpf_get_current_task_btf();
bpf_rcu_read_lock();
real_parent = task->real_parent;
/* acquire a reference which can be used outside rcu read lock region */
real_parent = bpf_task_acquire(real_parent);
bpf_rcu_read_unlock();
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
bpf_task_release(real_parent);
return 0;
}
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
int miss_lock(void *ctx)
{
struct task_struct *task;
struct css_set *cgroups;
struct cgroup *dfl_cgrp;
/* missing bpf_rcu_read_lock() */
task = bpf_get_current_task_btf();
bpf_rcu_read_lock();
(void)bpf_task_storage_get(&map_a, task, 0, 0);
bpf_rcu_read_unlock();
bpf_rcu_read_unlock();
return 0;
}
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
int miss_unlock(void *ctx)
{
struct task_struct *task;
struct css_set *cgroups;
struct cgroup *dfl_cgrp;
/* missing bpf_rcu_read_unlock() */
task = bpf_get_current_task_btf();
bpf_rcu_read_lock();
(void)bpf_task_storage_get(&map_a, task, 0, 0);
return 0;
}
SEC("?fentry/" SYS_PREFIX "sys_getpgid")
int non_sleepable_rcu_mismatch(void *ctx)
{
struct task_struct *task, *real_parent;
task = bpf_get_current_task_btf();
/* non-sleepable: missing bpf_rcu_read_unlock() in one path */
bpf_rcu_read_lock();
real_parent = task->real_parent;
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
if (real_parent)
bpf_rcu_read_unlock();
return 0;
}
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
int inproper_sleepable_helper(void *ctx)
{
struct task_struct *task, *real_parent;
struct pt_regs *regs;
__u32 value = 0;
void *ptr;
task = bpf_get_current_task_btf();
/* sleepable helper in rcu read lock region */
bpf_rcu_read_lock();
real_parent = task->real_parent;
regs = (struct pt_regs *)bpf_task_pt_regs(real_parent);
if (!regs) {
bpf_rcu_read_unlock();
return 0;
}
ptr = (void *)PT_REGS_IP(regs);
(void)bpf_copy_from_user_task(&value, sizeof(uint32_t), ptr, task, 0);
user_data = value;
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
bpf_rcu_read_unlock();
return 0;
}
SEC("?lsm.s/bpf")
int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size)
{
struct bpf_key *bkey;
/* sleepable kfunc in rcu read lock region */
bpf_rcu_read_lock();
bkey = bpf_lookup_user_key(key_serial, flags);
bpf_rcu_read_unlock();
if (!bkey)
return -1;
bpf_key_put(bkey);
return 0;
}
SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
int nested_rcu_region(void *ctx)
{
struct task_struct *task, *real_parent;
/* nested rcu read lock regions */
task = bpf_get_current_task_btf();
bpf_rcu_read_lock();
bpf_rcu_read_lock();
real_parent = task->real_parent;
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
bpf_rcu_read_unlock();
bpf_rcu_read_unlock();
return 0;
}
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
int task_untrusted_non_rcuptr(void *ctx)
{
struct task_struct *task, *last_wakee;
task = bpf_get_current_task_btf();
bpf_rcu_read_lock();
/* the pointer last_wakee marked as untrusted */
last_wakee = task->real_parent->last_wakee;
(void)bpf_task_storage_get(&map_a, last_wakee, 0, 0);
bpf_rcu_read_unlock();
return 0;
}
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
int task_untrusted_rcuptr(void *ctx)
{
struct task_struct *task, *real_parent;
task = bpf_get_current_task_btf();
bpf_rcu_read_lock();
real_parent = task->real_parent;
bpf_rcu_read_unlock();
/* helper use of rcu ptr outside the rcu read lock region */
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
return 0;
}
SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
int cross_rcu_region(void *ctx)
{
struct task_struct *task, *real_parent;
/* rcu ptr define/use in different regions */
task = bpf_get_current_task_btf();
bpf_rcu_read_lock();
real_parent = task->real_parent;
bpf_rcu_read_unlock();
bpf_rcu_read_lock();
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
bpf_rcu_read_unlock();
return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment