Commit deb9fd64 authored by Alexei Starovoitov

Merge branch 'Make struct bpf_cpumask RCU safe'

David Vernet says:

====================

The struct bpf_cpumask type is currently not RCU safe. It uses the
bpf_mem_cache_{alloc,free}() APIs to allocate and release cpumasks, and
those allocations may be reused before an RCU grace period has elapsed.
We want to be able to enable using this pattern in BPF programs:

private(MASK) static struct bpf_cpumask __kptr *global;

int BPF_PROG(prog, ...)
{
	struct bpf_cpumask *cpumask;

	bpf_rcu_read_lock();
	cpumask = global;
	if (!cpumask) {
		bpf_rcu_read_unlock();
		return -1;
	}
	bpf_cpumask_setall(cpumask);
	...
	bpf_rcu_read_unlock();
}

In other words, to be able to pass a kptr to KF_RCU bpf_cpumask kfuncs
without requiring the acquisition and release of refcounts using
bpf_cpumask_kptr_get(). This patchset enables this by making the struct
bpf_cpumask type RCU safe, and removing the bpf_cpumask_kptr_get()
function.
---
v1: https://lore.kernel.org/all/20230316014122.678082-2-void@manifault.com/

Changelog:
----------
v1 -> v2:
- Add doxygen comment for new @rcu field in struct bpf_cpumask.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 6cb9430b fec2c6d1
......@@ -117,12 +117,7 @@ For example:
As mentioned and illustrated above, these ``struct bpf_cpumask *`` objects can
also be stored in a map and used as kptrs. If a ``struct bpf_cpumask *`` is in
a map, the reference can be removed from the map with bpf_kptr_xchg(), or
opportunistically acquired with bpf_cpumask_kptr_get():
.. kernel-doc:: kernel/bpf/cpumask.c
:identifiers: bpf_cpumask_kptr_get
Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map:
opportunistically acquired using RCU:
.. code-block:: c
......@@ -144,7 +139,7 @@ Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map:
/**
* A simple example tracepoint program showing how a
* struct bpf_cpumask * kptr that is stored in a map can
* be acquired using the bpf_cpumask_kptr_get() kfunc.
* be passed to kfuncs using RCU protection.
*/
SEC("tp_btf/cgroup_mkdir")
int BPF_PROG(cgrp_ancestor_example, struct cgroup *cgrp, const char *path)
......@@ -158,26 +153,21 @@ Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map:
if (!v)
return -ENOENT;
bpf_rcu_read_lock();
/* Acquire a reference to the bpf_cpumask * kptr that's already stored in the map. */
kptr = bpf_cpumask_kptr_get(&v->cpumask);
if (!kptr)
kptr = v->cpumask;
if (!kptr) {
/* If no bpf_cpumask was present in the map, it's because
* we're racing with another CPU that removed it with
* bpf_kptr_xchg() between the bpf_map_lookup_elem()
* above, and our call to bpf_cpumask_kptr_get().
* bpf_cpumask_kptr_get() internally safely handles this
* race, and will return NULL if the cpumask is no longer
* present in the map by the time we invoke the kfunc.
* above, and our load of the pointer from the map.
*/
bpf_rcu_read_unlock();
return -EBUSY;
}
/* Free the reference we just took above. Note that the
* original struct bpf_cpumask * kptr is still in the map. It will
* be freed either at a later time if another context deletes
* it from the map, or automatically by the BPF subsystem if
* it's still present when the map is destroyed.
*/
bpf_cpumask_release(kptr);
bpf_cpumask_setall(kptr);
bpf_rcu_read_unlock();
return 0;
}
......
......@@ -9,6 +9,7 @@
/**
* struct bpf_cpumask - refcounted BPF cpumask wrapper structure
* @cpumask: The actual cpumask embedded in the struct.
* @rcu: The RCU head used to free the cpumask with RCU safety.
* @usage: Object reference counter. When the refcount goes to 0, the
* memory is released back to the BPF allocator, which provides
* RCU safety.
......@@ -24,6 +25,7 @@
*/
struct bpf_cpumask {
/* The wrapped cpumask, stored inline in the struct rather than by pointer. */
cpumask_t cpumask;
/*
 * RCU callback head. bpf_cpumask_release() passes it to call_rcu() once the
 * last reference is dropped; the callback recovers the enclosing
 * struct bpf_cpumask from it with container_of() before freeing.
 */
struct rcu_head rcu;
/*
 * Reference count. The refcount_dec_and_test() in bpf_cpumask_release()
 * that reaches zero is what schedules the deferred free.
 */
refcount_t usage;
};
......@@ -80,32 +82,14 @@ __bpf_kfunc struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask)
return cpumask;
}
/**
* bpf_cpumask_kptr_get() - Attempt to acquire a reference to a BPF cpumask
* stored in a map.
* @cpumaskp: A pointer to a BPF cpumask map value.
*
* Attempts to acquire a reference to a BPF cpumask stored in a map value. The
* cpumask returned by this function must either be embedded in a map as a
* kptr, or freed with bpf_cpumask_release(). This function may return NULL if
* no BPF cpumask was found in the specified map value.
*/
__bpf_kfunc struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumaskp)
static void cpumask_free_cb(struct rcu_head *head)
{
struct bpf_cpumask *cpumask;
/* The BPF memory allocator frees memory backing its caches in an RCU
* callback. Thus, we can safely use RCU to ensure that the cpumask is
* safe to read.
*/
rcu_read_lock();
cpumask = READ_ONCE(*cpumaskp);
if (cpumask && !refcount_inc_not_zero(&cpumask->usage))
cpumask = NULL;
rcu_read_unlock();
return cpumask;
cpumask = container_of(head, struct bpf_cpumask, rcu);
migrate_disable();
bpf_mem_cache_free(&bpf_cpumask_ma, cpumask);
migrate_enable();
}
/**
......@@ -121,11 +105,8 @@ __bpf_kfunc void bpf_cpumask_release(struct bpf_cpumask *cpumask)
if (!cpumask)
return;
if (refcount_dec_and_test(&cpumask->usage)) {
migrate_disable();
bpf_mem_cache_free(&bpf_cpumask_ma, cpumask);
migrate_enable();
}
if (refcount_dec_and_test(&cpumask->usage))
call_rcu(&cpumask->rcu, cpumask_free_cb);
}
/**
......@@ -426,7 +407,6 @@ BTF_SET8_START(cpumask_kfunc_btf_ids)
BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_RCU)
......
......@@ -4599,6 +4599,7 @@ static bool in_rcu_cs(struct bpf_verifier_env *env)
BTF_SET_START(rcu_protected_types)
BTF_ID(struct, prog_test_ref_kfunc)
BTF_ID(struct, cgroup)
BTF_ID(struct, bpf_cpumask)
BTF_SET_END(rcu_protected_types)
static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
......
......@@ -16,7 +16,7 @@ static const char * const cpumask_success_testcases[] = {
"test_copy_any_anyand",
"test_insert_leave",
"test_insert_remove_release",
"test_insert_kptr_get_release",
"test_global_mask_rcu",
};
static void verify_success(const char *prog_name)
......
......@@ -9,6 +9,9 @@
int err;
#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
private(MASK) static struct bpf_cpumask __kptr * global_mask;
struct __cpumask_map_value {
struct bpf_cpumask __kptr * cpumask;
};
......@@ -23,7 +26,6 @@ struct array_map {
struct bpf_cpumask *bpf_cpumask_create(void) __ksym;
void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym;
struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym;
struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumask) __ksym;
u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym;
u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym;
void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
......@@ -51,6 +53,9 @@ void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym
u32 bpf_cpumask_any(const struct cpumask *src) __ksym;
u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2) __ksym;
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
static inline const struct cpumask *cast(struct bpf_cpumask *cpumask)
{
return (const struct cpumask *)cpumask;
......
......@@ -95,35 +95,73 @@ int BPF_PROG(test_insert_remove_no_release, struct task_struct *task, u64 clone_
}
SEC("tp_btf/task_newtask")
__failure __msg("Unreleased reference")
int BPF_PROG(test_kptr_get_no_release, struct task_struct *task, u64 clone_flags)
__failure __msg("NULL pointer passed to trusted arg0")
int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags)
{
struct bpf_cpumask *cpumask;
struct __cpumask_map_value *v;
/* NULL passed to KF_TRUSTED_ARGS kfunc. */
bpf_cpumask_empty(NULL);
cpumask = create_cpumask();
if (!cpumask)
return 0;
}
SEC("tp_btf/task_newtask")
__failure __msg("R2 must be a rcu pointer")
int BPF_PROG(test_global_mask_out_of_rcu, struct task_struct *task, u64 clone_flags)
{
struct bpf_cpumask *local, *prev;
local = create_cpumask();
if (!local)
return 0;
if (cpumask_map_insert(cpumask))
prev = bpf_kptr_xchg(&global_mask, local);
if (prev) {
bpf_cpumask_release(prev);
err = 3;
return 0;
}
v = cpumask_map_value_lookup();
if (!v)
bpf_rcu_read_lock();
local = global_mask;
if (!local) {
err = 4;
bpf_rcu_read_unlock();
return 0;
}
cpumask = bpf_cpumask_kptr_get(&v->cpumask);
bpf_rcu_read_unlock();
/* RCU region is exited before calling KF_RCU kfunc. */
bpf_cpumask_test_cpu(0, (const struct cpumask *)local);
/* cpumask is never released. */
return 0;
}
SEC("tp_btf/task_newtask")
__failure __msg("NULL pointer passed to trusted arg0")
int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags)
__failure __msg("NULL pointer passed to trusted arg1")
int BPF_PROG(test_global_mask_no_null_check, struct task_struct *task, u64 clone_flags)
{
/* NULL passed to KF_TRUSTED_ARGS kfunc. */
bpf_cpumask_empty(NULL);
struct bpf_cpumask *local, *prev;
local = create_cpumask();
if (!local)
return 0;
prev = bpf_kptr_xchg(&global_mask, local);
if (prev) {
bpf_cpumask_release(prev);
err = 3;
return 0;
}
bpf_rcu_read_lock();
local = global_mask;
/* No NULL check is performed on global cpumask kptr. */
bpf_cpumask_test_cpu(0, (const struct cpumask *)local);
bpf_rcu_read_unlock();
return 0;
}
......@@ -395,31 +395,34 @@ int BPF_PROG(test_insert_remove_release, struct task_struct *task, u64 clone_fla
}
SEC("tp_btf/task_newtask")
int BPF_PROG(test_insert_kptr_get_release, struct task_struct *task, u64 clone_flags)
int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags)
{
struct bpf_cpumask *cpumask;
struct __cpumask_map_value *v;
struct bpf_cpumask *local, *prev;
cpumask = create_cpumask();
if (!cpumask)
if (!is_test_task())
return 0;
if (cpumask_map_insert(cpumask)) {
local = create_cpumask();
if (!local)
return 0;
prev = bpf_kptr_xchg(&global_mask, local);
if (prev) {
bpf_cpumask_release(prev);
err = 3;
return 0;
}
v = cpumask_map_value_lookup();
if (!v) {
bpf_rcu_read_lock();
local = global_mask;
if (!local) {
err = 4;
bpf_rcu_read_unlock();
return 0;
}
cpumask = bpf_cpumask_kptr_get(&v->cpumask);
if (cpumask)
bpf_cpumask_release(cpumask);
else
err = 5;
bpf_cpumask_test_cpu(0, (const struct cpumask *)local);
bpf_rcu_read_unlock();
return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment