Commit eb814cf1 authored by Delyan Kratunov, committed by Alexei Starovoitov

selftests/bpf: Fix task_local_storage/exit_creds rcu usage

BPF CI has revealed flakiness in the task_local_storage/exit_creds test.
The failure point in CI [1] is that null_ptr_count equals 0,
which indicates that the program hasn't run yet. This points to
kern_sync_rcu (sys_membarrier -> synchronize_rcu underneath) not
waiting sufficiently.
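
For reference, kern_sync_rcu boils down to roughly the following (a
sketch; the exact helper lives elsewhere in the selftests and may use
the older MEMBARRIER_CMD_SHARED spelling of the same command):

  #include <linux/membarrier.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  /* Sketch: MEMBARRIER_CMD_GLOBAL is implemented with synchronize_rcu()
   * in the kernel, so returning from it only guarantees that read-side
   * sections which started *before* the call have finished.
   */
  static int kern_sync_rcu(void)
  {
  	return syscall(__NR_membarrier, MEMBARRIER_CMD_GLOBAL, 0, 0);
  }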

Indeed, synchronize_rcu only waits for read-side sections that started
before the call. If the program execution starts *during* the
synchronize_rcu invocation (due to, say, preemption), the test won't
wait long enough.
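
Schematically, one losing interleaving looks like this (illustrative,
not taken from the CI log):

  test process                      exiting "ls" (BPF program)
  ------------                      --------------------------
  system("ls")
  kern_sync_rcu()
    synchronize_rcu() begins
                                    read-side section begins
    synchronize_rcu() returns       (started after the grace period,
  reads null_ptr_count == 0         so it is not waited for)
                                    null_ptr_count incremented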

As a speculative fix, call synchronize_rcu in a loop until an
explicit run counter has gone up.

  [1]: https://github.com/kernel-patches/bpf/actions/runs/3268263235/jobs/5374940791

Signed-off-by: Delyan Kratunov <delyank@meta.com>
Link: https://lore.kernel.org/r/156d4ef82275a074e8da8f4cffbd01b0c1466493.camel@meta.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent 12f96823
--- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@@ -39,7 +39,8 @@ static void test_sys_enter_exit(void)
 static void test_exit_creds(void)
 {
 	struct task_local_storage_exit_creds *skel;
-	int err;
+	int err, run_count, sync_rcu_calls = 0;
+	const int MAX_SYNC_RCU_CALLS = 1000;
 
 	skel = task_local_storage_exit_creds__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
@@ -53,8 +54,19 @@ static void test_exit_creds(void)
 	if (CHECK_FAIL(system("ls > /dev/null")))
 		goto out;
 
-	/* sync rcu to make sure exit_creds() is called for "ls" */
-	kern_sync_rcu();
+	/* kern_sync_rcu is not enough on its own as the read section we want
+	 * to wait for may start after we enter synchronize_rcu, so our call
+	 * won't wait for the section to finish. Loop on the run counter
+	 * as well to ensure the program has run.
+	 */
+	do {
+		kern_sync_rcu();
+		run_count = __atomic_load_n(&skel->bss->run_count, __ATOMIC_SEQ_CST);
+	} while (run_count == 0 && ++sync_rcu_calls < MAX_SYNC_RCU_CALLS);
+
+	ASSERT_NEQ(sync_rcu_calls, MAX_SYNC_RCU_CALLS,
+		   "sync_rcu count too high");
+	ASSERT_NEQ(run_count, 0, "run_count");
 	ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count");
 	ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count");
 out:
--- a/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c
+++ b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c
@@ -14,6 +14,7 @@ struct {
 	__type(value, __u64);
 } task_storage SEC(".maps");
 
+int run_count = 0;
 int valid_ptr_count = 0;
 int null_ptr_count = 0;
 
@@ -28,5 +29,7 @@ int BPF_PROG(trace_exit_creds, struct task_struct *task)
 		__sync_fetch_and_add(&valid_ptr_count, 1);
 	else
 		__sync_fetch_and_add(&null_ptr_count, 1);
+
+	__sync_fetch_and_add(&run_count, 1);
 	return 0;
 }
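
To verify, the subtest can be selected directly with the test_progs
runner (illustrative invocation, paths relative to the kernel tree):

  $ cd tools/testing/selftests/bpf
  $ make
  $ ./test_progs -t task_local_storage/exit_creds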