Commit 64619b28 authored by Uladzislau Rezki (Sony)'s avatar Uladzislau Rezki (Sony)

Merge branches 'fixes.2024.04.15a', 'misc.2024.04.12a',...

Merge branches 'fixes.2024.04.15a', 'misc.2024.04.12a', 'rcu-sync-normal-improve.2024.04.15a', 'rcu-tasks.2024.04.15a' and 'rcutorture.2024.04.15a' into rcu-merge.2024.04.15a

fixes.2024.04.15a: RCU fixes
misc.2024.04.12a: Miscellaneous fixes
rcu-sync-normal-improve.2024.04.15a: Improving synchronize_rcu() call
rcu-tasks.2024.04.15a: Tasks RCU updates
rcutorture.2024.04.15a: Torture-test updates
......@@ -445,7 +445,8 @@ Nadav Amit <nadav.amit@gmail.com> <namit@cs.technion.ac.il>
Nadia Yvette Chambers <nyc@holomorphy.com> William Lee Irwin III <wli@holomorphy.com>
Naoya Horiguchi <naoya.horiguchi@nec.com> <n-horiguchi@ah.jp.nec.com>
Nathan Chancellor <nathan@kernel.org> <natechancellor@gmail.com>
Neeraj Upadhyay <quic_neeraju@quicinc.com> <neeraju@codeaurora.org>
Neeraj Upadhyay <neeraj.upadhyay@kernel.org> <quic_neeraju@quicinc.com>
Neeraj Upadhyay <neeraj.upadhyay@kernel.org> <neeraju@codeaurora.org>
Neil Armstrong <neil.armstrong@linaro.org> <narmstrong@baylibre.com>
Nguyen Anh Quynh <aquynh@gmail.com>
Nicholas Piggin <npiggin@gmail.com> <npiggen@suse.de>
......
......@@ -427,7 +427,7 @@ their assorted primitives.
This section shows a simple use of the core RCU API to protect a
global pointer to a dynamically allocated structure. More-typical
uses of RCU may be found in listRCU.rst, arrayRCU.rst, and NMI-RCU.rst.
uses of RCU may be found in listRCU.rst and NMI-RCU.rst.
::
struct foo {
......@@ -510,8 +510,8 @@ So, to sum up:
data item.
See checklist.rst for additional rules to follow when using RCU.
And again, more-typical uses of RCU may be found in listRCU.rst,
arrayRCU.rst, and NMI-RCU.rst.
And again, more-typical uses of RCU may be found in listRCU.rst
and NMI-RCU.rst.
.. _4_whatisRCU:
......
......@@ -5091,6 +5091,20 @@
delay, memory pressure or callback list growing too
big.
rcutree.rcu_normal_wake_from_gp= [KNL]
Reduces the latency of synchronize_rcu() calls. This approach
keeps its own track of synchronize_rcu() callers, so it does
not interact with regular callbacks because it does not use
the call_rcu[_hurry]() path. Please note that this applies
to normal grace periods.
How to enable it:
echo 1 > /sys/module/rcutree/parameters/rcu_normal_wake_from_gp
or pass a boot parameter "rcutree.rcu_normal_wake_from_gp=1"
Default is 0.
rcuscale.gp_async= [KNL]
Measure performance of asynchronous
grace-period primitives such as call_rcu().
......
......@@ -18591,7 +18591,7 @@ F: tools/testing/selftests/resctrl/
READ-COPY UPDATE (RCU)
M: "Paul E. McKenney" <paulmck@kernel.org>
M: Frederic Weisbecker <frederic@kernel.org> (kernel/rcu/tree_nocb.h)
M: Neeraj Upadhyay <quic_neeraju@quicinc.com> (kernel/rcu/tasks.h)
M: Neeraj Upadhyay <neeraj.upadhyay@kernel.org> (kernel/rcu/tasks.h)
M: Joel Fernandes <joel@joelfernandes.org>
M: Josh Triplett <josh@joshtriplett.org>
M: Boqun Feng <boqun.feng@gmail.com>
......
......@@ -55,7 +55,7 @@ config KPROBES
depends on MODULES
depends on HAVE_KPROBES
select KALLSYMS
select TASKS_RCU if PREEMPTION
select NEED_TASKS_RCU
help
Kprobes allows you to trap at almost any kernel address and
execute a callback function. register_kprobe() establishes
......@@ -104,7 +104,7 @@ config STATIC_CALL_SELFTEST
config OPTPROBES
def_bool y
depends on KPROBES && HAVE_OPTPROBES
select TASKS_RCU if PREEMPTION
select NEED_TASKS_RCU
config KPROBES_ON_FTRACE
def_bool y
......
......@@ -19,18 +19,18 @@ struct rcu_synchronize {
};
void wakeme_after_rcu(struct rcu_head *head);
void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
void __wait_rcu_gp(bool checktiny, unsigned int state, int n, call_rcu_func_t *crcu_array,
struct rcu_synchronize *rs_array);
#define _wait_rcu_gp(checktiny, ...) \
do { \
call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \
struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \
__wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array), \
__crcu_array, __rs_array); \
#define _wait_rcu_gp(checktiny, state, ...) \
do { \
call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \
struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \
__wait_rcu_gp(checktiny, state, ARRAY_SIZE(__crcu_array), __crcu_array, __rs_array); \
} while (0)
#define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__)
#define wait_rcu_gp(...) _wait_rcu_gp(false, TASK_UNINTERRUPTIBLE, __VA_ARGS__)
#define wait_rcu_gp_state(state, ...) _wait_rcu_gp(false, state, __VA_ARGS__)
/**
* synchronize_rcu_mult - Wait concurrently for multiple grace periods
......@@ -54,7 +54,7 @@ do { \
* grace period.
*/
#define synchronize_rcu_mult(...) \
_wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__)
_wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), TASK_UNINTERRUPTIBLE, __VA_ARGS__)
static inline void cond_resched_rcu(void)
{
......
......@@ -707,6 +707,33 @@ TRACE_EVENT_RCU(rcu_invoke_kfree_bulk_callback,
__entry->rcuname, __entry->p, __entry->nr_records)
);
/*
* Tracepoint for normal synchronize_rcu() states. The first argument
* is the RCU flavor, the second argument is a pointer to the rcu_head,
* and the last one is the event.
*/
TRACE_EVENT_RCU(rcu_sr_normal,
TP_PROTO(const char *rcuname, struct rcu_head *rhp, const char *srevent),
TP_ARGS(rcuname, rhp, srevent),
TP_STRUCT__entry(
__field(const char *, rcuname)
__field(void *, rhp)
__field(const char *, srevent)
),
TP_fast_assign(
__entry->rcuname = rcuname;
__entry->rhp = rhp;
__entry->srevent = srevent;
),
TP_printk("%s rhp=0x%p event=%s",
__entry->rcuname, __entry->rhp, __entry->srevent)
);
/*
* Tracepoint for exiting rcu_do_batch after RCU callbacks have been
* invoked. The first argument is the name of the RCU flavor,
......
......@@ -28,7 +28,7 @@ config BPF_SYSCALL
bool "Enable bpf() system call"
select BPF
select IRQ_WORK
select TASKS_RCU if PREEMPTION
select NEED_TASKS_RCU
select TASKS_TRACE_RCU
select BINARY_PRINTF
select NET_SOCK_MSG if NET
......
......@@ -333,7 +333,7 @@ static void bpf_tramp_image_put(struct bpf_tramp_image *im)
int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_JUMP,
NULL, im->ip_epilogue);
WARN_ON(err);
if (IS_ENABLED(CONFIG_PREEMPTION))
if (IS_ENABLED(CONFIG_TASKS_RCU))
call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
else
percpu_ref_kill(&im->pcref);
......
......@@ -522,12 +522,18 @@ static inline void show_rcu_tasks_gp_kthreads(void) {}
#ifdef CONFIG_TASKS_RCU
struct task_struct *get_rcu_tasks_gp_kthread(void);
void rcu_tasks_get_gp_data(int *flags, unsigned long *gp_seq);
#endif // # ifdef CONFIG_TASKS_RCU
#ifdef CONFIG_TASKS_RUDE_RCU
struct task_struct *get_rcu_tasks_rude_gp_kthread(void);
void rcu_tasks_rude_get_gp_data(int *flags, unsigned long *gp_seq);
#endif // # ifdef CONFIG_TASKS_RUDE_RCU
#ifdef CONFIG_TASKS_TRACE_RCU
void rcu_tasks_trace_get_gp_data(int *flags, unsigned long *gp_seq);
#endif
#ifdef CONFIG_TASKS_RCU_GENERIC
void tasks_cblist_init_generic(void);
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
......@@ -557,8 +563,7 @@ static inline void rcu_set_jiffies_lazy_flush(unsigned long j) { }
#endif
#if defined(CONFIG_TREE_RCU)
void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
unsigned long *gp_seq);
void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq);
void do_trace_rcu_torture_read(const char *rcutorturename,
struct rcu_head *rhp,
unsigned long secs,
......@@ -566,8 +571,7 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
unsigned long c);
void rcu_gp_set_torture_wait(int duration);
#else
static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
int *flags, unsigned long *gp_seq)
static inline void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq)
{
*flags = 0;
*gp_seq = 0;
......@@ -587,20 +591,16 @@ static inline void rcu_gp_set_torture_wait(int duration) { }
#ifdef CONFIG_TINY_SRCU
static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
struct srcu_struct *sp, int *flags,
static inline void srcutorture_get_gp_data(struct srcu_struct *sp, int *flags,
unsigned long *gp_seq)
{
if (test_type != SRCU_FLAVOR)
return;
*flags = 0;
*gp_seq = sp->srcu_idx;
}
#elif defined(CONFIG_TREE_SRCU)
void srcutorture_get_gp_data(enum rcutorture_type test_type,
struct srcu_struct *sp, int *flags,
void srcutorture_get_gp_data(struct srcu_struct *sp, int *flags,
unsigned long *gp_seq);
#endif
......
This diff is collapsed.
......@@ -1826,12 +1826,9 @@ static void process_srcu(struct work_struct *work)
srcu_reschedule(ssp, curdelay);
}
void srcutorture_get_gp_data(enum rcutorture_type test_type,
struct srcu_struct *ssp, int *flags,
void srcutorture_get_gp_data(struct srcu_struct *ssp, int *flags,
unsigned long *gp_seq)
{
if (test_type != SRCU_FLAVOR)
return;
*flags = 0;
*gp_seq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq);
}
......
......@@ -74,6 +74,7 @@ struct rcu_tasks_percpu {
* @holdouts_func: This flavor's holdout-list scan function (optional).
* @postgp_func: This flavor's post-grace-period function (optional).
* @call_func: This flavor's call_rcu()-equivalent function.
* @wait_state: Task state for synchronous grace-period waits (default TASK_UNINTERRUPTIBLE).
* @rtpcpu: This flavor's rcu_tasks_percpu structure.
* @percpu_enqueue_shift: Shift down CPU ID this much when enqueuing callbacks.
* @percpu_enqueue_lim: Number of per-CPU callback queues in use for enqueuing.
......@@ -107,6 +108,7 @@ struct rcu_tasks {
holdouts_func_t holdouts_func;
postgp_func_t postgp_func;
call_rcu_func_t call_func;
unsigned int wait_state;
struct rcu_tasks_percpu __percpu *rtpcpu;
int percpu_enqueue_shift;
int percpu_enqueue_lim;
......@@ -134,6 +136,7 @@ static struct rcu_tasks rt_name = \
.tasks_gp_mutex = __MUTEX_INITIALIZER(rt_name.tasks_gp_mutex), \
.gp_func = gp, \
.call_func = call, \
.wait_state = TASK_UNINTERRUPTIBLE, \
.rtpcpu = &rt_name ## __percpu, \
.lazy_jiffies = DIV_ROUND_UP(HZ, 4), \
.name = n, \
......@@ -147,7 +150,7 @@ static struct rcu_tasks rt_name = \
#ifdef CONFIG_TASKS_RCU
/* Report delay in synchronize_srcu() completion in rcu_tasks_postscan(). */
/* Report delay of scan exiting tasklist in rcu_tasks_postscan(). */
static void tasks_rcu_exit_srcu_stall(struct timer_list *unused);
static DEFINE_TIMER(tasks_rcu_exit_srcu_stall_timer, tasks_rcu_exit_srcu_stall);
#endif
......@@ -638,7 +641,7 @@ static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
// If the grace-period kthread is running, use it.
if (READ_ONCE(rtp->kthread_ptr)) {
wait_rcu_gp(rtp->call_func);
wait_rcu_gp_state(rtp->wait_state, rtp->call_func);
return;
}
rcu_tasks_one_gp(rtp, true);
......@@ -1160,6 +1163,7 @@ static int __init rcu_spawn_tasks_kthread(void)
rcu_tasks.postscan_func = rcu_tasks_postscan;
rcu_tasks.holdouts_func = check_all_holdout_tasks;
rcu_tasks.postgp_func = rcu_tasks_postgp;
rcu_tasks.wait_state = TASK_IDLE;
rcu_spawn_tasks_kthread_generic(&rcu_tasks);
return 0;
}
......@@ -1178,6 +1182,13 @@ struct task_struct *get_rcu_tasks_gp_kthread(void)
}
EXPORT_SYMBOL_GPL(get_rcu_tasks_gp_kthread);
/*
 * Report grace-period data for the rcu_tasks flavor.
 *
 * @flags:  output; unconditionally set to 0 by this function.
 * @gp_seq: output; receives rcu_seq_current() of rcu_tasks.tasks_gp_seq.
 *
 * NOTE(review): the signature matches rcutorture_get_gp_data(), so this is
 * presumably intended for torture-test use -- confirm against the callers.
 */
void rcu_tasks_get_gp_data(int *flags, unsigned long *gp_seq)
{
*flags = 0;
*gp_seq = rcu_seq_current(&rcu_tasks.tasks_gp_seq);
}
EXPORT_SYMBOL_GPL(rcu_tasks_get_gp_data);
/*
* Protect against tasklist scan blind spot while the task is exiting and
* may be removed from the tasklist. Do this by adding the task to yet
......@@ -1199,8 +1210,7 @@ void exit_tasks_rcu_start(void)
rtpcp = this_cpu_ptr(rcu_tasks.rtpcpu);
t->rcu_tasks_exit_cpu = smp_processor_id();
raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
if (!rtpcp->rtp_exit_list.next)
INIT_LIST_HEAD(&rtpcp->rtp_exit_list);
WARN_ON_ONCE(!rtpcp->rtp_exit_list.next);
list_add(&t->rcu_tasks_exit_list, &rtpcp->rtp_exit_list);
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
preempt_enable();
......@@ -1358,6 +1368,13 @@ struct task_struct *get_rcu_tasks_rude_gp_kthread(void)
}
EXPORT_SYMBOL_GPL(get_rcu_tasks_rude_gp_kthread);
/*
 * Report grace-period data for the rcu_tasks_rude flavor.
 *
 * @flags:  output; unconditionally set to 0 by this function.
 * @gp_seq: output; receives rcu_seq_current() of rcu_tasks_rude.tasks_gp_seq.
 *
 * NOTE(review): the signature matches rcutorture_get_gp_data(), so this is
 * presumably intended for torture-test use -- confirm against the callers.
 */
void rcu_tasks_rude_get_gp_data(int *flags, unsigned long *gp_seq)
{
*flags = 0;
*gp_seq = rcu_seq_current(&rcu_tasks_rude.tasks_gp_seq);
}
EXPORT_SYMBOL_GPL(rcu_tasks_rude_get_gp_data);
#endif /* #ifdef CONFIG_TASKS_RUDE_RCU */
////////////////////////////////////////////////////////////////////////
......@@ -2002,7 +2019,7 @@ void show_rcu_tasks_trace_gp_kthread(void)
{
char buf[64];
sprintf(buf, "N%lu h:%lu/%lu/%lu",
snprintf(buf, sizeof(buf), "N%lu h:%lu/%lu/%lu",
data_race(n_trc_holdouts),
data_race(n_heavy_reader_ofl_updates),
data_race(n_heavy_reader_updates),
......@@ -2018,6 +2035,13 @@ struct task_struct *get_rcu_tasks_trace_gp_kthread(void)
}
EXPORT_SYMBOL_GPL(get_rcu_tasks_trace_gp_kthread);
/*
 * Report grace-period data for the rcu_tasks_trace flavor.
 *
 * @flags:  output; unconditionally set to 0 by this function.
 * @gp_seq: output; receives rcu_seq_current() of rcu_tasks_trace.tasks_gp_seq.
 *
 * NOTE(review): the signature matches rcutorture_get_gp_data(), so this is
 * presumably intended for torture-test use -- confirm against the callers.
 */
void rcu_tasks_trace_get_gp_data(int *flags, unsigned long *gp_seq)
{
*flags = 0;
*gp_seq = rcu_seq_current(&rcu_tasks_trace.tasks_gp_seq);
}
EXPORT_SYMBOL_GPL(rcu_tasks_trace_get_gp_data);
#else /* #ifdef CONFIG_TASKS_TRACE_RCU */
static void exit_tasks_rcu_finish_trace(struct task_struct *t) { }
#endif /* #else #ifdef CONFIG_TASKS_TRACE_RCU */
......
This diff is collapsed.
......@@ -315,6 +315,19 @@ do { \
__set_current_state(TASK_RUNNING); \
} while (0)
/*
* The maximum number of synchronize_rcu() users that are
* awakened directly by the rcu_gp_kthread(). The remaining
* users are deferred to the main worker.
*/
#define SR_MAX_USERS_WAKE_FROM_GP 5
#define SR_NORMAL_GP_WAIT_HEAD_MAX 5
/*
 * Element type of the srs_wait_nodes[] array embedded in struct rcu_state
 * (SR_NORMAL_GP_WAIT_HEAD_MAX entries).
 *
 * NOTE(review): the code that uses these nodes is not visible here; the
 * field descriptions below are assumptions to be confirmed against it.
 */
struct sr_wait_node {
/* Presumably nonzero while this node is claimed -- TODO confirm. */
atomic_t inuse;
/* Lock-less list linkage; presumably used to queue the node on the srs_* lists -- TODO confirm. */
struct llist_node node;
};
/*
* RCU global state, including node hierarchy. This hierarchy is
* represented in "heap" form in a dense array. The root (first level)
......@@ -400,6 +413,13 @@ struct rcu_state {
/* Synchronize offline with */
/* GP pre-initialization. */
int nocb_is_setup; /* nocb is setup from boot */
/* synchronize_rcu() part. */
struct llist_head srs_next; /* request a GP users. */
struct llist_node *srs_wait_tail; /* wait for GP users. */
struct llist_node *srs_done_tail; /* ready for GP users. */
struct sr_wait_node srs_wait_nodes[SR_NORMAL_GP_WAIT_HEAD_MAX];
struct work_struct srs_cleanup_work;
};
/* Values for rcu_state structure's gp_flags field. */
......
......@@ -930,7 +930,7 @@ void synchronize_rcu_expedited(void)
/* If expedited grace periods are prohibited, fall back to normal. */
if (rcu_gp_is_normal()) {
wait_rcu_gp(call_rcu_hurry);
synchronize_rcu_normal();
return;
}
......
......@@ -408,7 +408,7 @@ void wakeme_after_rcu(struct rcu_head *head)
}
EXPORT_SYMBOL_GPL(wakeme_after_rcu);
void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
void __wait_rcu_gp(bool checktiny, unsigned int state, int n, call_rcu_func_t *crcu_array,
struct rcu_synchronize *rs_array)
{
int i;
......@@ -440,7 +440,7 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
if (crcu_array[j] == crcu_array[i])
break;
if (j == i) {
wait_for_completion(&rs_array[i].completion);
wait_for_completion_state(&rs_array[i].completion, state);
destroy_rcu_head_on_stack(&rs_array[i].head);
}
}
......
......@@ -163,7 +163,7 @@ config TRACING
select BINARY_PRINTF
select EVENT_TRACING
select TRACE_CLOCK
select TASKS_RCU if PREEMPTION
select NEED_TASKS_RCU
config GENERIC_TRACER
bool
......@@ -204,7 +204,7 @@ config FUNCTION_TRACER
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
select GLOB
select TASKS_RCU if PREEMPTION
select NEED_TASKS_RCU
select TASKS_RUDE_RCU
help
Enable the kernel to trace every kernel function. This is done
......
......@@ -3157,8 +3157,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
* synchronize_rcu_tasks() will wait for those tasks to
* execute and either schedule voluntarily or enter user space.
*/
if (IS_ENABLED(CONFIG_PREEMPTION))
synchronize_rcu_tasks();
synchronize_rcu_tasks();
ftrace_trampoline_free(ops);
}
......
......@@ -391,7 +391,7 @@ __EOF__
forceflavor="`echo $flavor | sed -e 's/^CONFIG/CONFIG_FORCE/'`"
deselectedflavors="`grep -v $flavor $T/rcutasksflavors | tr '\012' ' ' | tr -s ' ' | sed -e 's/ *$//'`"
echo " --- Running RCU Tasks Trace flavor $flavor `date`" >> $rtfdir/log
tools/testing/selftests/rcutorture/bin/kvm.sh --datestamp "$ds/results-rcutasksflavors/$flavor" --buildonly --configs "TINY01 TREE04" --kconfig "CONFIG_RCU_EXPERT=y CONFIG_RCU_SCALE_TEST=y $forceflavor=y $deselectedflavors" --trust-make > $T/$flavor.out 2>&1
tools/testing/selftests/rcutorture/bin/kvm.sh --datestamp "$ds/results-rcutasksflavors/$flavor" --buildonly --configs "TINY01 TREE04" --kconfig "CONFIG_RCU_EXPERT=y CONFIG_RCU_SCALE_TEST=y CONFIG_KPROBES=n CONFIG_RCU_TRACE=n CONFIG_TRACING=n CONFIG_BLK_DEV_IO_TRACE=n CONFIG_UPROBE_EVENTS=n $forceflavor=y $deselectedflavors" --trust-make > $T/$flavor.out 2>&1
retcode=$?
if test "$retcode" -ne 0
then
......@@ -425,7 +425,7 @@ fi
if test "$do_scftorture" = "yes"
then
# Scale memory based on the number of CPUs.
scfmem=$((2+HALF_ALLOTED_CPUS/16))
scfmem=$((3+HALF_ALLOTED_CPUS/16))
torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1"
torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory ${scfmem}G --trust-make
fi
......@@ -559,7 +559,7 @@ do_kcsan="$do_kcsan_save"
if test "$do_kvfree" = "yes"
then
torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot"
torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration $duration_rcutorture --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
fi
if test "$do_clocksourcewd" = "yes"
......
......@@ -10,8 +10,9 @@ CONFIG_NO_HZ_FULL=n
CONFIG_RCU_TRACE=n
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_RCU_BOOST=n
CONFIG_RCU_BOOST=y
CONFIG_RCU_BOOST_DELAY=100
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
#CHECK#CONFIG_RCU_EXPERT=n
CONFIG_RCU_EXPERT=y
CONFIG_KPROBES=n
CONFIG_FTRACE=n
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment