Commit 8e1704b6 authored by Paul E. McKenney


Merge branches 'doc.2023.01.05a', 'fixes.2023.01.23a', 'kvfree.2023.01.03a', 'srcu.2023.01.03a', 'srcu-always.2023.02.02a', 'tasks.2023.01.03a', 'torture.2023.01.05a' and 'torturescript.2023.01.03a' into HEAD

doc.2023.01.05a: Documentation update.
fixes.2023.01.23a: Miscellaneous fixes.
kvfree.2023.01.03a: kvfree_rcu() updates.
srcu.2023.01.03a: SRCU updates.
srcu-always.2023.02.02a: Finish making SRCU be unconditionally available.
tasks.2023.01.03a: Tasks-RCU updates.
torture.2023.01.05a: Torture-test updates.
torturescript.2023.01.03a: Torture-test scripting updates.
......@@ -5113,6 +5113,11 @@
rcupdate.rcu_cpu_stall_timeout to be used (after
conversion from seconds to milliseconds).
rcupdate.rcu_exp_stall_task_details= [KNL]
Print stack dumps of any tasks blocking the
current expedited RCU grace period during an
expedited RCU CPU stall warning.
rcupdate.rcu_expedited= [KNL]
Use expedited grace-period primitives, for
example, synchronize_rcu_expedited() instead
......
......@@ -181,7 +181,6 @@ void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode)
}
EXPORT_SYMBOL_GPL(fw_devlink_purge_absent_suppliers);
#ifdef CONFIG_SRCU
static DEFINE_MUTEX(device_links_lock);
DEFINE_STATIC_SRCU(device_links_srcu);
......@@ -220,47 +219,6 @@ static void device_link_remove_from_lists(struct device_link *link)
list_del_rcu(&link->s_node);
list_del_rcu(&link->c_node);
}
#else /* !CONFIG_SRCU */
static DECLARE_RWSEM(device_links_lock);
static inline void device_links_write_lock(void)
{
down_write(&device_links_lock);
}
static inline void device_links_write_unlock(void)
{
up_write(&device_links_lock);
}
int device_links_read_lock(void)
{
down_read(&device_links_lock);
return 0;
}
void device_links_read_unlock(int not_used)
{
up_read(&device_links_lock);
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
int device_links_read_lock_held(void)
{
return lockdep_is_held(&device_links_lock);
}
#endif
static inline void device_link_synchronize_removal(void)
{
}
static void device_link_remove_from_lists(struct device_link *link)
{
list_del(&link->s_node);
list_del(&link->c_node);
}
#endif /* !CONFIG_SRCU */
static bool device_is_ancestor(struct device *dev, struct device *target)
{
......
# SPDX-License-Identifier: GPL-2.0-only
menuconfig DAX
tristate "DAX: direct access to differentiated memory"
select SRCU
default m if NVDIMM_DAX
if DAX
......
......@@ -2,7 +2,6 @@
config STM
tristate "System Trace Module devices"
select CONFIGFS_FS
select SRCU
help
A System Trace Module (STM) is a device exporting data in System
Trace Protocol (STP) format as defined by MIPI STP standards.
......
......@@ -6,7 +6,6 @@
menuconfig MD
bool "Multiple devices driver support (RAID and LVM)"
depends on BLOCK
select SRCU
help
Support multiple physical spindles through a single logical device.
Required for RAID and logical volume management.
......
......@@ -334,7 +334,6 @@ config NETCONSOLE_DYNAMIC
config NETPOLL
def_bool NETCONSOLE
select SRCU
config NET_POLL_CONTROLLER
def_bool NETPOLL
......
......@@ -258,7 +258,7 @@ config PCIE_MEDIATEK_GEN3
MediaTek SoCs.
config VMD
depends on PCI_MSI && X86_64 && SRCU && !UML
depends on PCI_MSI && X86_64 && !UML
tristate "Intel Volume Management Device Driver"
help
Adds support for the Intel Volume Management Device (VMD). VMD is a
......
......@@ -17,7 +17,6 @@ config BTRFS_FS
select FS_IOMAP
select RAID6_PQ
select XOR_BLOCKS
select SRCU
depends on PAGE_SIZE_LESS_THAN_256KB
help
......
......@@ -1889,7 +1889,6 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
}
EXPORT_SYMBOL(generic_setlease);
#if IS_ENABLED(CONFIG_SRCU)
/*
* Kernel subsystems can register to be notified on any attempt to set
* a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd
......@@ -1923,30 +1922,6 @@ void lease_unregister_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(lease_unregister_notifier);
#else /* !IS_ENABLED(CONFIG_SRCU) */
static inline void
lease_notifier_chain_init(void)
{
}
static inline void
setlease_notifier(long arg, struct file_lock *lease)
{
}
int lease_register_notifier(struct notifier_block *nb)
{
return 0;
}
EXPORT_SYMBOL_GPL(lease_register_notifier);
void lease_unregister_notifier(struct notifier_block *nb)
{
}
EXPORT_SYMBOL_GPL(lease_unregister_notifier);
#endif /* IS_ENABLED(CONFIG_SRCU) */
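A minimal sketch of how a subsystem might use this lease notifier chain, assuming a hypothetical my_lease_notify() callback and my_lease_nb notifier_block; lease_register_notifier() and lease_unregister_notifier() are the interfaces exported above:
/* Hypothetical callback: @arg is the lease type, @data points to the struct file_lock. */
static int my_lease_notify(struct notifier_block *nb, unsigned long arg, void *data)
{
        struct file_lock *lease = data;

        /* React to the attempted lease here. */
        (void)lease;
        return NOTIFY_OK;
}

static struct notifier_block my_lease_nb = {
        .notifier_call = my_lease_notify,
};

static int __init my_module_init(void)
{
        return lease_register_notifier(&my_lease_nb);
}

static void __exit my_module_exit(void)
{
        lease_unregister_notifier(&my_lease_nb);
}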
/**
* vfs_setlease - sets a lease on an open file
* @filp: file pointer
......
# SPDX-License-Identifier: GPL-2.0-only
config FSNOTIFY
def_bool n
select SRCU
source "fs/notify/dnotify/Kconfig"
source "fs/notify/inotify/Kconfig"
......
......@@ -6,7 +6,6 @@
config QUOTA
bool "Quota support"
select QUOTACTL
select SRCU
help
If you say Y here, you will be able to set per user limits for disk
usage (also called disk quotas). Currently, it works for the
......
......@@ -139,7 +139,7 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
if (last) {
n->next = last->next;
n->pprev = &last->next;
rcu_assign_pointer(hlist_next_rcu(last), n);
rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
} else {
hlist_nulls_add_head_rcu(n, h);
}
......
......@@ -238,6 +238,7 @@ void synchronize_rcu_tasks_rude(void);
#define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false)
void exit_tasks_rcu_start(void);
void exit_tasks_rcu_stop(void);
void exit_tasks_rcu_finish(void);
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
#define rcu_tasks_classic_qs(t, preempt) do { } while (0)
......@@ -246,6 +247,7 @@ void exit_tasks_rcu_finish(void);
#define call_rcu_tasks call_rcu
#define synchronize_rcu_tasks synchronize_rcu
static inline void exit_tasks_rcu_start(void) { }
static inline void exit_tasks_rcu_stop(void) { }
static inline void exit_tasks_rcu_finish(void) { }
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
......@@ -374,11 +376,18 @@ static inline int debug_lockdep_rcu_enabled(void)
* RCU_LOCKDEP_WARN - emit lockdep splat if specified condition is met
* @c: condition to check
* @s: informative message
*
* This checks debug_lockdep_rcu_enabled() before checking (c) to
* prevent early boot splats due to lockdep not yet being initialized,
* and rechecks it after checking (c) to prevent false-positive splats
* due to races with lockdep being disabled. See commit 3066820034b5dd
* ("rcu: Reject RCU_LOCKDEP_WARN() false positives") for more detail.
*/
#define RCU_LOCKDEP_WARN(c, s) \
do { \
static bool __section(".data.unlikely") __warned; \
if ((c) && debug_lockdep_rcu_enabled() && !__warned) { \
if (debug_lockdep_rcu_enabled() && (c) && \
debug_lockdep_rcu_enabled() && !__warned) { \
__warned = true; \
lockdep_rcu_suspicious(__FILE__, __LINE__, s); \
} \
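For illustration only (not part of this diff), a typical assertion at a call site might look as follows; example_lookup() and its table parameter are hypothetical:
static struct foo *example_lookup(struct foo __rcu **table, int key)
{
        RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
                         "example_lookup() called without rcu_read_lock()");
        return rcu_dereference(table[key]);
}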
......@@ -1004,6 +1013,9 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
#define kvfree_rcu(...) KVFREE_GET_MACRO(__VA_ARGS__, \
kvfree_rcu_arg_2, kvfree_rcu_arg_1)(__VA_ARGS__)
#define kvfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr)
#define kfree_rcu_mightsleep(ptr) kvfree_rcu_mightsleep(ptr)
#define KVFREE_GET_MACRO(_1, _2, NAME, ...) NAME
#define kvfree_rcu_arg_2(ptr, rhf) \
do { \
......@@ -1011,8 +1023,7 @@ do { \
\
if (___p) { \
BUILD_BUG_ON(!__is_kvfree_rcu_offset(offsetof(typeof(*(ptr)), rhf))); \
kvfree_call_rcu(&((___p)->rhf), (rcu_callback_t)(unsigned long) \
(offsetof(typeof(*(ptr)), rhf))); \
kvfree_call_rcu(&((___p)->rhf), (void *) (___p)); \
} \
} while (0)
......@@ -1021,7 +1032,7 @@ do { \
typeof(ptr) ___p = (ptr); \
\
if (___p) \
kvfree_call_rcu(NULL, (rcu_callback_t) (___p)); \
kvfree_call_rcu(NULL, (void *) (___p)); \
} while (0)
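For reference, a hedged usage sketch of the two forms; struct foo and the freeing helpers are hypothetical:
struct foo {
        int data;
        struct rcu_head rh;
};

static void foo_free(struct foo *p)
{
        kvfree_rcu(p, rh);              /* two-argument form: usable from atomic context */
}

static void foo_free_sleepable(struct foo *p)
{
        kvfree_rcu_mightsleep(p);       /* one-argument form: may sleep, needs no rcu_head */
}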
/*
......
......@@ -98,25 +98,25 @@ static inline void synchronize_rcu_expedited(void)
*/
extern void kvfree(const void *addr);
static inline void __kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
static inline void __kvfree_call_rcu(struct rcu_head *head, void *ptr)
{
if (head) {
call_rcu(head, func);
call_rcu(head, (rcu_callback_t) ((void *) head - ptr));
return;
}
// kvfree_rcu(one_arg) call.
might_sleep();
synchronize_rcu();
kvfree((void *) func);
kvfree(ptr);
}
#ifdef CONFIG_KASAN_GENERIC
void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
void kvfree_call_rcu(struct rcu_head *head, void *ptr);
#else
static inline void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
static inline void kvfree_call_rcu(struct rcu_head *head, void *ptr)
{
__kvfree_call_rcu(head, func);
__kvfree_call_rcu(head, ptr);
}
#endif
......
......@@ -33,7 +33,7 @@ static inline void rcu_virt_note_context_switch(void)
}
void synchronize_rcu_expedited(void);
void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
void kvfree_call_rcu(struct rcu_head *head, void *ptr);
void rcu_barrier(void);
bool rcu_eqs_special_set(int cpu);
......
......@@ -214,6 +214,34 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
return retval;
}
/**
* srcu_down_read - register a new reader for an SRCU-protected structure.
* @ssp: srcu_struct in which to register the new reader.
*
* Enter a semaphore-like SRCU read-side critical section. Note that
* SRCU read-side critical sections may be nested. However, it is
* illegal to call anything that waits on an SRCU grace period for the
* same srcu_struct, whether directly or indirectly. Please note that
* one way to indirectly wait on an SRCU grace period is to acquire
* a mutex that is held elsewhere while calling synchronize_srcu() or
* synchronize_srcu_expedited(). But if you want lockdep to help you
* keep this stuff straight, you should instead use srcu_read_lock().
*
* The semaphore-like nature of srcu_down_read() means that the matching
* srcu_up_read() can be invoked from some other context, for example,
* from some other task or from an irq handler. However, neither
* srcu_down_read() nor srcu_up_read() may be invoked from an NMI handler.
*
* Calls to srcu_down_read() may be nested, similar to the manner in
* which calls to down_read() may be nested.
*/
static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp)
{
WARN_ON_ONCE(in_nmi());
srcu_check_nmi_safety(ssp, false);
return __srcu_read_lock(ssp);
}
/**
* srcu_read_unlock - unregister an old reader from an SRCU-protected structure.
* @ssp: srcu_struct in which to unregister the old reader.
......@@ -254,6 +282,23 @@ srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp)
__srcu_read_unlock(ssp, idx);
}
/**
* srcu_up_read - unregister an old reader from an SRCU-protected structure.
* @ssp: srcu_struct in which to unregister the old reader.
* @idx: return value from the corresponding srcu_down_read().
*
* Exit an SRCU read-side critical section, but not necessarily from
* the same context as the matching srcu_down_read().
*/
static inline void srcu_up_read(struct srcu_struct *ssp, int idx)
__releases(ssp)
{
WARN_ON_ONCE(idx & ~0x1);
WARN_ON_ONCE(in_nmi());
srcu_check_nmi_safety(ssp, false);
__srcu_read_unlock(ssp, idx);
}
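To illustrate the hand-off described above, here is a hedged sketch in which the index from srcu_down_read() is released by srcu_up_read() from workqueue context; the srcu_struct, struct my_work, and both helper functions are hypothetical:
struct my_work {
        struct work_struct work;        /* assumed initialized via INIT_WORK(&w->work, finish_protected) */
        int srcu_idx;
};

DEFINE_STATIC_SRCU(my_srcu);

static void finish_protected(struct work_struct *work)
{
        struct my_work *w = container_of(work, struct my_work, work);

        srcu_up_read(&my_srcu, w->srcu_idx);    /* released from a different context */
}

static void begin_protected(struct my_work *w)
{
        w->srcu_idx = srcu_down_read(&my_srcu); /* reader registered here... */
        queue_work(system_wq, &w->work);        /* ...and released later by finish_protected() */
}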
/**
* smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock
*
......
......@@ -49,7 +49,7 @@ struct srcu_data {
struct srcu_node {
spinlock_t __private lock;
unsigned long srcu_have_cbs[4]; /* GP seq for children having CBs, but only */
/* if greater than ->srcu_gq_seq. */
/* if greater than ->srcu_gp_seq. */
unsigned long srcu_data_have_cbs[4]; /* Which srcu_data structs have CBs for given GP? */
unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */
struct srcu_node *srcu_parent; /* Next up in tree. */
......
......@@ -1865,7 +1865,6 @@ config PERF_EVENTS
default y if PROFILING
depends on HAVE_PERF_EVENTS
select IRQ_WORK
select SRCU
help
Enable kernel support for various performance events provided
by software and hardware.
......
......@@ -46,6 +46,9 @@ torture_param(int, shutdown_secs, 0, "Shutdown time (j), <= zero to disable.");
torture_param(int, stat_interval, 60,
"Number of seconds between stats printk()s");
torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable");
torture_param(int, rt_boost, 2,
"Do periodic rt-boost. 0=Disable, 1=Only for rt_mutex, 2=For all lock types.");
torture_param(int, rt_boost_factor, 50, "A factor determining how often rt-boost happens.");
torture_param(int, verbose, 1,
"Enable verbose debugging printk()s");
......@@ -127,15 +130,50 @@ static void torture_lock_busted_write_unlock(int tid __maybe_unused)
/* BUGGY, do not use in real life!!! */
}
static void torture_boost_dummy(struct torture_random_state *trsp)
static void __torture_rt_boost(struct torture_random_state *trsp)
{
/* Only rtmutexes care about priority */
const unsigned int factor = rt_boost_factor;
if (!rt_task(current)) {
/*
* Boost priority once every rt_boost_factor operations. When
* the task tries to take the lock, the rtmutex will account
* for the new priority, and do any corresponding pi-dance.
*/
if (trsp && !(torture_random(trsp) %
(cxt.nrealwriters_stress * factor))) {
sched_set_fifo(current);
} else /* common case, do nothing */
return;
} else {
/*
* The task will remain boosted for another 10 * rt_boost_factor
* operations, then restored back to its original prio, and so
* forth.
*
* When @trsp is nil, we want to force-reset the task for
* stopping the kthread.
*/
if (!trsp || !(torture_random(trsp) %
(cxt.nrealwriters_stress * factor * 2))) {
sched_set_normal(current, 0);
} else /* common case, do nothing */
return;
}
}
static void torture_rt_boost(struct torture_random_state *trsp)
{
if (rt_boost != 2)
return;
__torture_rt_boost(trsp);
}
static struct lock_torture_ops lock_busted_ops = {
.writelock = torture_lock_busted_write_lock,
.write_delay = torture_lock_busted_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_lock_busted_write_unlock,
.readlock = NULL,
.read_delay = NULL,
......@@ -179,7 +217,7 @@ __releases(torture_spinlock)
static struct lock_torture_ops spin_lock_ops = {
.writelock = torture_spin_lock_write_lock,
.write_delay = torture_spin_lock_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_spin_lock_write_unlock,
.readlock = NULL,
.read_delay = NULL,
......@@ -206,7 +244,7 @@ __releases(torture_spinlock)
static struct lock_torture_ops spin_lock_irq_ops = {
.writelock = torture_spin_lock_write_lock_irq,
.write_delay = torture_spin_lock_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_lock_spin_write_unlock_irq,
.readlock = NULL,
.read_delay = NULL,
......@@ -275,7 +313,7 @@ __releases(torture_rwlock)
static struct lock_torture_ops rw_lock_ops = {
.writelock = torture_rwlock_write_lock,
.write_delay = torture_rwlock_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_rwlock_write_unlock,
.readlock = torture_rwlock_read_lock,
.read_delay = torture_rwlock_read_delay,
......@@ -318,7 +356,7 @@ __releases(torture_rwlock)
static struct lock_torture_ops rw_lock_irq_ops = {
.writelock = torture_rwlock_write_lock_irq,
.write_delay = torture_rwlock_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_rwlock_write_unlock_irq,
.readlock = torture_rwlock_read_lock_irq,
.read_delay = torture_rwlock_read_delay,
......@@ -358,7 +396,7 @@ __releases(torture_mutex)
static struct lock_torture_ops mutex_lock_ops = {
.writelock = torture_mutex_lock,
.write_delay = torture_mutex_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_mutex_unlock,
.readlock = NULL,
.read_delay = NULL,
......@@ -456,7 +494,7 @@ static struct lock_torture_ops ww_mutex_lock_ops = {
.exit = torture_ww_mutex_exit,
.writelock = torture_ww_mutex_lock,
.write_delay = torture_mutex_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_ww_mutex_unlock,
.readlock = NULL,
.read_delay = NULL,
......@@ -474,37 +512,6 @@ __acquires(torture_rtmutex)
return 0;
}
static void torture_rtmutex_boost(struct torture_random_state *trsp)
{
const unsigned int factor = 50000; /* yes, quite arbitrary */
if (!rt_task(current)) {
/*
* Boost priority once every ~50k operations. When the
* task tries to take the lock, the rtmutex it will account
* for the new priority, and do any corresponding pi-dance.
*/
if (trsp && !(torture_random(trsp) %
(cxt.nrealwriters_stress * factor))) {
sched_set_fifo(current);
} else /* common case, do nothing */
return;
} else {
/*
* The task will remain boosted for another ~500k operations,
* then restored back to its original prio, and so forth.
*
* When @trsp is nil, we want to force-reset the task for
* stopping the kthread.
*/
if (!trsp || !(torture_random(trsp) %
(cxt.nrealwriters_stress * factor * 2))) {
sched_set_normal(current, 0);
} else /* common case, do nothing */
return;
}
}
static void torture_rtmutex_delay(struct torture_random_state *trsp)
{
const unsigned long shortdelay_us = 2;
......@@ -530,10 +537,18 @@ __releases(torture_rtmutex)
rt_mutex_unlock(&torture_rtmutex);
}
static void torture_rt_boost_rtmutex(struct torture_random_state *trsp)
{
if (!rt_boost)
return;
__torture_rt_boost(trsp);
}
static struct lock_torture_ops rtmutex_lock_ops = {
.writelock = torture_rtmutex_lock,
.write_delay = torture_rtmutex_delay,
.task_boost = torture_rtmutex_boost,
.task_boost = torture_rt_boost_rtmutex,
.writeunlock = torture_rtmutex_unlock,
.readlock = NULL,
.read_delay = NULL,
......@@ -600,7 +615,7 @@ __releases(torture_rwsem)
static struct lock_torture_ops rwsem_lock_ops = {
.writelock = torture_rwsem_down_write,
.write_delay = torture_rwsem_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_rwsem_up_write,
.readlock = torture_rwsem_down_read,
.read_delay = torture_rwsem_read_delay,
......@@ -652,7 +667,7 @@ static struct lock_torture_ops percpu_rwsem_lock_ops = {
.exit = torture_percpu_rwsem_exit,
.writelock = torture_percpu_rwsem_down_write,
.write_delay = torture_rwsem_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_percpu_rwsem_up_write,
.readlock = torture_percpu_rwsem_down_read,
.read_delay = torture_rwsem_read_delay,
......
......@@ -456,7 +456,6 @@ int raw_notifier_call_chain(struct raw_notifier_head *nh,
}
EXPORT_SYMBOL_GPL(raw_notifier_call_chain);
#ifdef CONFIG_SRCU
/*
* SRCU notifier chain routines. Registration and unregistration
* use a mutex, and call_chain is synchronized by SRCU (no locks).
......@@ -573,8 +572,6 @@ void srcu_init_notifier_head(struct srcu_notifier_head *nh)
}
EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
#endif /* CONFIG_SRCU */
static ATOMIC_NOTIFIER_HEAD(die_chain);
int notrace notify_die(enum die_val val, const char *str,
......
......@@ -244,7 +244,24 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
set_current_state(TASK_INTERRUPTIBLE);
if (pid_ns->pid_allocated == init_pids)
break;
/*
* Release tasks_rcu_exit_srcu to avoid the following deadlock:
*
* 1) TASK A unshare(CLONE_NEWPID)
* 2) TASK A fork() twice -> TASK B (child reaper for new ns)
* and TASK C
* 3) TASK B exits, kills TASK C, waits for TASK A to reap it
* 4) TASK A calls synchronize_rcu_tasks()
* -> synchronize_srcu(tasks_rcu_exit_srcu)
* 5) *DEADLOCK*
*
* It is considered safe to release tasks_rcu_exit_srcu here
* because we assume the current task can not be concurrently
* reaped at this point.
*/
exit_tasks_rcu_stop();
schedule();
exit_tasks_rcu_start();
}
__set_current_state(TASK_RUNNING);
......
......@@ -224,6 +224,7 @@ extern int rcu_cpu_stall_ftrace_dump;
extern int rcu_cpu_stall_suppress;
extern int rcu_cpu_stall_timeout;
extern int rcu_exp_cpu_stall_timeout;
extern bool rcu_exp_stall_task_details __read_mostly;
int rcu_jiffies_till_stall_check(void);
int rcu_exp_jiffies_till_stall_check(void);
......@@ -447,14 +448,20 @@ do { \
/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
static inline bool rcu_gp_is_normal(void) { return true; }
static inline bool rcu_gp_is_expedited(void) { return false; }
static inline bool rcu_async_should_hurry(void) { return false; }
static inline void rcu_expedite_gp(void) { }
static inline void rcu_unexpedite_gp(void) { }
static inline void rcu_async_hurry(void) { }
static inline void rcu_async_relax(void) { }
static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
#else /* #ifdef CONFIG_TINY_RCU */
bool rcu_gp_is_normal(void); /* Internal RCU use. */
bool rcu_gp_is_expedited(void); /* Internal RCU use. */
bool rcu_async_should_hurry(void); /* Internal RCU use. */
void rcu_expedite_gp(void);
void rcu_unexpedite_gp(void);
void rcu_async_hurry(void);
void rcu_async_relax(void);
void rcupdate_announce_bootup_oddness(void);
#ifdef CONFIG_TASKS_RCU_GENERIC
void show_rcu_tasks_gp_kthreads(void);
......
......@@ -89,7 +89,7 @@ static void rcu_segcblist_set_len(struct rcu_segcblist *rsclp, long v)
}
/* Get the length of a segment of the rcu_segcblist structure. */
static long rcu_segcblist_get_seglen(struct rcu_segcblist *rsclp, int seg)
long rcu_segcblist_get_seglen(struct rcu_segcblist *rsclp, int seg)
{
return READ_ONCE(rsclp->seglen[seg]);
}
......
......@@ -15,6 +15,8 @@ static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp)
return READ_ONCE(rclp->len);
}
long rcu_segcblist_get_seglen(struct rcu_segcblist *rsclp, int seg);
/* Return number of callbacks in segmented callback list by summing seglen. */
long rcu_segcblist_n_segment_cbs(struct rcu_segcblist *rsclp);
......
......@@ -399,7 +399,7 @@ static int torture_readlock_not_held(void)
return rcu_read_lock_bh_held() || rcu_read_lock_sched_held();
}
static int rcu_torture_read_lock(void) __acquires(RCU)
static int rcu_torture_read_lock(void)
{
rcu_read_lock();
return 0;
......@@ -441,7 +441,7 @@ rcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
}
}
static void rcu_torture_read_unlock(int idx) __releases(RCU)
static void rcu_torture_read_unlock(int idx)
{
rcu_read_unlock();
}
......@@ -625,7 +625,7 @@ static struct srcu_struct srcu_ctld;
static struct srcu_struct *srcu_ctlp = &srcu_ctl;
static struct rcu_torture_ops srcud_ops;
static int srcu_torture_read_lock(void) __acquires(srcu_ctlp)
static int srcu_torture_read_lock(void)
{
if (cur_ops == &srcud_ops)
return srcu_read_lock_nmisafe(srcu_ctlp);
......@@ -652,7 +652,7 @@ srcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
}
}
static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp)
static void srcu_torture_read_unlock(int idx)
{
if (cur_ops == &srcud_ops)
srcu_read_unlock_nmisafe(srcu_ctlp, idx);
......@@ -814,13 +814,13 @@ static void synchronize_rcu_trivial(void)
}
}
static int rcu_torture_read_lock_trivial(void) __acquires(RCU)
static int rcu_torture_read_lock_trivial(void)
{
preempt_disable();
return 0;
}
static void rcu_torture_read_unlock_trivial(int idx) __releases(RCU)
static void rcu_torture_read_unlock_trivial(int idx)
{
preempt_enable();
}
......
......@@ -76,6 +76,8 @@ torture_param(int, verbose_batched, 0, "Batch verbose debugging printk()s");
// Wait until there are multiple CPUs before starting test.
torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0,
"Holdoff time before test start (s)");
// Number of typesafe_lookup structures, that is, the degree of concurrency.
torture_param(long, lookup_instances, 0, "Number of typesafe_lookup structures.");
// Number of loops per experiment, all readers execute operations concurrently.
torture_param(long, loops, 10000, "Number of loops per experiment.");
// Number of readers, with -1 defaulting to about 75% of the CPUs.
......@@ -124,7 +126,7 @@ static int exp_idx;
// Operations vector for selecting different types of tests.
struct ref_scale_ops {
void (*init)(void);
bool (*init)(void);
void (*cleanup)(void);
void (*readsection)(const int nloops);
void (*delaysection)(const int nloops, const int udl, const int ndl);
......@@ -162,8 +164,9 @@ static void ref_rcu_delay_section(const int nloops, const int udl, const int ndl
}
}
static void rcu_sync_scale_init(void)
static bool rcu_sync_scale_init(void)
{
return true;
}
static struct ref_scale_ops rcu_ops = {
......@@ -315,9 +318,10 @@ static struct ref_scale_ops refcnt_ops = {
// Definitions for rwlock
static rwlock_t test_rwlock;
static void ref_rwlock_init(void)
static bool ref_rwlock_init(void)
{
rwlock_init(&test_rwlock);
return true;
}
static void ref_rwlock_section(const int nloops)
......@@ -351,9 +355,10 @@ static struct ref_scale_ops rwlock_ops = {
// Definitions for rwsem
static struct rw_semaphore test_rwsem;
static void ref_rwsem_init(void)
static bool ref_rwsem_init(void)
{
init_rwsem(&test_rwsem);
return true;
}
static void ref_rwsem_section(const int nloops)
......@@ -523,6 +528,237 @@ static struct ref_scale_ops clock_ops = {
.name = "clock"
};
////////////////////////////////////////////////////////////////////////
//
// Methods leveraging SLAB_TYPESAFE_BY_RCU.
//
// Item to look up in a typesafe manner. Array of pointers to these.
struct refscale_typesafe {
atomic_t rts_refctr; // Used by all flavors
spinlock_t rts_lock;
seqlock_t rts_seqlock;
unsigned int a;
unsigned int b;
};
static struct kmem_cache *typesafe_kmem_cachep;
static struct refscale_typesafe **rtsarray;
static long rtsarray_size;
static DEFINE_TORTURE_RANDOM_PERCPU(refscale_rand);
static bool (*rts_acquire)(struct refscale_typesafe *rtsp, unsigned int *start);
static bool (*rts_release)(struct refscale_typesafe *rtsp, unsigned int start);
// Conditionally acquire an explicit in-structure reference count.
static bool typesafe_ref_acquire(struct refscale_typesafe *rtsp, unsigned int *start)
{
return atomic_inc_not_zero(&rtsp->rts_refctr);
}
// Unconditionally release an explicit in-structure reference count.
static bool typesafe_ref_release(struct refscale_typesafe *rtsp, unsigned int start)
{
if (!atomic_dec_return(&rtsp->rts_refctr)) {
WRITE_ONCE(rtsp->a, rtsp->a + 1);
kmem_cache_free(typesafe_kmem_cachep, rtsp);
}
return true;
}
// Unconditionally acquire an explicit in-structure spinlock.
static bool typesafe_lock_acquire(struct refscale_typesafe *rtsp, unsigned int *start)
{
spin_lock(&rtsp->rts_lock);
return true;
}
// Unconditionally release an explicit in-structure spinlock.
static bool typesafe_lock_release(struct refscale_typesafe *rtsp, unsigned int start)
{
spin_unlock(&rtsp->rts_lock);
return true;
}
// Unconditionally acquire an explicit in-structure sequence lock.
static bool typesafe_seqlock_acquire(struct refscale_typesafe *rtsp, unsigned int *start)
{
*start = read_seqbegin(&rtsp->rts_seqlock);
return true;
}
// Conditionally release an explicit in-structure sequence lock. Return
// true if this release was successful, that is, if no retry is required.
static bool typesafe_seqlock_release(struct refscale_typesafe *rtsp, unsigned int start)
{
return !read_seqretry(&rtsp->rts_seqlock, start);
}
// Do a read-side critical section with the specified delay in
// microseconds and nanoseconds inserted so as to increase probability
// of failure.
static void typesafe_delay_section(const int nloops, const int udl, const int ndl)
{
unsigned int a;
unsigned int b;
int i;
long idx;
struct refscale_typesafe *rtsp;
unsigned int start;
for (i = nloops; i >= 0; i--) {
preempt_disable();
idx = torture_random(this_cpu_ptr(&refscale_rand)) % rtsarray_size;
preempt_enable();
retry:
rcu_read_lock();
rtsp = rcu_dereference(rtsarray[idx]);
a = READ_ONCE(rtsp->a);
if (!rts_acquire(rtsp, &start)) {
rcu_read_unlock();
goto retry;
}
if (a != READ_ONCE(rtsp->a)) {
(void)rts_release(rtsp, start);
rcu_read_unlock();
goto retry;
}
un_delay(udl, ndl);
// Remember, seqlock read-side release can fail.
if (!rts_release(rtsp, start)) {
rcu_read_unlock();
goto retry;
}
b = READ_ONCE(rtsp->a);
WARN_ONCE(a != b, "Re-read of ->a changed from %u to %u.\n", a, b);
b = rtsp->b;
rcu_read_unlock();
WARN_ON_ONCE(a * a != b);
}
}
// Because the acquisition and release methods are expensive, there
// is no point in optimizing away the un_delay() function's two checks.
// Thus simply define typesafe_read_section() as a simple wrapper around
// typesafe_delay_section().
static void typesafe_read_section(const int nloops)
{
typesafe_delay_section(nloops, 0, 0);
}
// Allocate and initialize one refscale_typesafe structure.
static struct refscale_typesafe *typesafe_alloc_one(void)
{
struct refscale_typesafe *rtsp;
rtsp = kmem_cache_alloc(typesafe_kmem_cachep, GFP_KERNEL);
if (!rtsp)
return NULL;
atomic_set(&rtsp->rts_refctr, 1);
WRITE_ONCE(rtsp->a, rtsp->a + 1);
WRITE_ONCE(rtsp->b, rtsp->a * rtsp->a);
return rtsp;
}
// Slab-allocator constructor for refscale_typesafe structures created
// out of a new slab of system memory.
static void refscale_typesafe_ctor(void *rtsp_in)
{
struct refscale_typesafe *rtsp = rtsp_in;
spin_lock_init(&rtsp->rts_lock);
seqlock_init(&rtsp->rts_seqlock);
preempt_disable();
rtsp->a = torture_random(this_cpu_ptr(&refscale_rand));
preempt_enable();
}
static struct ref_scale_ops typesafe_ref_ops;
static struct ref_scale_ops typesafe_lock_ops;
static struct ref_scale_ops typesafe_seqlock_ops;
// Initialize for a typesafe test.
static bool typesafe_init(void)
{
long idx;
long si = lookup_instances;
typesafe_kmem_cachep = kmem_cache_create("refscale_typesafe",
sizeof(struct refscale_typesafe), sizeof(void *),
SLAB_TYPESAFE_BY_RCU, refscale_typesafe_ctor);
if (!typesafe_kmem_cachep)
return false;
if (si < 0)
si = -si * nr_cpu_ids;
else if (si == 0)
si = nr_cpu_ids;
rtsarray_size = si;
rtsarray = kcalloc(si, sizeof(*rtsarray), GFP_KERNEL);
if (!rtsarray)
return false;
for (idx = 0; idx < rtsarray_size; idx++) {
rtsarray[idx] = typesafe_alloc_one();
if (!rtsarray[idx])
return false;
}
if (cur_ops == &typesafe_ref_ops) {
rts_acquire = typesafe_ref_acquire;
rts_release = typesafe_ref_release;
} else if (cur_ops == &typesafe_lock_ops) {
rts_acquire = typesafe_lock_acquire;
rts_release = typesafe_lock_release;
} else if (cur_ops == &typesafe_seqlock_ops) {
rts_acquire = typesafe_seqlock_acquire;
rts_release = typesafe_seqlock_release;
} else {
WARN_ON_ONCE(1);
return false;
}
return true;
}
// Clean up after a typesafe test.
static void typesafe_cleanup(void)
{
long idx;
if (rtsarray) {
for (idx = 0; idx < rtsarray_size; idx++)
kmem_cache_free(typesafe_kmem_cachep, rtsarray[idx]);
kfree(rtsarray);
rtsarray = NULL;
rtsarray_size = 0;
}
kmem_cache_destroy(typesafe_kmem_cachep);
typesafe_kmem_cachep = NULL;
rts_acquire = NULL;
rts_release = NULL;
}
// The typesafe_init() function distinguishes these structures by address.
static struct ref_scale_ops typesafe_ref_ops = {
.init = typesafe_init,
.cleanup = typesafe_cleanup,
.readsection = typesafe_read_section,
.delaysection = typesafe_delay_section,
.name = "typesafe_ref"
};
static struct ref_scale_ops typesafe_lock_ops = {
.init = typesafe_init,
.cleanup = typesafe_cleanup,
.readsection = typesafe_read_section,
.delaysection = typesafe_delay_section,
.name = "typesafe_lock"
};
static struct ref_scale_ops typesafe_seqlock_ops = {
.init = typesafe_init,
.cleanup = typesafe_cleanup,
.readsection = typesafe_read_section,
.delaysection = typesafe_delay_section,
.name = "typesafe_seqlock"
};
static void rcu_scale_one_reader(void)
{
if (readdelay <= 0)
......@@ -812,6 +1048,7 @@ ref_scale_init(void)
static struct ref_scale_ops *scale_ops[] = {
&rcu_ops, &srcu_ops, RCU_TRACE_OPS RCU_TASKS_OPS &refcnt_ops, &rwlock_ops,
&rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops,
&typesafe_ref_ops, &typesafe_lock_ops, &typesafe_seqlock_ops,
};
if (!torture_init_begin(scale_type, verbose))
......@@ -833,7 +1070,10 @@ ref_scale_init(void)
goto unwind;
}
if (cur_ops->init)
cur_ops->init();
if (!cur_ops->init()) {
firsterr = -EUCLEAN;
goto unwind;
}
ref_scale_print_module_parms(cur_ops, "Start of test");
......
......@@ -154,7 +154,7 @@ static void init_srcu_struct_data(struct srcu_struct *ssp)
*/
static inline bool srcu_invl_snp_seq(unsigned long s)
{
return rcu_seq_state(s) == SRCU_SNP_INIT_SEQ;
return s == SRCU_SNP_INIT_SEQ;
}
/*
......@@ -469,24 +469,59 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
/*
* If the locks are the same as the unlocks, then there must have
* been no readers on this index at some time in between. This does
* not mean that there are no more readers, as one could have read
* the current index but not have incremented the lock counter yet.
* been no readers on this index at some point in this function.
* But there might be more readers, as a task might have read
* the current ->srcu_idx but not yet have incremented its CPU's
* ->srcu_lock_count[idx] counter. In fact, it is possible
* that most of the tasks have been preempted between fetching
* ->srcu_idx and incrementing ->srcu_lock_count[idx]. And there
* could be almost (ULONG_MAX / sizeof(struct task_struct)) tasks
* in a system whose address space was fully populated with memory.
* Call this quantity Nt.
*
* So suppose that the updater is preempted here for so long
* that more than ULONG_MAX non-nested readers come and go in
* the meantime. It turns out that this cannot result in overflow
* because if a reader modifies its unlock count after we read it
* above, then that reader's next load of ->srcu_idx is guaranteed
* to get the new value, which will cause it to operate on the
* other bank of counters, where it cannot contribute to the
* overflow of these counters. This means that there is a maximum
* of 2*NR_CPUS increments, which cannot overflow given current
* systems, especially not on 64-bit systems.
* So suppose that the updater is preempted at this point in the
* code for a long time. That now-preempted updater has already
* flipped ->srcu_idx (possibly during the preceding grace period),
* done an smp_mb() (again, possibly during the preceding grace
* period), and summed up the ->srcu_unlock_count[idx] counters.
* How many times can a given one of the aforementioned Nt tasks
* increment the old ->srcu_idx value's ->srcu_lock_count[idx]
* counter, in the absence of nesting?
*
* OK, how about nesting? This does impose a limit on nesting
* of floor(ULONG_MAX/NR_CPUS/2), which should be sufficient,
* especially on 64-bit systems.
* It can clearly do so once, given that it has already fetched
* the old value of ->srcu_idx and is just about to use that value
* to index its increment of ->srcu_lock_count[idx]. But as soon as
* it leaves that SRCU read-side critical section, it will increment
* ->srcu_unlock_count[idx], which must follow the updater's above
* read from that same value. Thus, as soon as the reading task does
* an smp_mb() and a later fetch from ->srcu_idx, that task will be
* guaranteed to get the new index. Except that the increment of
* ->srcu_unlock_count[idx] in __srcu_read_unlock() is after the
* smp_mb(), and the fetch from ->srcu_idx in __srcu_read_lock()
* is before the smp_mb(). Thus, that task might not see the new
* value of ->srcu_idx until the -second- __srcu_read_lock(),
* which in turn means that this task might well increment
* ->srcu_lock_count[idx] for the old value of ->srcu_idx twice,
* not just once.
*
* However, it is important to note that a given smp_mb() takes
* effect not just for the task executing it, but also for any
* later task running on that same CPU.
*
* That is, there can be almost Nt + Nc further increments of
* ->srcu_lock_count[idx] for the old index, where Nc is the number
* of CPUs. But this is OK because the size of the task_struct
* structure limits the value of Nt and current systems limit Nc
* to a few thousand.
*
* OK, but what about nesting? This does impose a limit on
* nesting of half of the size of the task_struct structure
* (measured in bytes), which should be sufficient. A late 2022
* TREE01 rcutorture run reported this size to be no less than
* 9408 bytes, allowing up to 4704 levels of nesting, which is
* comfortably beyond excessive. Especially on 64-bit systems,
* which are unlikely to be configured with an address space fully
* populated with memory, at least not anytime soon.
*/
return srcu_readers_lock_idx(ssp, idx) == unlocks;
}
......@@ -726,7 +761,7 @@ static void srcu_gp_start(struct srcu_struct *ssp)
int state;
if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
sdp = per_cpu_ptr(ssp->sda, 0);
sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id());
else
sdp = this_cpu_ptr(ssp->sda);
lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock));
......@@ -837,7 +872,8 @@ static void srcu_gp_end(struct srcu_struct *ssp)
/* Initiate callback invocation as needed. */
ss_state = smp_load_acquire(&ssp->srcu_size_state);
if (ss_state < SRCU_SIZE_WAIT_BARRIER) {
srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, 0), cbdelay);
srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, get_boot_cpu_id()),
cbdelay);
} else {
idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
srcu_for_each_node_breadth_first(ssp, snp) {
......@@ -914,7 +950,7 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp
if (snp)
for (; snp != NULL; snp = snp->srcu_parent) {
sgsne = READ_ONCE(snp->srcu_gp_seq_needed_exp);
if (rcu_seq_done(&ssp->srcu_gp_seq, s) ||
if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) ||
(!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)))
return;
spin_lock_irqsave_rcu_node(snp, flags);
......@@ -941,6 +977,9 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp
*
* Note that this function also does the work of srcu_funnel_exp_start(),
* in some cases by directly invoking it.
*
* The SRCU read-side lock should be held across this function, and @s is
* a grace-period sequence snapshot taken after that lock was acquired.
*/
static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
unsigned long s, bool do_norm)
......@@ -961,7 +1000,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
if (snp_leaf)
/* Each pass through the loop does one level of the srcu_node tree. */
for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) {
if (rcu_seq_done(&ssp->srcu_gp_seq, s) && snp != snp_leaf)
if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) && snp != snp_leaf)
return; /* GP already done and CBs recorded. */
spin_lock_irqsave_rcu_node(snp, flags);
snp_seq = snp->srcu_have_cbs[idx];
......@@ -998,8 +1037,8 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
if (!do_norm && ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s))
WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s);
/* If grace period not already done and none in progress, start it. */
if (!rcu_seq_done(&ssp->srcu_gp_seq, s) &&
/* If grace period not already in progress, start it. */
if (!WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) &&
rcu_seq_state(ssp->srcu_gp_seq) == SRCU_STATE_IDLE) {
WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed));
srcu_gp_start(ssp);
......@@ -1059,10 +1098,11 @@ static void srcu_flip(struct srcu_struct *ssp)
/*
* Ensure that if the updater misses an __srcu_read_unlock()
* increment, that task's next __srcu_read_lock() will see the
* above counter update. Note that both this memory barrier
* and the one in srcu_readers_active_idx_check() provide the
* guarantee for __srcu_read_lock().
* increment, that task's __srcu_read_lock() following its next
* __srcu_read_lock() or __srcu_read_unlock() will see the above
* counter update. Note that both this memory barrier and the
* one in srcu_readers_active_idx_check() provide the guarantee
* for __srcu_read_lock().
*/
smp_mb(); /* D */ /* Pairs with C. */
}
......@@ -1161,7 +1201,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
idx = __srcu_read_lock_nmisafe(ssp);
ss_state = smp_load_acquire(&ssp->srcu_size_state);
if (ss_state < SRCU_SIZE_WAIT_CALL)
sdp = per_cpu_ptr(ssp->sda, 0);
sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id());
else
sdp = raw_cpu_ptr(ssp->sda);
spin_lock_irqsave_sdp_contention(sdp, &flags);
......@@ -1497,7 +1537,7 @@ void srcu_barrier(struct srcu_struct *ssp)
idx = __srcu_read_lock_nmisafe(ssp);
if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0));
srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, get_boot_cpu_id()));
else
for_each_possible_cpu(cpu)
srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu));
......
......@@ -384,6 +384,7 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
{
int cpu;
unsigned long flags;
bool gpdone = poll_state_synchronize_rcu(rtp->percpu_dequeue_gpseq);
long n;
long ncbs = 0;
long ncbsnz = 0;
......@@ -425,21 +426,23 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
WRITE_ONCE(rtp->percpu_enqueue_shift, order_base_2(nr_cpu_ids));
smp_store_release(&rtp->percpu_enqueue_lim, 1);
rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu();
gpdone = false;
pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name);
}
raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
}
if (rcu_task_cb_adjust && !ncbsnz &&
poll_state_synchronize_rcu(rtp->percpu_dequeue_gpseq)) {
if (rcu_task_cb_adjust && !ncbsnz && gpdone) {
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
if (rtp->percpu_enqueue_lim < rtp->percpu_dequeue_lim) {
WRITE_ONCE(rtp->percpu_dequeue_lim, 1);
pr_info("Completing switch %s to CPU-0 callback queuing.\n", rtp->name);
}
for (cpu = rtp->percpu_dequeue_lim; cpu < nr_cpu_ids; cpu++) {
struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
if (rtp->percpu_dequeue_lim == 1) {
for (cpu = rtp->percpu_dequeue_lim; cpu < nr_cpu_ids; cpu++) {
struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
WARN_ON_ONCE(rcu_segcblist_n_cbs(&rtpcp->cblist));
WARN_ON_ONCE(rcu_segcblist_n_cbs(&rtpcp->cblist));
}
}
raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
}
......@@ -560,8 +563,9 @@ static int __noreturn rcu_tasks_kthread(void *arg)
static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
{
/* Complain if the scheduler has not started. */
WARN_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
"synchronize_rcu_tasks called too soon");
if (WARN_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
"synchronize_%s() called too soon", rtp->name))
return;
// If the grace-period kthread is running, use it.
if (READ_ONCE(rtp->kthread_ptr)) {
......@@ -827,11 +831,21 @@ static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop)
static void rcu_tasks_postscan(struct list_head *hop)
{
/*
* Wait for tasks that are in the process of exiting. This
* does only part of the job, ensuring that all tasks that were
* previously exiting reach the point where they have disabled
* preemption, allowing the later synchronize_rcu() to finish
* the job.
* Exiting tasks may escape the tasklist scan. Those are vulnerable
* until their final schedule() with TASK_DEAD state. To cope with
* this, divide the fragile part of the exit path into two intersecting
* read-side critical sections:
*
* 1) An _SRCU_ read side starting before calling exit_notify(),
* which may remove the task from the tasklist, and ending after
* the final preempt_disable() call in do_exit().
*
* 2) An _RCU_ read side starting with the final preempt_disable()
* call in do_exit() and ending with the final call to schedule()
* with TASK_DEAD state.
*
* This handles the part 1). And postgp will handle part 2) with a
* call to synchronize_rcu().
*/
synchronize_srcu(&tasks_rcu_exit_srcu);
}
......@@ -898,7 +912,10 @@ static void rcu_tasks_postgp(struct rcu_tasks *rtp)
*
* In addition, this synchronize_rcu() waits for exiting tasks
* to complete their final preempt_disable() region of execution,
* cleaning up after the synchronize_srcu() above.
* cleaning up after synchronize_srcu(&tasks_rcu_exit_srcu),
* enforcing the whole region before tasklist removal until
* the final schedule() with TASK_DEAD state to be an RCU TASKS
* read side critical section.
*/
synchronize_rcu();
}
......@@ -988,27 +1005,42 @@ void show_rcu_tasks_classic_gp_kthread(void)
EXPORT_SYMBOL_GPL(show_rcu_tasks_classic_gp_kthread);
#endif // !defined(CONFIG_TINY_RCU)
/* Do the srcu_read_lock() for the above synchronize_srcu(). */
/*
* Contribute to protect against tasklist scan blind spot while the
* task is exiting and may be removed from the tasklist. See
* corresponding synchronize_srcu() for further details.
*/
void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
{
preempt_disable();
current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
preempt_enable();
}
/* Do the srcu_read_unlock() for the above synchronize_srcu(). */
void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu)
/*
* Contribute to protect against tasklist scan blind spot while the
* task is exiting and may be removed from the tasklist. See
* corresponding synchronize_srcu() for further details.
*/
void exit_tasks_rcu_stop(void) __releases(&tasks_rcu_exit_srcu)
{
struct task_struct *t = current;
preempt_disable();
__srcu_read_unlock(&tasks_rcu_exit_srcu, t->rcu_tasks_idx);
preempt_enable();
exit_tasks_rcu_finish_trace(t);
}
/*
* Contribute to protect against tasklist scan blind spot while the
* task is exiting and may be removed from the tasklist. See
* corresponding synchronize_srcu() for further details.
*/
void exit_tasks_rcu_finish(void)
{
exit_tasks_rcu_stop();
exit_tasks_rcu_finish_trace(current);
}
#else /* #ifdef CONFIG_TASKS_RCU */
void exit_tasks_rcu_start(void) { }
void exit_tasks_rcu_stop(void) { }
void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
#endif /* #else #ifdef CONFIG_TASKS_RCU */
......@@ -1036,9 +1068,6 @@ static void rcu_tasks_be_rude(struct work_struct *work)
// Wait for one rude RCU-tasks grace period.
static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp)
{
if (num_online_cpus() <= 1)
return; // Fastpath for only one CPU.
rtp->n_ipis += cpumask_weight(cpu_online_mask);
schedule_on_each_cpu(rcu_tasks_be_rude);
}
......@@ -1815,23 +1844,21 @@ static void test_rcu_tasks_callback(struct rcu_head *rhp)
static void rcu_tasks_initiate_self_tests(void)
{
unsigned long j = jiffies;
pr_info("Running RCU-tasks wait API self tests\n");
#ifdef CONFIG_TASKS_RCU
tests[0].runstart = j;
tests[0].runstart = jiffies;
synchronize_rcu_tasks();
call_rcu_tasks(&tests[0].rh, test_rcu_tasks_callback);
#endif
#ifdef CONFIG_TASKS_RUDE_RCU
tests[1].runstart = j;
tests[1].runstart = jiffies;
synchronize_rcu_tasks_rude();
call_rcu_tasks_rude(&tests[1].rh, test_rcu_tasks_callback);
#endif
#ifdef CONFIG_TASKS_TRACE_RCU
tests[2].runstart = j;
tests[2].runstart = jiffies;
synchronize_rcu_tasks_trace();
call_rcu_tasks_trace(&tests[2].rh, test_rcu_tasks_callback);
#endif
......
......@@ -246,15 +246,12 @@ bool poll_state_synchronize_rcu(unsigned long oldstate)
EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu);
#ifdef CONFIG_KASAN_GENERIC
void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
void kvfree_call_rcu(struct rcu_head *head, void *ptr)
{
if (head) {
void *ptr = (void *) head - (unsigned long) func;
if (head)
kasan_record_aux_stack_noalloc(ptr);
}
__kvfree_call_rcu(head, func);
__kvfree_call_rcu(head, ptr);
}
EXPORT_SYMBOL_GPL(kvfree_call_rcu);
#endif
......
......@@ -11,6 +11,7 @@
static void rcu_exp_handler(void *unused);
static int rcu_print_task_exp_stall(struct rcu_node *rnp);
static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp);
/*
* Record the start of an expedited grace period.
......@@ -667,8 +668,11 @@ static void synchronize_rcu_expedited_wait(void)
mask = leaf_node_cpu_bit(rnp, cpu);
if (!(READ_ONCE(rnp->expmask) & mask))
continue;
preempt_disable(); // For smp_processor_id() in dump_cpu_task().
dump_cpu_task(cpu);
preempt_enable();
}
rcu_exp_print_detail_task_stall_rnp(rnp);
}
jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3;
panic_on_rcu_stall();
......@@ -811,6 +815,36 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
return ndetected;
}
/*
* Scan the current list of tasks blocked within RCU read-side critical
* sections, dumping the stack of each that is blocking the current
* expedited grace period.
*/
static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp)
{
unsigned long flags;
struct task_struct *t;
if (!rcu_exp_stall_task_details)
return;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (!READ_ONCE(rnp->exp_tasks)) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
t = list_entry(rnp->exp_tasks->prev,
struct task_struct, rcu_node_entry);
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
/*
* We could be printing a lot while holding a spinlock.
* Avoid triggering hard lockup.
*/
touch_nmi_watchdog();
sched_show_task(t);
}
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
#else /* #ifdef CONFIG_PREEMPT_RCU */
/* Request an expedited quiescent state. */
......@@ -883,6 +917,15 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
return 0;
}
/*
* Because preemptible RCU does not exist, we never have to print out
* tasks blocked within RCU read-side critical sections that are blocking
* the current expedited grace period.
*/
static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp)
{
}
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
/**
......
......@@ -144,8 +144,45 @@ bool rcu_gp_is_normal(void)
}
EXPORT_SYMBOL_GPL(rcu_gp_is_normal);
static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1);
static atomic_t rcu_async_hurry_nesting = ATOMIC_INIT(1);
/*
* Should call_rcu() callbacks be processed with urgency or are
* they OK being executed with arbitrary delays?
*/
bool rcu_async_should_hurry(void)
{
return !IS_ENABLED(CONFIG_RCU_LAZY) ||
atomic_read(&rcu_async_hurry_nesting);
}
EXPORT_SYMBOL_GPL(rcu_async_should_hurry);
/**
* rcu_async_hurry - Make future async RCU callbacks not lazy.
*
* After a call to this function, future calls to call_rcu()
* will be processed in a timely fashion.
*/
void rcu_async_hurry(void)
{
if (IS_ENABLED(CONFIG_RCU_LAZY))
atomic_inc(&rcu_async_hurry_nesting);
}
EXPORT_SYMBOL_GPL(rcu_async_hurry);
/**
* rcu_async_relax - Make future async RCU callbacks lazy.
*
* After a call to this function, future calls to call_rcu()
* will be processed in a lazy fashion.
*/
void rcu_async_relax(void)
{
if (IS_ENABLED(CONFIG_RCU_LAZY))
atomic_dec(&rcu_async_hurry_nesting);
}
EXPORT_SYMBOL_GPL(rcu_async_relax);
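A hedged sketch of the intended pairing, with the enqueued object and its callback assumed to exist elsewhere:
rcu_async_hurry();                      /* subsequent call_rcu() callbacks are processed promptly */
call_rcu(&obj->rh, obj_free_cb);        /* obj and obj_free_cb are hypothetical */
/* ... once timeliness no longer matters ... */
rcu_async_relax();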
static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1);
/*
* Should normal grace-period primitives be expedited? Intended for
* use within RCU. Note that this function takes the rcu_expedited
......@@ -195,6 +232,7 @@ static bool rcu_boot_ended __read_mostly;
void rcu_end_inkernel_boot(void)
{
rcu_unexpedite_gp();
rcu_async_relax();
if (rcu_normal_after_boot)
WRITE_ONCE(rcu_normal, 1);
rcu_boot_ended = true;
......@@ -220,6 +258,7 @@ void rcu_test_sync_prims(void)
{
if (!IS_ENABLED(CONFIG_PROVE_RCU))
return;
pr_info("Running RCU synchronous self tests\n");
synchronize_rcu();
synchronize_rcu_expedited();
}
......@@ -508,6 +547,8 @@ int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
module_param(rcu_cpu_stall_timeout, int, 0644);
int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT;
module_param(rcu_exp_cpu_stall_timeout, int, 0644);
bool rcu_exp_stall_task_details __read_mostly;
module_param(rcu_exp_stall_task_details, bool, 0644);
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
// Suppress boot-time RCU CPU stall warnings and rcutorture writer stall
......@@ -555,9 +596,12 @@ struct early_boot_kfree_rcu {
static void early_boot_test_call_rcu(void)
{
static struct rcu_head head;
int idx;
static struct rcu_head shead;
struct early_boot_kfree_rcu *rhp;
idx = srcu_down_read(&early_srcu);
srcu_up_read(&early_srcu, idx);
call_rcu(&head, test_callback);
early_srcu_cookie = start_poll_synchronize_srcu(&early_srcu);
call_srcu(&early_srcu, &shead, test_callback);
......@@ -586,6 +630,7 @@ static int rcu_verify_early_boot_tests(void)
early_boot_test_counter++;
srcu_barrier(&early_srcu);
WARN_ON_ONCE(!poll_state_synchronize_srcu(&early_srcu, early_srcu_cookie));
cleanup_srcu_struct(&early_srcu);
}
if (rcu_self_test_counter != early_boot_test_counter) {
WARN_ON(1);
......
......@@ -450,7 +450,7 @@ unsigned long
torture_random(struct torture_random_state *trsp)
{
if (--trsp->trs_count < 0) {
trsp->trs_state += (unsigned long)local_clock();
trsp->trs_state += (unsigned long)local_clock() + raw_smp_processor_id();
trsp->trs_count = TORTURE_RANDOM_REFRESH;
}
trsp->trs_state = trsp->trs_state * TORTURE_RANDOM_MULT +
......@@ -915,7 +915,7 @@ void torture_kthread_stopping(char *title)
VERBOSE_TOROUT_STRING(buf);
while (!kthread_should_stop()) {
torture_shutdown_absorb(title);
schedule_timeout_uninterruptible(1);
schedule_timeout_uninterruptible(HZ / 20);
}
}
EXPORT_SYMBOL_GPL(torture_kthread_stopping);
......
......@@ -10,10 +10,9 @@
T="`mktemp -d ${TMPDIR-/tmp}/configcheck.sh.XXXXXX`"
trap 'rm -rf $T' 0
cat $1 > $T/.config
sed -e 's/"//g' < $1 > $T/.config
cat $2 | sed -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' |
grep -v '^CONFIG_INITRAMFS_SOURCE' |
sed -e 's/"//g' -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' < $2 |
awk '
{
print "if grep -q \"" $0 "\" < '"$T/.config"'";
......
......@@ -10,7 +10,7 @@
#
# Authors: Paul E. McKenney <paulmck@kernel.org>
egrep 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
grep -E 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
grep -v 'ODEBUG: ' |
grep -v 'This means that this is a DEBUG kernel and it is' |
grep -v 'Warning: unable to open an initial console' |
......
......@@ -44,10 +44,10 @@ fi
ncpus="`getconf _NPROCESSORS_ONLN`"
make -j$((2 * ncpus)) $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
retval=$?
if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $resdir/Make.out
if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | grep -E -q "Stop|Error|error:|warning:" || grep -E -q "Stop|Error|error:" < $resdir/Make.out
then
echo Kernel build error
egrep "Stop|Error|error:|warning:" < $resdir/Make.out
grep -E "Stop|Error|error:|warning:" < $resdir/Make.out
echo Run aborted.
exit 3
fi
......@@ -32,11 +32,11 @@ for i in ${rundir}/*/Make.out
do
scenariodir="`dirname $i`"
scenariobasedir="`echo ${scenariodir} | sed -e 's/\.[0-9]*$//'`"
if egrep -q "error:|warning:|^ld: .*undefined reference to" < $i
if grep -E -q "error:|warning:|^ld: .*undefined reference to" < $i
then
egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags
grep -E "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags
files="$files $i.diags $i"
elif ! test -f ${scenariobasedir}/vmlinux && ! test -f "${rundir}/re-run"
elif ! test -f ${scenariobasedir}/vmlinux && ! test -f ${scenariobasedir}/vmlinux.xz && ! test -f "${rundir}/re-run"
then
echo No ${scenariobasedir}/vmlinux file > $i.diags
files="$files $i.diags $i"
......
......@@ -186,7 +186,7 @@ do
fi
;;
--kconfig|--kconfigs)
checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$'
checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)*$' '^error$'
TORTURE_KCONFIG_ARG="`echo "$TORTURE_KCONFIG_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`"
shift
;;
......@@ -585,7 +585,7 @@ awk < $T/cfgcpu.pack \
echo kvm-end-run-stats.sh "$resdir/$ds" "$starttime" >> $T/script
# Extract the tests and their batches from the script.
egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" |
grep -E 'Start batch|Starting build\.' $T/script | grep -v ">>" |
sed -e 's/:.*$//' -e 's/^echo //' -e 's/-ovf//' |
awk '
/^----Start/ {
......@@ -622,7 +622,7 @@ then
elif test "$dryrun" = sched
then
# Extract the test run schedule from the script.
egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" |
grep -E 'Start batch|Starting build\.' $T/script | grep -v ">>" |
sed -e 's/:.*$//' -e 's/^echo //'
nbuilds="`grep 'Starting build\.' $T/script |
grep -v ">>" | sed -e 's/:.*$//' -e 's/^echo //' |
......
......@@ -65,7 +65,7 @@ then
fi
grep --binary-files=text 'torture:.*ver:' $file |
egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' |
grep -E --binary-files=text -v '\(null\)|rtc: 000000000* ' |
sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
sed -e 's/^.*ver: //' |
awk '
......@@ -128,17 +128,17 @@ then
then
summary="$summary Badness: $n_badness"
fi
n_warn=`grep -v 'Warning: unable to open an initial console' $file | grep -v 'Warning: Failed to add ttynull console. No stdin, stdout, and stderr for the init process' | egrep -c 'WARNING:|Warn'`
n_warn=`grep -v 'Warning: unable to open an initial console' $file | grep -v 'Warning: Failed to add ttynull console. No stdin, stdout, and stderr for the init process' | grep -E -c 'WARNING:|Warn'`
if test "$n_warn" -ne 0
then
summary="$summary Warnings: $n_warn"
fi
n_bugs=`egrep -c '\bBUG|Oops:' $file`
n_bugs=`grep -E -c '\bBUG|Oops:' $file`
if test "$n_bugs" -ne 0
then
summary="$summary Bugs: $n_bugs"
fi
n_kcsan=`egrep -c 'BUG: KCSAN: ' $file`
n_kcsan=`grep -E -c 'BUG: KCSAN: ' $file`
if test "$n_kcsan" -ne 0
then
if test "$n_bugs" = "$n_kcsan"
......@@ -158,7 +158,7 @@ then
then
summary="$summary lockdep: $n_badness"
fi
n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
n_stalls=`grep -E -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
if test "$n_stalls" -ne 0
then
summary="$summary Stalls: $n_stalls"
......