Commit 28e92f99 authored by Linus Torvalds

Merge branch 'core-rcu-2021.07.04' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu

Pull RCU updates from Paul McKenney:

 - Bitmap parsing support for "all" as an alias for all bits

 - Documentation updates

 - Miscellaneous fixes, including some that overlap into mm and lockdep

 - kvfree_rcu() updates

 - mem_dump_obj() updates, with acks from one of the slab-allocator
   maintainers

 - RCU NOCB CPU updates, including limited deoffloading

 - SRCU updates

 - Tasks-RCU updates

 - Torture-test updates

* 'core-rcu-2021.07.04' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu: (78 commits)
  tasks-rcu: Make show_rcu_tasks_gp_kthreads() be static inline
  rcu-tasks: Make ksoftirqd provide RCU Tasks quiescent states
  rcu: Add missing __releases() annotation
  rcu: Remove obsolete rcu_read_unlock() deadlock commentary
  rcu: Improve comments describing RCU read-side critical sections
  rcu: Create an unrcu_pointer() to remove __rcu from a pointer
  srcu: Early test SRCU polling start
  rcu: Fix various typos in comments
  rcu/nocb: Unify timers
  rcu/nocb: Prepare for fine-grained deferred wakeup
  rcu/nocb: Only cancel nocb timer if not polling
  rcu/nocb: Delete bypass_timer upon nocb_gp wakeup
  rcu/nocb: Cancel nocb_timer upon nocb_gp wakeup
  rcu/nocb: Allow de-offloading rdp leader
  rcu/nocb: Directly call __wake_nocb_gp() from bypass timer
  rcu: Don't penalize priority boosting when there is nothing to boost
  rcu: Point to documentation of ordering guarantees
  rcu: Make rcu_gp_cleanup() be noinline for tracing
  rcu: Restrict RCU_STRICT_GRACE_PERIOD to at most four CPUs
  rcu: Make show_rcu_gp_kthreads() dump rcu_node structures blocking GP
  ...
parents da803f82 641faf1b
......@@ -21,7 +21,7 @@ Any code that happens after the end of a given RCU grace period is guaranteed
to see the effects of all accesses prior to the beginning of that grace
period that are within RCU read-side critical sections.
Similarly, any code that happens before the beginning of a given RCU grace
period is guaranteed to see the effects of all accesses following the end
period is guaranteed to not see the effects of all accesses following the end
of that grace period that are within RCU read-side critical sections.
Note well that RCU-sched read-side critical sections include any region
......@@ -339,14 +339,14 @@ The diagram below shows the path of ordering if the leftmost
leftmost ``rcu_node`` structure offlines its last CPU and if the next
``rcu_node`` structure has no online CPUs).
.. kernel-figure:: TreeRCU-gp-init-1.svg
.. kernel-figure:: TreeRCU-gp-init-2.svg
The final ``rcu_gp_init()`` pass through the ``rcu_node`` tree traverses
breadth-first, setting each ``rcu_node`` structure's ``->gp_seq`` field
to the newly advanced value from the ``rcu_state`` structure, as shown
in the following diagram.
.. kernel-figure:: TreeRCU-gp-init-1.svg
.. kernel-figure:: TreeRCU-gp-init-3.svg
This change will also cause each CPU's next call to
``__note_gp_changes()`` to notice that a new grace period has started,
......
......@@ -76,6 +76,11 @@ to change, such as less cores in the CPU list, then N and any ranges using N
will also change. Use the same on a small 4 core system, and "16-N" becomes
"16-3" and now the same boot input will be flagged as invalid (start > end).
The special case-tolerant group name "all" has a meaning of selecting all CPUs,
so that "nohz_full=all" is the equivalent of "nohz_full=0-N".
The semantics of "N" and "all" is supported on a level of bitmaps and holds for
all users of bitmap_parse().
This document may not be entirely up to date and comprehensive. The command
"modinfo -p ${modulename}" shows a current list of all parameters of a loadable
......
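As a rough illustration of the "all" alias described above, the following kernel-style sketch exercises it through bitmap_parselist(), which is built on the same bitmap_parse_region() helper changed later in this series. The 16-bit map size and the pr_info() output are illustrative assumptions, not taken from the patch.

#include <linux/bitmap.h>
#include <linux/printk.h>

static int demo_bitmap_all(void)
{
	DECLARE_BITMAP(mask, 16);
	int err;

	/* "all" selects every bit, i.e. the same as "0-15" for a 16-bit map. */
	err = bitmap_parselist("all", mask, 16);
	if (err)
		return err;
	pr_info("all     -> %*pbl\n", 16, mask);

	/* The alias composes with group patterns, so "all:1/2" is every other bit. */
	err = bitmap_parselist("all:1/2", mask, 16);
	if (err)
		return err;
	pr_info("all:1/2 -> %*pbl\n", 16, mask);
	return 0;
}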
......@@ -4354,6 +4354,11 @@
whole algorithm to behave better in low memory
condition.
rcutree.rcu_delay_page_cache_fill_msec= [KNL]
Set the page-cache refill delay (in milliseconds)
in response to low-memory conditions. The range
of permitted values is in the range 0:100000.
rcutree.jiffies_till_first_fqs= [KNL]
Set delay from grace-period initialization to
first attempt to force quiescent states.
......
......@@ -315,7 +315,7 @@ static inline int rcu_read_lock_any_held(void)
#define RCU_LOCKDEP_WARN(c, s) \
do { \
static bool __section(".data.unlikely") __warned; \
if (debug_lockdep_rcu_enabled() && !__warned && (c)) { \
if ((c) && debug_lockdep_rcu_enabled() && !__warned) { \
__warned = true; \
lockdep_rcu_suspicious(__FILE__, __LINE__, s); \
} \
......@@ -373,7 +373,7 @@ static inline void rcu_preempt_sleep_check(void) { }
#define unrcu_pointer(p) \
({ \
typeof(*p) *_________p1 = (typeof(*p) *__force)(p); \
rcu_check_sparse(p, __rcu); \
rcu_check_sparse(p, __rcu); \
((typeof(*p) __force __kernel *)(_________p1)); \
})
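A minimal sketch of how the new unrcu_pointer() might be used; struct bar, struct foo, and the teardown context are assumptions for illustration. Unlike rcu_dereference(), the macro only strips the __rcu address-space annotation (keeping sparse happy) and performs no READ_ONCE() or lockdep checking, so it is appropriate only when the pointer can no longer be concurrently read or updated.

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct bar { int x; };
struct foo { struct bar __rcu *barp; };

/* Called once all readers are known to be gone, e.g. after a grace period. */
static void foo_teardown(struct foo *fp)
{
	kfree(unrcu_pointer(fp->barp));	/* plain pointer, no sparse complaint */
	kfree(fp);
}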
......@@ -532,7 +532,12 @@ do { \
* @p: The pointer to read, prior to dereferencing
* @c: The conditions under which the dereference will take place
*
* This is the RCU-bh counterpart to rcu_dereference_check().
* This is the RCU-bh counterpart to rcu_dereference_check(). However,
* please note that starting in v5.0 kernels, vanilla RCU grace periods
* wait for local_bh_disable() regions of code in addition to regions of
* code demarked by rcu_read_lock() and rcu_read_unlock(). This means
* that synchronize_rcu(), call_rcu, and friends all take not only
* rcu_read_lock() but also rcu_read_lock_bh() into account.
*/
#define rcu_dereference_bh_check(p, c) \
__rcu_dereference_check((p), (c) || rcu_read_lock_bh_held(), __rcu)
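For example, a hedged sketch of the kind of caller rcu_dereference_bh_check() serves: a reader that may run either with softirqs disabled (or under rcu_read_lock_bh()) or with a hypothetical update-side foo_lock held, with the lockdep expression documenting both legal contexts. The structure, variable, and lock names are illustrative.

#include <linux/errno.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct bar { int x; };
static struct bar __rcu *foo_barp;
static DEFINE_SPINLOCK(foo_lock);

static int foo_read_x(void)
{
	struct bar *p;

	/* Legal from BH-disabled context or with foo_lock held. */
	p = rcu_dereference_bh_check(foo_barp, lockdep_is_held(&foo_lock));
	return p ? p->x : -ENOENT;
}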
......@@ -543,6 +548,11 @@ do { \
* @c: The conditions under which the dereference will take place
*
* This is the RCU-sched counterpart to rcu_dereference_check().
* However, please note that starting in v5.0 kernels, vanilla RCU grace
* periods wait for preempt_disable() regions of code in addition to
* regions of code demarked by rcu_read_lock() and rcu_read_unlock().
* This means that synchronize_rcu(), call_rcu, and friends all take not
* only rcu_read_lock() but also rcu_read_lock_sched() into account.
*/
#define rcu_dereference_sched_check(p, c) \
__rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \
......@@ -634,6 +644,12 @@ do { \
* sections, invocation of the corresponding RCU callback is deferred
* until after the all the other CPUs exit their critical sections.
*
* In v5.0 and later kernels, synchronize_rcu() and call_rcu() also
* wait for regions of code with preemption disabled, including regions of
* code with interrupts or softirqs disabled. In pre-v5.0 kernels, which
* define synchronize_sched(), only code enclosed within rcu_read_lock()
* and rcu_read_unlock() are guaranteed to be waited for.
*
* Note, however, that RCU callbacks are permitted to run concurrently
* with new RCU read-side critical sections. One way that this can happen
* is via the following sequence of events: (1) CPU 0 enters an RCU
......@@ -686,33 +702,12 @@ static __always_inline void rcu_read_lock(void)
/**
* rcu_read_unlock() - marks the end of an RCU read-side critical section.
*
* In most situations, rcu_read_unlock() is immune from deadlock.
* However, in kernels built with CONFIG_RCU_BOOST, rcu_read_unlock()
* is responsible for deboosting, which it does via rt_mutex_unlock().
* Unfortunately, this function acquires the scheduler's runqueue and
* priority-inheritance spinlocks. This means that deadlock could result
* if the caller of rcu_read_unlock() already holds one of these locks or
* any lock that is ever acquired while holding them.
*
* That said, RCU readers are never priority boosted unless they were
* preempted. Therefore, one way to avoid deadlock is to make sure
* that preemption never happens within any RCU read-side critical
* section whose outermost rcu_read_unlock() is called with one of
* rt_mutex_unlock()'s locks held. Such preemption can be avoided in
* a number of ways, for example, by invoking preempt_disable() before
* critical section's outermost rcu_read_lock().
*
* Given that the set of locks acquired by rt_mutex_unlock() might change
* at any time, a somewhat more future-proofed approach is to make sure
* that that preemption never happens within any RCU read-side critical
* section whose outermost rcu_read_unlock() is called with irqs disabled.
* This approach relies on the fact that rt_mutex_unlock() currently only
* acquires irq-disabled locks.
*
* The second of these two approaches is best in most situations,
* however, the first approach can also be useful, at least to those
* developers willing to keep abreast of the set of locks acquired by
* rt_mutex_unlock().
* In almost all situations, rcu_read_unlock() is immune from deadlock.
* In recent kernels that have consolidated synchronize_sched() and
* synchronize_rcu_bh() into synchronize_rcu(), this deadlock immunity
* also extends to the scheduler's runqueue and priority-inheritance
* spinlocks, courtesy of the quiescent-state deferral that is carried
* out when rcu_read_unlock() is invoked with interrupts disabled.
*
* See rcu_read_lock() for more information.
*/
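A brief sketch of the pattern the rewritten comment above describes: the outermost rcu_read_unlock() runs with interrupts disabled because an irq-disabled spinlock is held, and in consolidated-RCU kernels any priority deboosting is deferred rather than acquiring scheduler or priority-inheritance locks, so no deadlock can result. The names below are illustrative, not taken from the patch.

#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct foo { int a; };
static struct foo __rcu *foo_gp;
static DEFINE_SPINLOCK(foo_irq_lock);

static void foo_sample(int *out)
{
	struct foo *p;
	unsigned long flags;

	rcu_read_lock();
	p = rcu_dereference(foo_gp);
	spin_lock_irqsave(&foo_irq_lock, flags);
	*out = p ? p->a : 0;
	/* Outermost rcu_read_unlock() with irqs disabled: deboosting deferred. */
	rcu_read_unlock();
	spin_unlock_irqrestore(&foo_irq_lock, flags);
}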
......@@ -728,9 +723,11 @@ static inline void rcu_read_unlock(void)
/**
* rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section
*
* This is equivalent of rcu_read_lock(), but also disables softirqs.
* Note that anything else that disables softirqs can also serve as
* an RCU read-side critical section.
* This is equivalent to rcu_read_lock(), but also disables softirqs.
* Note that anything else that disables softirqs can also serve as an RCU
* read-side critical section. However, please note that this equivalence
* applies only to v5.0 and later. Before v5.0, rcu_read_lock() and
* rcu_read_lock_bh() were unrelated.
*
* Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh()
* must occur in the same context, for example, it is illegal to invoke
......@@ -763,9 +760,12 @@ static inline void rcu_read_unlock_bh(void)
/**
* rcu_read_lock_sched() - mark the beginning of a RCU-sched critical section
*
* This is equivalent of rcu_read_lock(), but disables preemption.
* Read-side critical sections can also be introduced by anything else
* that disables preemption, including local_irq_disable() and friends.
* This is equivalent to rcu_read_lock(), but also disables preemption.
* Read-side critical sections can also be introduced by anything else that
* disables preemption, including local_irq_disable() and friends. However,
* please note that the equivalence to rcu_read_lock() applies only to
* v5.0 and later. Before v5.0, rcu_read_lock() and rcu_read_lock_sched()
* were unrelated.
*
* Note that rcu_read_lock_sched() and the matching rcu_read_unlock_sched()
* must occur in the same context, for example, it is illegal to invoke
......
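Tying the rcupdate.h comments above together, here is a hedged end-to-end sketch under the consolidated (v5.0+) semantics: the reader relies only on rcu_read_lock_sched()/preempt disabling, yet the updater's plain synchronize_rcu() is guaranteed to wait for it. All structure, variable, and lock names are illustrative.

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct cfg { int a; };
static struct cfg __rcu *cur_cfg;
static DEFINE_SPINLOCK(cfg_lock);

static int cfg_read_a(void)
{
	struct cfg *p;
	int a = -1;

	rcu_read_lock_sched();			/* any preempt-disabled region works */
	p = rcu_dereference_sched(cur_cfg);
	if (p)
		a = p->a;
	rcu_read_unlock_sched();
	return a;
}

static void cfg_replace(struct cfg *newp)
{
	struct cfg *oldp;

	spin_lock(&cfg_lock);
	oldp = rcu_dereference_protected(cur_cfg, lockdep_is_held(&cfg_lock));
	rcu_assign_pointer(cur_cfg, newp);
	spin_unlock(&cfg_lock);

	synchronize_rcu();	/* waits out cfg_read_a() under the v5.0+ rules */
	kfree(oldp);
}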
......@@ -86,7 +86,6 @@ static inline void rcu_irq_enter(void) { }
static inline void rcu_irq_exit_irqson(void) { }
static inline void rcu_irq_enter_irqson(void) { }
static inline void rcu_irq_exit(void) { }
static inline void rcu_irq_exit_preempt(void) { }
static inline void rcu_irq_exit_check_preempt(void) { }
#define rcu_is_idle_cpu(cpu) \
(is_idle_task(current) && !in_nmi() && !in_irq() && !in_serving_softirq())
......
......@@ -49,7 +49,6 @@ void rcu_idle_enter(void);
void rcu_idle_exit(void);
void rcu_irq_enter(void);
void rcu_irq_exit(void);
void rcu_irq_exit_preempt(void);
void rcu_irq_enter_irqson(void);
void rcu_irq_exit_irqson(void);
bool rcu_is_idle_cpu(int cpu);
......
......@@ -64,6 +64,12 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp);
unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp);
bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie);
#ifdef CONFIG_SRCU
void srcu_init(void);
#else /* #ifdef CONFIG_SRCU */
static inline void srcu_init(void) { }
#endif /* #else #ifdef CONFIG_SRCU */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/**
......
......@@ -82,9 +82,7 @@ struct srcu_struct {
/* callback for the barrier */
/* operation. */
struct delayed_work work;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
};
/* Values for state variable (bottom bits of ->srcu_gp_seq). */
......
......@@ -192,8 +192,6 @@ extern int try_to_del_timer_sync(struct timer_list *timer);
#define del_singleshot_timer_sync(t) del_timer_sync(t)
extern bool timer_curr_running(struct timer_list *timer);
extern void init_timers(void);
struct hrtimer;
extern enum hrtimer_restart it_real_fn(struct hrtimer *);
......
......@@ -278,6 +278,7 @@ TRACE_EVENT_RCU(rcu_exp_funnel_lock,
* "WakeNot": Don't wake rcuo kthread.
* "WakeNotPoll": Don't wake rcuo kthread because it is polling.
* "WakeOvfIsDeferred": Wake rcuo kthread later, CB list is huge.
* "WakeBypassIsDeferred": Wake rcuo kthread later, bypass list is contended.
* "WokeEmpty": rcuo CB kthread woke to find empty list.
*/
TRACE_EVENT_RCU(rcu_nocb_wake,
......
......@@ -42,6 +42,7 @@
#include <linux/profile.h>
#include <linux/kfence.h>
#include <linux/rcupdate.h>
#include <linux/srcu.h>
#include <linux/moduleparam.h>
#include <linux/kallsyms.h>
#include <linux/writeback.h>
......@@ -1008,6 +1009,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
tick_init();
rcu_init_nohz();
init_timers();
srcu_init();
hrtimers_init();
softirq_init();
timekeeping_init();
......
......@@ -6506,6 +6506,7 @@ asmlinkage __visible void lockdep_sys_exit(void)
void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
{
struct task_struct *curr = current;
int dl = READ_ONCE(debug_locks);
/* Note: the following can be executed concurrently, so be careful. */
pr_warn("\n");
......@@ -6515,11 +6516,12 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
pr_warn("-----------------------------\n");
pr_warn("%s:%d %s!\n", file, line, s);
pr_warn("\nother info that might help us debug this:\n\n");
pr_warn("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
pr_warn("\n%srcu_scheduler_active = %d, debug_locks = %d\n%s",
!rcu_lockdep_current_cpu_online()
? "RCU used illegally from offline CPU!\n"
: "",
rcu_scheduler_active, debug_locks);
rcu_scheduler_active, dl,
dl ? "" : "Possible false positive due to lockdep disabling via debug_locks = 0\n");
/*
* If a CPU is in the RCU-free window in idle (ie: in the section
......
......@@ -116,7 +116,7 @@ config RCU_EQS_DEBUG
config RCU_STRICT_GRACE_PERIOD
bool "Provide debug RCU implementation with short grace periods"
depends on DEBUG_KERNEL && RCU_EXPERT
depends on DEBUG_KERNEL && RCU_EXPERT && NR_CPUS <= 4
default n
select PREEMPT_COUNT if PREEMPT=n
help
......
......@@ -308,6 +308,8 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
}
}
extern void rcu_init_geometry(void);
/* Returns a pointer to the first leaf rcu_node structure. */
#define rcu_first_leaf_node() (rcu_state.level[rcu_num_lvls - 1])
......@@ -422,12 +424,6 @@ do { \
#endif /* #if defined(CONFIG_SRCU) || !defined(CONFIG_TINY_RCU) */
#ifdef CONFIG_SRCU
void srcu_init(void);
#else /* #ifdef CONFIG_SRCU */
static inline void srcu_init(void) { }
#endif /* #else #ifdef CONFIG_SRCU */
#ifdef CONFIG_TINY_RCU
/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
static inline bool rcu_gp_is_normal(void) { return true; }
......@@ -441,7 +437,11 @@ bool rcu_gp_is_expedited(void); /* Internal RCU use. */
void rcu_expedite_gp(void);
void rcu_unexpedite_gp(void);
void rcupdate_announce_bootup_oddness(void);
#ifdef CONFIG_TASKS_RCU_GENERIC
void show_rcu_tasks_gp_kthreads(void);
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
static inline void show_rcu_tasks_gp_kthreads(void) {}
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
void rcu_request_urgent_qs_task(struct task_struct *t);
#endif /* #else #ifdef CONFIG_TINY_RCU */
......@@ -519,6 +519,7 @@ static inline unsigned long rcu_exp_batches_completed(void) { return 0; }
static inline unsigned long
srcu_batches_completed(struct srcu_struct *sp) { return 0; }
static inline void rcu_force_quiescent_state(void) { }
static inline bool rcu_check_boost_fail(unsigned long gp_state, int *cpup) { return true; }
static inline void show_rcu_gp_kthreads(void) { }
static inline int rcu_get_gp_kthreads_prio(void) { return 0; }
static inline void rcu_fwd_progress_check(unsigned long j) { }
......@@ -527,6 +528,7 @@ bool rcu_dynticks_zero_in_eqs(int cpu, int *vp);
unsigned long rcu_get_gp_seq(void);
unsigned long rcu_exp_batches_completed(void);
unsigned long srcu_batches_completed(struct srcu_struct *sp);
bool rcu_check_boost_fail(unsigned long gp_state, int *cpup);
void show_rcu_gp_kthreads(void);
int rcu_get_gp_kthreads_prio(void);
void rcu_fwd_progress_check(unsigned long j);
......
......@@ -362,6 +362,111 @@ static struct ref_scale_ops rwsem_ops = {
.name = "rwsem"
};
// Definitions for global spinlock
static DEFINE_SPINLOCK(test_lock);
static void ref_lock_section(const int nloops)
{
int i;
preempt_disable();
for (i = nloops; i >= 0; i--) {
spin_lock(&test_lock);
spin_unlock(&test_lock);
}
preempt_enable();
}
static void ref_lock_delay_section(const int nloops, const int udl, const int ndl)
{
int i;
preempt_disable();
for (i = nloops; i >= 0; i--) {
spin_lock(&test_lock);
un_delay(udl, ndl);
spin_unlock(&test_lock);
}
preempt_enable();
}
static struct ref_scale_ops lock_ops = {
.readsection = ref_lock_section,
.delaysection = ref_lock_delay_section,
.name = "lock"
};
// Definitions for global irq-save spinlock
static void ref_lock_irq_section(const int nloops)
{
unsigned long flags;
int i;
preempt_disable();
for (i = nloops; i >= 0; i--) {
spin_lock_irqsave(&test_lock, flags);
spin_unlock_irqrestore(&test_lock, flags);
}
preempt_enable();
}
static void ref_lock_irq_delay_section(const int nloops, const int udl, const int ndl)
{
unsigned long flags;
int i;
preempt_disable();
for (i = nloops; i >= 0; i--) {
spin_lock_irqsave(&test_lock, flags);
un_delay(udl, ndl);
spin_unlock_irqrestore(&test_lock, flags);
}
preempt_enable();
}
static struct ref_scale_ops lock_irq_ops = {
.readsection = ref_lock_irq_section,
.delaysection = ref_lock_irq_delay_section,
.name = "lock-irq"
};
// Definitions acquire-release.
static DEFINE_PER_CPU(unsigned long, test_acqrel);
static void ref_acqrel_section(const int nloops)
{
unsigned long x;
int i;
preempt_disable();
for (i = nloops; i >= 0; i--) {
x = smp_load_acquire(this_cpu_ptr(&test_acqrel));
smp_store_release(this_cpu_ptr(&test_acqrel), x + 1);
}
preempt_enable();
}
static void ref_acqrel_delay_section(const int nloops, const int udl, const int ndl)
{
unsigned long x;
int i;
preempt_disable();
for (i = nloops; i >= 0; i--) {
x = smp_load_acquire(this_cpu_ptr(&test_acqrel));
un_delay(udl, ndl);
smp_store_release(this_cpu_ptr(&test_acqrel), x + 1);
}
preempt_enable();
}
static struct ref_scale_ops acqrel_ops = {
.readsection = ref_acqrel_section,
.delaysection = ref_acqrel_delay_section,
.name = "acqrel"
};
static void rcu_scale_one_reader(void)
{
if (readdelay <= 0)
......@@ -653,8 +758,8 @@ ref_scale_init(void)
long i;
int firsterr = 0;
static struct ref_scale_ops *scale_ops[] = {
&rcu_ops, &srcu_ops, &rcu_trace_ops, &rcu_tasks_ops,
&refcnt_ops, &rwlock_ops, &rwsem_ops,
&rcu_ops, &srcu_ops, &rcu_trace_ops, &rcu_tasks_ops, &refcnt_ops, &rwlock_ops,
&rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops,
};
if (!torture_init_begin(scale_type, verbose))
......
......@@ -80,7 +80,7 @@ do { \
* srcu_read_unlock() running against them. So if the is_static parameter
* is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[].
*/
static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static)
static void init_srcu_struct_nodes(struct srcu_struct *ssp)
{
int cpu;
int i;
......@@ -90,6 +90,9 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static)
struct srcu_node *snp;
struct srcu_node *snp_first;
/* Initialize geometry if it has not already been initialized. */
rcu_init_geometry();
/* Work out the overall tree geometry. */
ssp->level[0] = &ssp->node[0];
for (i = 1; i < rcu_num_lvls; i++)
......@@ -148,14 +151,6 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static)
timer_setup(&sdp->delay_work, srcu_delay_timer, 0);
sdp->ssp = ssp;
sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
if (is_static)
continue;
/* Dynamically allocated, better be no srcu_read_locks()! */
for (i = 0; i < ARRAY_SIZE(sdp->srcu_lock_count); i++) {
sdp->srcu_lock_count[i] = 0;
sdp->srcu_unlock_count[i] = 0;
}
}
}
......@@ -179,7 +174,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
ssp->sda = alloc_percpu(struct srcu_data);
if (!ssp->sda)
return -ENOMEM;
init_srcu_struct_nodes(ssp, is_static);
init_srcu_struct_nodes(ssp);
ssp->srcu_gp_seq_needed_exp = 0;
ssp->srcu_last_gp_end = ktime_get_mono_fast_ns();
smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */
......@@ -777,9 +772,9 @@ static bool srcu_might_be_idle(struct srcu_struct *ssp)
spin_unlock_irqrestore_rcu_node(sdp, flags);
/*
* No local callbacks, so probabalistically probe global state.
* No local callbacks, so probabilistically probe global state.
* Exact information would require acquiring locks, which would
* kill scalability, hence the probabalistic nature of the probe.
* kill scalability, hence the probabilistic nature of the probe.
*/
/* First, see if enough time has passed since the last GP. */
......@@ -1000,6 +995,9 @@ EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
* synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are
* passed the same srcu_struct structure.
*
* Implementation of these memory-ordering guarantees is similar to
* that of synchronize_rcu().
*
* If SRCU is likely idle, expedite the first request. This semantic
* was provided by Classic SRCU, and is relied upon by its users, so TREE
* SRCU must also provide it. Note that detecting idleness is heuristic
......@@ -1392,11 +1390,15 @@ void __init srcu_init(void)
{
struct srcu_struct *ssp;
/*
* Once that is set, call_srcu() can follow the normal path and
* queue delayed work. This must follow RCU workqueues creation
* and timers initialization.
*/
srcu_init_done = true;
while (!list_empty(&srcu_boot_list)) {
ssp = list_first_entry(&srcu_boot_list, struct srcu_struct,
work.work.entry);
check_init_srcu_struct(ssp);
list_del_init(&ssp->work.work.entry);
queue_work(rcu_gp_wq, &ssp->work.work);
}
......
......@@ -94,9 +94,9 @@ static void rcu_sync_func(struct rcu_head *rhp)
rcu_sync_call(rsp);
} else {
/*
* We're at least a GP after the last rcu_sync_exit(); eveybody
* We're at least a GP after the last rcu_sync_exit(); everybody
* will now have observed the write side critical section.
* Let 'em rip!.
* Let 'em rip!
*/
WRITE_ONCE(rsp->gp_state, GP_IDLE);
}
......
......@@ -23,7 +23,7 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
* struct rcu_tasks - Definition for a Tasks-RCU-like mechanism.
* @cbs_head: Head of callback list.
* @cbs_tail: Tail pointer for callback list.
* @cbs_wq: Wait queue allowning new callback to get kthread's attention.
* @cbs_wq: Wait queue allowing new callback to get kthread's attention.
* @cbs_lock: Lock protecting callback list.
* @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
* @gp_func: This flavor's grace-period-wait function.
......@@ -377,6 +377,46 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
// Finally, this implementation does not support high call_rcu_tasks()
// rates from multiple CPUs. If this is required, per-CPU callback lists
// will be needed.
//
// The implementation uses rcu_tasks_wait_gp(), which relies on function
// pointers in the rcu_tasks structure. The rcu_spawn_tasks_kthread()
// function sets these function pointers up so that rcu_tasks_wait_gp()
// invokes these functions in this order:
//
// rcu_tasks_pregp_step():
// Invokes synchronize_rcu() in order to wait for all in-flight
// t->on_rq and t->nvcsw transitions to complete. This works because
// all such transitions are carried out with interrupts disabled.
// rcu_tasks_pertask(), invoked on every non-idle task:
// For every runnable non-idle task other than the current one, use
// get_task_struct() to pin down that task, snapshot that task's
// number of voluntary context switches, and add that task to the
// holdout list.
// rcu_tasks_postscan():
// Invoke synchronize_srcu() to ensure that all tasks that were
// in the process of exiting (and which thus might not know to
// synchronize with this RCU Tasks grace period) have completed
// exiting.
// check_all_holdout_tasks(), repeatedly until holdout list is empty:
// Scans the holdout list, attempting to identify a quiescent state
// for each task on the list. If there is a quiescent state, the
// corresponding task is removed from the holdout list.
// rcu_tasks_postgp():
// Invokes synchronize_rcu() in order to ensure that all prior
// t->on_rq and t->nvcsw transitions are seen by all CPUs and tasks
// to have happened before the end of this RCU Tasks grace period.
// Again, this works because all such transitions are carried out
// with interrupts disabled.
//
// For each exiting task, the exit_tasks_rcu_start() and
// exit_tasks_rcu_finish() functions begin and end, respectively, the SRCU
// read-side critical sections waited for by rcu_tasks_postscan().
//
// Pre-grace-period update-side code is ordered before the grace via the
// ->cbs_lock and the smp_mb__after_spinlock(). Pre-grace-period read-side
// code is ordered before the grace period via synchronize_rcu() call
// in rcu_tasks_pregp_step() and by the scheduler's locks and interrupt
// disabling.
/* Pre-grace-period preparation. */
static void rcu_tasks_pregp_step(void)
......@@ -504,7 +544,7 @@ DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks");
* or transition to usermode execution. As such, there are no read-side
* primitives analogous to rcu_read_lock() and rcu_read_unlock() because
* this primitive is intended to determine that all tasks have passed
* through a safe state, not so much for data-strcuture synchronization.
* through a safe state, not so much for data-structure synchronization.
*
* See the description of call_rcu() for more detailed information on
* memory ordering guarantees.
......@@ -605,8 +645,13 @@ void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
// passing an empty function to schedule_on_each_cpu(). This approach
// provides an asynchronous call_rcu_tasks_rude() API and batching
// of concurrent calls to the synchronous synchronize_rcu_rude() API.
// This sends IPIs far and wide and induces otherwise unnecessary context
// switches on all online CPUs, whether idle or not.
// This invokes schedule_on_each_cpu() in order to send IPIs far and wide
// and induces otherwise unnecessary context switches on all online CPUs,
// whether idle or not.
//
// Callback handling is provided by the rcu_tasks_kthread() function.
//
// Ordering is provided by the scheduler's context-switch code.
// Empty function to allow workqueues to force a context switch.
static void rcu_tasks_be_rude(struct work_struct *work)
......@@ -637,7 +682,7 @@ DEFINE_RCU_TASKS(rcu_tasks_rude, rcu_tasks_rude_wait_gp, call_rcu_tasks_rude,
* there are no read-side primitives analogous to rcu_read_lock() and
* rcu_read_unlock() because this primitive is intended to determine
* that all tasks have passed through a safe state, not so much for
* data-strcuture synchronization.
* data-structure synchronization.
*
* See the description of call_rcu() for more detailed information on
* memory ordering guarantees.
......@@ -1163,7 +1208,7 @@ static void exit_tasks_rcu_finish_trace(struct task_struct *t)
* there are no read-side primitives analogous to rcu_read_lock() and
* rcu_read_unlock() because this primitive is intended to determine
* that all tasks have passed through a safe state, not so much for
* data-strcuture synchronization.
* data-structure synchronization.
*
* See the description of call_rcu() for more detailed information on
* memory ordering guarantees.
......@@ -1356,5 +1401,4 @@ void __init rcu_init_tasks_generic(void)
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
static inline void rcu_tasks_bootup_oddness(void) {}
void show_rcu_tasks_gp_kthreads(void) {}
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
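To make the Tasks-RCU flow documented above concrete, here is a hedged sketch of its canonical use case: freeing a code trampoline only once every task is known to have left it. The trampoline type and the tramp_unmap()/tramp_free() helpers are hypothetical stand-ins, not kernel APIs.

#include <linux/rcupdate.h>

struct tramp;				/* hypothetical trampoline descriptor */
void tramp_unmap(struct tramp *tp);	/* hypothetical: prevent new entries */
void tramp_free(struct tramp *tp);	/* hypothetical: release the memory */

static void tramp_release(struct tramp *tp)
{
	tramp_unmap(tp);
	/*
	 * Wait until every task has voluntarily context-switched or returned
	 * to user space, i.e. cannot still be executing in tp, per the
	 * rcu_tasks_wait_gp() steps described above.
	 */
	synchronize_rcu_tasks();
	tramp_free(tp);
}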
......@@ -221,5 +221,4 @@ void __init rcu_init(void)
{
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
rcu_early_boot_tests();
srcu_init();
}
......@@ -115,6 +115,7 @@ struct rcu_node {
/* boosting for this rcu_node structure. */
unsigned int boost_kthread_status;
/* State of boost_kthread_task for tracing. */
unsigned long n_boosts; /* Number of boosts for this rcu_node structure. */
#ifdef CONFIG_RCU_NOCB_CPU
struct swait_queue_head nocb_gp_wq[2];
/* Place for rcu_nocb_kthread() to wait GP. */
......@@ -153,7 +154,7 @@ struct rcu_data {
unsigned long gp_seq; /* Track rsp->gp_seq counter. */
unsigned long gp_seq_needed; /* Track furthest future GP request. */
union rcu_noqs cpu_no_qs; /* No QSes yet for this CPU. */
bool core_needs_qs; /* Core waits for quiesc state. */
bool core_needs_qs; /* Core waits for quiescent state. */
bool beenonline; /* CPU online at least once. */
bool gpwrap; /* Possible ->gp_seq wrap. */
bool exp_deferred_qs; /* This CPU awaiting a deferred QS? */
......@@ -218,7 +219,6 @@ struct rcu_data {
/* The following fields are used by GP kthread, hence own cacheline. */
raw_spinlock_t nocb_gp_lock ____cacheline_internodealigned_in_smp;
struct timer_list nocb_bypass_timer; /* Force nocb_bypass flush. */
u8 nocb_gp_sleep; /* Is the nocb GP thread asleep? */
u8 nocb_gp_bypass; /* Found a bypass on last scan? */
u8 nocb_gp_gp; /* GP to wait for on last scan? */
......@@ -257,10 +257,10 @@ struct rcu_data {
};
/* Values for nocb_defer_wakeup field in struct rcu_data. */
#define RCU_NOCB_WAKE_OFF -1
#define RCU_NOCB_WAKE_NOT 0
#define RCU_NOCB_WAKE 1
#define RCU_NOCB_WAKE_FORCE 2
#define RCU_NOCB_WAKE_BYPASS 1
#define RCU_NOCB_WAKE 2
#define RCU_NOCB_WAKE_FORCE 3
#define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
/* For jiffies_till_first_fqs and */
......@@ -417,8 +417,8 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
static bool rcu_is_callbacks_kthread(void);
static void rcu_cpu_kthread_setup(unsigned int cpu);
static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp);
static void __init rcu_spawn_boost_kthreads(void);
static void rcu_prepare_kthreads(int cpu);
static void rcu_cleanup_after_idle(void);
static void rcu_prepare_for_idle(void);
static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
......@@ -434,7 +434,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
bool *was_alldone, unsigned long flags);
static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
unsigned long flags);
static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level);
static bool do_nocb_deferred_wakeup(struct rcu_data *rdp);
static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
static void rcu_spawn_cpu_nocb_kthread(int cpu);
......
......@@ -314,6 +314,7 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
* tasks blocked within RCU read-side critical sections.
*/
static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
__releases(rnp->lock)
{
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return 0;
......@@ -716,6 +717,63 @@ static void check_cpu_stall(struct rcu_data *rdp)
// RCU forward-progress mechanisms, including of callback invocation.
/*
* Check to see if a failure to end RCU priority inversion was due to
* a CPU not passing through a quiescent state. When this happens, there
* is nothing that RCU priority boosting can do to help, so we shouldn't
* count this as an RCU priority boosting failure. A return of true says
* RCU priority boosting is to blame, and false says otherwise. If false
* is returned, the first of the CPUs to blame is stored through cpup.
* If there was no CPU blocking the current grace period, but also nothing
* in need of being boosted, *cpup is set to -1. This can happen in case
* of vCPU preemption while the last CPU is reporting its quiscent state,
* for example.
*
* If cpup is NULL, then a lockless quick check is carried out, suitable
* for high-rate usage. On the other hand, if cpup is non-NULL, each
* rcu_node structure's ->lock is acquired, ruling out high-rate usage.
*/
bool rcu_check_boost_fail(unsigned long gp_state, int *cpup)
{
bool atb = false;
int cpu;
unsigned long flags;
struct rcu_node *rnp;
rcu_for_each_leaf_node(rnp) {
if (!cpup) {
if (READ_ONCE(rnp->qsmask)) {
return false;
} else {
if (READ_ONCE(rnp->gp_tasks))
atb = true;
continue;
}
}
*cpup = -1;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->gp_tasks)
atb = true;
if (!rnp->qsmask) {
// No CPUs without quiescent states for this rnp.
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
continue;
}
// Find the first holdout CPU.
for_each_leaf_node_possible_cpu(rnp, cpu) {
if (rnp->qsmask & (1UL << (cpu - rnp->grplo))) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
*cpup = cpu;
return false;
}
}
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
// Can't blame CPUs, so must blame RCU priority boosting.
return atb;
}
EXPORT_SYMBOL_GPL(rcu_check_boost_fail);
/*
* Show the state of the grace-period kthreads.
*/
......@@ -726,6 +784,7 @@ void show_rcu_gp_kthreads(void)
unsigned long j;
unsigned long ja;
unsigned long jr;
unsigned long js;
unsigned long jw;
struct rcu_data *rdp;
struct rcu_node *rnp;
......@@ -734,21 +793,30 @@ void show_rcu_gp_kthreads(void)
j = jiffies;
ja = j - data_race(rcu_state.gp_activity);
jr = j - data_race(rcu_state.gp_req_activity);
js = j - data_race(rcu_state.gp_start);
jw = j - data_race(rcu_state.gp_wake_time);
pr_info("%s: wait state: %s(%d) ->state: %#x delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n",
pr_info("%s: wait state: %s(%d) ->state: %#lx ->rt_priority %u delta ->gp_start %lu ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_max %lu ->gp_flags %#x\n",
rcu_state.name, gp_state_getname(rcu_state.gp_state),
rcu_state.gp_state, t ? t->__state : 0x1ffff,
ja, jr, jw, (long)data_race(rcu_state.gp_wake_seq),
rcu_state.gp_state, t ? t->__state : 0x1ffffL, t ? t->rt_priority : 0xffU,
js, ja, jr, jw, (long)data_race(rcu_state.gp_wake_seq),
(long)data_race(rcu_state.gp_seq),
(long)data_race(rcu_get_root()->gp_seq_needed),
data_race(rcu_state.gp_max),
data_race(rcu_state.gp_flags));
rcu_for_each_node_breadth_first(rnp) {
if (ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq),
READ_ONCE(rnp->gp_seq_needed)))
if (ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq), READ_ONCE(rnp->gp_seq_needed)) &&
!data_race(rnp->qsmask) && !data_race(rnp->boost_tasks) &&
!data_race(rnp->exp_tasks) && !data_race(rnp->gp_tasks))
continue;
pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n",
rnp->grplo, rnp->grphi, (long)data_race(rnp->gp_seq),
(long)data_race(rnp->gp_seq_needed));
pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld ->qsmask %#lx %c%c%c%c ->n_boosts %ld\n",
rnp->grplo, rnp->grphi,
(long)data_race(rnp->gp_seq), (long)data_race(rnp->gp_seq_needed),
data_race(rnp->qsmask),
".b"[!!data_race(rnp->boost_kthread_task)],
".B"[!!data_race(rnp->boost_tasks)],
".E"[!!data_race(rnp->exp_tasks)],
".G"[!!data_race(rnp->gp_tasks)],
data_race(rnp->n_boosts));
if (!rcu_is_leaf_node(rnp))
continue;
for_each_leaf_node_possible_cpu(rnp, cpu) {
......
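As a hedged illustration of the rcu_check_boost_fail() semantics documented above (true means RCU priority boosting itself is to blame; false with *cpup >= 0 names a holdout CPU; false with *cpup == -1 means no one is clearly at fault), a caller might report the result as follows. The surrounding function is illustrative rather than taken from rcutorture.

static void report_boost_result(unsigned long gp_state)
{
	int cpu = -1;

	if (rcu_check_boost_fail(gp_state, &cpu))
		pr_err("RCU priority boosting failed to resolve blocked readers\n");
	else if (cpu < 0)
		pr_info("No holdout CPU; perhaps vCPU preemption during the QS report\n");
	else
		pr_info("CPU %d has not yet passed through a quiescent state\n", cpu);
}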
......@@ -277,7 +277,7 @@ EXPORT_SYMBOL_GPL(rcu_callback_map);
noinstr int notrace debug_lockdep_rcu_enabled(void)
{
return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && READ_ONCE(debug_locks) &&
current->lockdep_recursion == 0;
}
EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
......@@ -524,6 +524,7 @@ static void test_callback(struct rcu_head *r)
}
DEFINE_STATIC_SRCU(early_srcu);
static unsigned long early_srcu_cookie;
struct early_boot_kfree_rcu {
struct rcu_head rh;
......@@ -536,8 +537,10 @@ static void early_boot_test_call_rcu(void)
struct early_boot_kfree_rcu *rhp;
call_rcu(&head, test_callback);
if (IS_ENABLED(CONFIG_SRCU))
if (IS_ENABLED(CONFIG_SRCU)) {
early_srcu_cookie = start_poll_synchronize_srcu(&early_srcu);
call_srcu(&early_srcu, &shead, test_callback);
}
rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
if (!WARN_ON_ONCE(!rhp))
kfree_rcu(rhp, rh);
......@@ -563,6 +566,7 @@ static int rcu_verify_early_boot_tests(void)
if (IS_ENABLED(CONFIG_SRCU)) {
early_boot_test_counter++;
srcu_barrier(&early_srcu);
WARN_ON_ONCE(!poll_state_synchronize_srcu(&early_srcu, early_srcu_cookie));
}
}
if (rcu_self_test_counter != early_boot_test_counter) {
......
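The early-boot test above exercises SRCU's polled grace-period interface. As a rough usage sketch outside of boot testing (my_srcu, the callers, and the split into two helpers are illustrative assumptions):

#include <linux/srcu.h>

DEFINE_STATIC_SRCU(my_srcu);
static unsigned long my_cookie;

static void my_snapshot_gp(void)
{
	/* Record a grace-period cookie without blocking. */
	my_cookie = start_poll_synchronize_srcu(&my_srcu);
}

static bool my_gp_elapsed(void)
{
	/* True once a full SRCU grace period has elapsed since the snapshot. */
	return poll_state_synchronize_srcu(&my_srcu, my_cookie);
}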
......@@ -1237,20 +1237,6 @@ int try_to_del_timer_sync(struct timer_list *timer)
}
EXPORT_SYMBOL(try_to_del_timer_sync);
bool timer_curr_running(struct timer_list *timer)
{
int i;
for (i = 0; i < NR_BASES; i++) {
struct timer_base *base = this_cpu_ptr(&timer_bases[i]);
if (base->running_timer == timer)
return true;
}
return false;
}
#ifdef CONFIG_PREEMPT_RT
static __init void timer_base_init_expiry_lock(struct timer_base *base)
{
......
......@@ -581,6 +581,14 @@ static const char *bitmap_parse_region(const char *str, struct region *r)
{
unsigned int lastbit = r->nbits - 1;
if (!strncasecmp(str, "all", 3)) {
r->start = 0;
r->end = lastbit;
str += 3;
goto check_pattern;
}
str = bitmap_getnum(str, &r->start, lastbit);
if (IS_ERR(str))
return str;
......@@ -595,6 +603,7 @@ static const char *bitmap_parse_region(const char *str, struct region *r)
if (IS_ERR(str))
return str;
check_pattern:
if (end_of_region(*str))
goto no_pattern;
......
......@@ -366,6 +366,13 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = {
{0, "0-31:1/3,1-31:1/3,2-31:1/3", &exp1[8 * step], 32, 0},
{0, "1-10:8/12,8-31:24/29,0-31:0/3", &exp1[9 * step], 32, 0},
{0, "all", &exp1[8 * step], 32, 0},
{0, "0, 1, all, ", &exp1[8 * step], 32, 0},
{0, "all:1/2", &exp1[4 * step], 32, 0},
{0, "ALL:1/2", &exp1[4 * step], 32, 0},
{-EINVAL, "al", NULL, 8, 0},
{-EINVAL, "alll", NULL, 8, 0},
{-EINVAL, "-1", NULL, 8, 0},
{-EINVAL, "-0", NULL, 8, 0},
{-EINVAL, "10-1", NULL, 8, 0},
......
......@@ -922,7 +922,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
continue;
}
/*
* No kthead_use_mm() user needs to read from the userspace so
* No kthread_use_mm() user needs to read from the userspace so
* we are ok to reap it.
*/
if (unlikely(p->flags & PF_KTHREAD))
......
......@@ -634,6 +634,7 @@ struct kmem_obj_info {
struct kmem_cache *kp_slab_cache;
void *kp_ret;
void *kp_stack[KS_ADDRS_COUNT];
void *kp_free_stack[KS_ADDRS_COUNT];
};
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page);
#endif
......
......@@ -575,7 +575,7 @@ EXPORT_SYMBOL_GPL(kmem_valid_obj);
* depends on the type of object and on how much debugging is enabled.
* For a slab-cache object, the fact that it is a slab object is printed,
* and, if available, the slab name, return address, and stack trace from
* the allocation of that object.
* the allocation and last free path of that object.
*
* This function will splat if passed a pointer to a non-slab object.
* If you are not sure what type of object you have, you should instead
......@@ -620,6 +620,16 @@ void kmem_dump_obj(void *object)
break;
pr_info(" %pS\n", kp.kp_stack[i]);
}
if (kp.kp_free_stack[0])
pr_cont(" Free path:\n");
for (i = 0; i < ARRAY_SIZE(kp.kp_free_stack); i++) {
if (!kp.kp_free_stack[i])
break;
pr_info(" %pS\n", kp.kp_free_stack[i]);
}
}
EXPORT_SYMBOL_GPL(kmem_dump_obj);
#endif
......
......@@ -4045,6 +4045,7 @@ void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
!(s->flags & SLAB_STORE_USER))
return;
#ifdef CONFIG_SLUB_DEBUG
objp = fixup_red_left(s, objp);
trackp = get_track(s, objp, TRACK_ALLOC);
kpp->kp_ret = (void *)trackp->addr;
#ifdef CONFIG_STACKTRACE
......@@ -4053,6 +4054,13 @@ void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
if (!kpp->kp_stack[i])
break;
}
trackp = get_track(s, objp, TRACK_FREE);
for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
kpp->kp_free_stack[i] = (void *)trackp->addrs[i];
if (!kpp->kp_free_stack[i])
break;
}
#endif
#endif
}
......
......@@ -983,7 +983,7 @@ int __weak memcmp_pages(struct page *page1, struct page *page2)
* depends on the type of object and on how much debugging is enabled.
* For example, for a slab-cache object, the slab name is printed, and,
* if available, the return address and stack trace from the allocation
* of that object.
* and last free path of that object.
*/
void mem_dump_obj(void *object)
{
......
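With the slab changes above, mem_dump_obj() now reports the last-free call path in addition to the allocation stack. A hedged sketch of using it while chasing a suspected use-after-free (the wrapper function and message are illustrative):

#include <linux/mm.h>
#include <linux/printk.h>

static void complain_about_object(void *p)
{
	pr_err("suspicious object %px, dumping provenance:\n", p);
	mem_dump_obj(p);	/* slab name, allocation stack, and now the free path */
}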
#!/usr/bin/env drgn
# SPDX-License-Identifier: GPL-2.0+
#
# Dump out the number of RCU callbacks outstanding.
#
# On older kernels having multiple flavors of RCU, this dumps out the
# number of callbacks for the most heavily used flavor.
#
# Usage: sudo drgn rcu-cbs.py
#
# Copyright (C) 2021 Facebook, Inc.
#
# Authors: Paul E. McKenney <paulmck@kernel.org>
import sys
import drgn
from drgn import NULL, Object
from drgn.helpers.linux import *
def get_rdp0(prog):
try:
rdp0 = prog.variable('rcu_preempt_data', 'kernel/rcu/tree.c');
except LookupError:
rdp0 = NULL;
if rdp0 == NULL:
try:
rdp0 = prog.variable('rcu_sched_data',
'kernel/rcu/tree.c');
except LookupError:
rdp0 = NULL;
if rdp0 == NULL:
rdp0 = prog.variable('rcu_data', 'kernel/rcu/tree.c');
return rdp0.address_of_();
rdp0 = get_rdp0(prog);
# Sum up RCU callbacks.
sum = 0;
for cpu in for_each_possible_cpu(prog):
rdp = per_cpu_ptr(rdp0, cpu);
len = rdp.cblist.len.value_();
# print("CPU " + str(cpu) + " RCU callbacks: " + str(len));
sum += len;
print("Number of RCU callbacks in flight: " + str(sum));
......@@ -29,7 +29,7 @@ then
echo "Usage: $scriptname /path/to/old/run [ options ]"
exit 1
fi
if ! cp "$oldrun/batches" $T/batches.oldrun
if ! cp "$oldrun/scenarios" $T/scenarios.oldrun
then
# Later on, can reconstitute this from console.log files.
echo Prior run batches file does not exist: $oldrun/batches
......@@ -143,6 +143,8 @@ then
usage
fi
rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log
touch "$rundir/log"
echo $scriptname $args | tee -a "$rundir/log"
echo $oldrun > "$rundir/re-run"
if ! test -d "$rundir/../../bin"
then
......@@ -165,22 +167,12 @@ done
grep '^#' $i | sed -e 's/^# //' > $T/qemu-cmd-settings
. $T/qemu-cmd-settings
grep -v '^#' $T/batches.oldrun | awk '
BEGIN {
oldbatch = 1;
}
grep -v '^#' $T/scenarios.oldrun | awk '
{
if (oldbatch != $1) {
print "kvm-test-1-run-batch.sh" curbatch;
curbatch = "";
oldbatch = $1;
}
curbatch = curbatch " " $2;
}
END {
print "kvm-test-1-run-batch.sh" curbatch
curbatch = "";
for (i = 2; i <= NF; i++)
curbatch = curbatch " " $i;
print "kvm-test-1-run-batch.sh" curbatch;
}' > $T/runbatches.sh
if test -n "$dryrun"
......@@ -188,12 +180,5 @@ then
echo ---- Dryrun complete, directory: $rundir | tee -a "$rundir/log"
else
( cd "$rundir"; sh $T/runbatches.sh )
kcsan-collapse.sh "$rundir" | tee -a "$rundir/log"
echo | tee -a "$rundir/log"
echo ---- Results directory: $rundir | tee -a "$rundir/log"
kvm-recheck.sh "$rundir" > $T/kvm-recheck.sh.out 2>&1
ret=$?
cat $T/kvm-recheck.sh.out | tee -a "$rundir/log"
echo " --- Done at `date` (`get_starttime_duration $starttime`) exitcode $ret" | tee -a "$rundir/log"
exit $ret
kvm-end-run-stats.sh "$rundir" "$starttime"
fi
......@@ -40,8 +40,10 @@ if test $retval -gt 1
then
exit 2
fi
ncpus=`cpus2use.sh`
make -j$ncpus $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
# Tell "make" to use double the number of real CPUs on the build system.
ncpus="`getconf _NPROCESSORS_ONLN`"
make -j$((2 * ncpus)) $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
retval=$?
if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $resdir/Make.out
then
......
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
#
# Check the status of the specified run.
#
# Usage: kvm-end-run-stats.sh /path/to/run starttime
#
# Copyright (C) 2021 Facebook, Inc.
#
# Authors: Paul E. McKenney <paulmck@kernel.org>
# scriptname=$0
# args="$*"
rundir="$1"
if ! test -d "$rundir"
then
echo kvm-end-run-stats.sh: Specified run directory does not exist: $rundir
exit 1
fi
T=${TMPDIR-/tmp}/kvm-end-run-stats.sh.$$
trap 'rm -rf $T' 0
mkdir $T
KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
PATH=${KVM}/bin:$PATH; export PATH
. functions.sh
default_starttime="`get_starttime`"
starttime="${2-default_starttime}"
echo | tee -a "$rundir/log"
echo | tee -a "$rundir/log"
echo " --- `date` Test summary:" | tee -a "$rundir/log"
echo Results directory: $rundir | tee -a "$rundir/log"
kcsan-collapse.sh "$rundir" | tee -a "$rundir/log"
kvm-recheck.sh "$rundir" > $T/kvm-recheck.sh.out 2>&1
ret=$?
cat $T/kvm-recheck.sh.out | tee -a "$rundir/log"
echo " --- Done at `date` (`get_starttime_duration $starttime`) exitcode $ret" | tee -a "$rundir/log"
exit $ret
......@@ -43,7 +43,7 @@ then
else
echo No build errors.
fi
if grep -q -e "--buildonly" < ${rundir}/log
if grep -q -e "--build-\?only" < ${rundir}/log && ! test -f "${rundir}/remote-log"
then
echo Build-only run, no console logs to check.
exit $editorret
......
......@@ -31,7 +31,7 @@ then
echo "$configfile ------- " $stopstate
else
title="$configfile ------- $ngps GPs"
dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
dur=`grep -v '^#' $i/qemu-cmd | sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//'`
if test -z "$dur"
then
:
......
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
#
# Run a series of tests on remote systems under KVM.
#
# Usage: kvm-remote.sh "systems" [ <kvm.sh args> ]
# kvm-remote.sh "systems" /path/to/old/run [ <kvm-again.sh args> ]
#
# Copyright (C) 2021 Facebook, Inc.
#
# Authors: Paul E. McKenney <paulmck@kernel.org>
scriptname=$0
args="$*"
if ! test -d tools/testing/selftests/rcutorture/bin
then
echo $scriptname must be run from top-level directory of kernel source tree.
exit 1
fi
KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
PATH=${KVM}/bin:$PATH; export PATH
. functions.sh
starttime="`get_starttime`"
systems="$1"
if test -z "$systems"
then
echo $scriptname: Empty list of systems will go nowhere good, giving up.
exit 1
fi
shift
# Pathnames:
# T: /tmp/kvm-remote.sh.$$
# resdir: /tmp/kvm-remote.sh.$$/res
# rundir: /tmp/kvm-remote.sh.$$/res/$ds ("-remote" suffix)
# oldrun: `pwd`/tools/testing/.../res/$otherds
#
# Pathname segments:
# TD: kvm-remote.sh.$$
# ds: yyyy.mm.dd-hh.mm.ss-remote
TD=kvm-remote.sh.$$
T=${TMPDIR-/tmp}/$TD
trap 'rm -rf $T' 0
mkdir $T
resdir="$T/res"
ds=`date +%Y.%m.%d-%H.%M.%S`-remote
rundir=$resdir/$ds
echo Results directory: $rundir
echo $scriptname $args
if echo $1 | grep -q '^--'
then
# Fresh build. Create a datestamp unless the caller supplied one.
datestamp="`echo "$@" | awk -v ds="$ds" '{
for (i = 1; i < NF; i++) {
if ($i == "--datestamp") {
ds = "";
break;
}
}
if (ds != "")
print "--datestamp " ds;
}'`"
kvm.sh --remote "$@" $datestamp --buildonly > $T/kvm.sh.out 2>&1
ret=$?
if test "$ret" -ne 0
then
echo $scriptname: kvm.sh failed exit code $?
cat $T/kvm.sh.out
exit 2
fi
oldrun="`grep -m 1 "^Results directory: " $T/kvm.sh.out | awk '{ print $3 }'`"
touch "$oldrun/remote-log"
echo $scriptname $args >> "$oldrun/remote-log"
echo | tee -a "$oldrun/remote-log"
echo " ----" kvm.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
cat $T/kvm.sh.out | tee -a "$oldrun/remote-log"
# We are going to run this, so remove the buildonly files.
rm -f "$oldrun"/*/buildonly
kvm-again.sh $oldrun --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
ret=$?
if test "$ret" -ne 0
then
echo $scriptname: kvm-again.sh failed exit code $? | tee -a "$oldrun/remote-log"
cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
exit 2
fi
else
# Re-use old run.
oldrun="$1"
if ! echo $oldrun | grep -q '^/'
then
oldrun="`pwd`/$oldrun"
fi
shift
touch "$oldrun/remote-log"
echo $scriptname $args >> "$oldrun/remote-log"
kvm-again.sh "$oldrun" "$@" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
ret=$?
if test "$ret" -ne 0
then
echo $scriptname: kvm-again.sh failed exit code $? | tee -a "$oldrun/remote-log"
cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
exit 2
fi
cp -a "$rundir" "$KVM/res/"
oldrun="$KVM/res/$ds"
fi
echo | tee -a "$oldrun/remote-log"
echo " ----" kvm-again.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
cat $T/kvm-again.sh.out
echo | tee -a "$oldrun/remote-log"
echo Remote run directory: $rundir | tee -a "$oldrun/remote-log"
echo Local build-side run directory: $oldrun | tee -a "$oldrun/remote-log"
# Create the kvm-remote-N.sh scripts in the bin directory.
awk < "$rundir"/scenarios -v dest="$T/bin" -v rundir="$rundir" '
{
n = $1;
sub(/\./, "", n);
fn = dest "/kvm-remote-" n ".sh"
scenarios = "";
for (i = 2; i <= NF; i++)
scenarios = scenarios " " $i;
print "kvm-test-1-run-batch.sh" scenarios > fn;
print "rm " rundir "/remote.run" >> fn;
}'
chmod +x $T/bin/kvm-remote-*.sh
( cd "`dirname $T`"; tar -chzf $T/binres.tgz "$TD/bin" "$TD/res" )
# Check first to avoid the need for cleanup for system-name typos
for i in $systems
do
ncpus="`ssh $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
ret=$?
if test "$ret" -ne 0
then
echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log"
exit 4 | tee -a "$oldrun/remote-log"
fi
done
# Download and expand the tarball on all systems.
for i in $systems
do
echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
ret=$?
if test "$ret" -ne 0
then
echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
exit 10 | tee -a "$oldrun/remote-log"
fi
done
# Function to check for presence of a file on the specified system.
# Complain if the system cannot be reached, and retry after a wait.
# Currently just waits forever if a machine disappears.
#
# Usage: checkremotefile system pathname
checkremotefile () {
local ret
local sleeptime=60
while :
do
ssh $1 "test -f \"$2\""
ret=$?
if test "$ret" -ne 255
then
return $ret
fi
echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date`
sleep $sleeptime
done
}
# Function to start batches on idle remote $systems
#
# Usage: startbatches curbatch nbatches
#
# Batches are numbered starting at 1. Returns the next batch to start.
# Be careful to redirect all debug output to FD 2 (stderr).
startbatches () {
local curbatch="$1"
local nbatches="$2"
local ret
# Each pass through the following loop examines one system.
for i in $systems
do
if test "$curbatch" -gt "$nbatches"
then
echo $((nbatches + 1))
return 0
fi
if checkremotefile "$i" "$resdir/$ds/remote.run" 1>&2
then
continue # System still running last test, skip.
fi
ssh "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
ret=$?
if test "$ret" -ne 0
then
echo ssh $i failed: exitcode $ret 1>&2
exit 11
fi
echo " ----" System $i Batch `head -n $curbatch < "$rundir"/scenarios | tail -1` `date` 1>&2
curbatch=$((curbatch + 1))
done
echo $curbatch
}
# Launch all the scenarios.
nbatches="`wc -l "$rundir"/scenarios | awk '{ print $1 }'`"
curbatch=1
while test "$curbatch" -le "$nbatches"
do
startbatches $curbatch $nbatches > $T/curbatch 2> $T/startbatches.stderr
curbatch="`cat $T/curbatch`"
if test -s "$T/startbatches.stderr"
then
cat "$T/startbatches.stderr" | tee -a "$oldrun/remote-log"
fi
if test "$curbatch" -le "$nbatches"
then
sleep 30
fi
done
echo All batches started. `date`
# Wait for all remaining scenarios to complete and collect results.
for i in $systems
do
while checkremotefile "$i" "$resdir/$ds/remote.run"
do
sleep 30
done
( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu_pid */qemu-retval; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
done
( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
exit "`cat $T/exitcode`"
......@@ -20,6 +20,9 @@ mkdir $T
cd `dirname $scriptname`/../../../../../
# This script knows only English.
LANG=en_US.UTF-8; export LANG
dur=$((30*60))
dryrun=""
KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
......@@ -41,6 +44,7 @@ TORTURE_KCONFIG_KASAN_ARG=""
TORTURE_KCONFIG_KCSAN_ARG=""
TORTURE_KMAKE_ARG=""
TORTURE_QEMU_MEM=512
TORTURE_REMOTE=
TORTURE_SHUTDOWN_GRACE=180
TORTURE_SUITE=rcu
TORTURE_MOD=rcutorture
......@@ -64,7 +68,7 @@ usage () {
echo " --cpus N"
echo " --datestamp string"
echo " --defconfig string"
echo " --dryrun batches|sched|script"
echo " --dryrun batches|scenarios|sched|script"
echo " --duration minutes | <seconds>s | <hours>h | <days>d"
echo " --gdb"
echo " --help"
......@@ -77,6 +81,7 @@ usage () {
echo " --no-initrd"
echo " --qemu-args qemu-arguments"
echo " --qemu-cmd qemu-system-..."
echo " --remote"
echo " --results absolute-pathname"
echo " --torture lock|rcu|rcuscale|refscale|scf"
echo " --trust-make"
......@@ -112,10 +117,13 @@ do
checkarg --cpus "(number)" "$#" "$2" '^[0-9]*$' '^--'
cpus=$2
TORTURE_ALLOTED_CPUS="$2"
max_cpus="`identify_qemu_vcpus`"
if test "$TORTURE_ALLOTED_CPUS" -gt "$max_cpus"
if test -z "$TORTURE_REMOTE"
then
TORTURE_ALLOTED_CPUS=$max_cpus
max_cpus="`identify_qemu_vcpus`"
if test "$TORTURE_ALLOTED_CPUS" -gt "$max_cpus"
then
TORTURE_ALLOTED_CPUS=$max_cpus
fi
fi
shift
;;
......@@ -130,7 +138,7 @@ do
shift
;;
--dryrun)
checkarg --dryrun "batches|sched|script" $# "$2" 'batches\|sched\|script' '^--'
checkarg --dryrun "batches|sched|script" $# "$2" 'batches\|scenarios\|sched\|script' '^--'
dryrun=$2
shift
;;
......@@ -206,6 +214,9 @@ do
TORTURE_QEMU_CMD="$2"
shift
;;
--remote)
TORTURE_REMOTE=1
;;
--results)
checkarg --results "(absolute pathname)" "$#" "$2" '^/' '^error'
resdir=$2
......@@ -550,20 +561,7 @@ END {
if (ncpus != 0)
dump(first, i, batchnum);
}' >> $T/script
cat << '___EOF___' >> $T/script
echo | tee -a $TORTURE_RESDIR/log
echo | tee -a $TORTURE_RESDIR/log
echo " --- `date` Test summary:" | tee -a $TORTURE_RESDIR/log
___EOF___
cat << ___EOF___ >> $T/script
echo Results directory: $resdir/$ds | tee -a $resdir/$ds/log
kcsan-collapse.sh $resdir/$ds | tee -a $resdir/$ds/log
kvm-recheck.sh $resdir/$ds > $T/kvm-recheck.sh.out 2>&1
___EOF___
echo 'ret=$?' >> $T/script
echo "cat $T/kvm-recheck.sh.out | tee -a $resdir/$ds/log" >> $T/script
echo 'exit $ret' >> $T/script
echo kvm-end-run-stats.sh "$resdir/$ds" "$starttime" >> $T/script
# Extract the tests and their batches from the script.
egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" |
......@@ -577,6 +575,25 @@ egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" |
print batchno, $1, $2
}' > $T/batches
# As above, but one line per batch.
grep -v '^#' $T/batches | awk '
BEGIN {
oldbatch = 1;
}
{
if (oldbatch != $1) {
print ++n ". " curbatch;
curbatch = "";
oldbatch = $1;
}
curbatch = curbatch " " $2;
}
END {
print ++n ". " curbatch;
}' > $T/scenarios
if test "$dryrun" = script
then
cat $T/script
......@@ -597,13 +614,17 @@ elif test "$dryrun" = batches
then
cat $T/batches
exit 0
elif test "$dryrun" = scenarios
then
cat $T/scenarios
exit 0
else
# Not a dryrun. Record the batches and the number of CPUs, then run the script.
bash $T/script
ret=$?
cp $T/batches $resdir/$ds/batches
cp $T/scenarios $resdir/$ds/scenarios
echo '#' cpus=$cpus >> $resdir/$ds/batches
echo " --- Done at `date` (`get_starttime_duration $starttime`) exitcode $ret" | tee -a $resdir/$ds/log
exit $ret
fi
......
......@@ -302,7 +302,7 @@ function torture_set {
kcsan_kmake_tag="--kmake-args"
cur_kcsan_kmake_args="$kcsan_kmake_args"
fi
torture_one $* --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan
torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan
fi
}
......
CONFIG_SMP=y
CONFIG_NR_CPUS=16
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
#CHECK#CONFIG_PREEMPT_RCU=y
CONFIG_HZ_PERIODIC=y
CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_TRACE=y
CONFIG_HOTPLUG_CPU=y
CONFIG_RCU_FANOUT=2
CONFIG_RCU_FANOUT_LEAF=2
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y
rcutorture.test_boost=2
rcutorture.stutter=0
rcutree.gp_preinit_delay=12
rcutree.gp_init_delay=3
rcutree.gp_cleanup_delay=3
rcutree.kthread_prio=2
threadirqs
tree.use_softirq=0
......@@ -7,7 +7,7 @@ CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_FAST_NO_HZ=n
CONFIG_HOTPLUG_CPU=n
CONFIG_HOTPLUG_CPU=y
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_NOCB_CPU=n
......
......@@ -8,7 +8,7 @@ CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_FAST_NO_HZ=n
CONFIG_HOTPLUG_CPU=n
CONFIG_HOTPLUG_CPU=y
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=3
......
......@@ -7,7 +7,7 @@ CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_FAST_NO_HZ=n
CONFIG_HOTPLUG_CPU=n
CONFIG_HOTPLUG_CPU=y
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_NOCB_CPU=n
......
......@@ -7,7 +7,7 @@ CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_FAST_NO_HZ=n
CONFIG_HOTPLUG_CPU=n
CONFIG_HOTPLUG_CPU=y
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_NOCB_CPU=n
......
......@@ -174,7 +174,7 @@ static inline bool spin_trylock(spinlock_t *lock)
}
struct completion {
/* Hopefuly this won't overflow. */
/* Hopefully this won't overflow. */
unsigned int count;
};
......