Commit 87492c06 authored by Paul E. McKenney

Merge branches 'doc.2022.10.20a', 'fixes.2022.10.21a', 'lazy.2022.11.30a', 'srcunmisafe.2022.11.09a', 'torture.2022.10.18c' and 'torturescript.2022.10.20a' into HEAD

doc.2022.10.20a: Documentation updates.
fixes.2022.10.21a: Miscellaneous fixes.
lazy.2022.11.30a: Lazy call_rcu() and NOCB updates.
srcunmisafe.2022.11.09a: NMI-safe SRCU readers.
torture.2022.10.18c: Torture-test updates.
torturescript.2022.10.20a: Torture-test scripting updates.
@@ -468,6 +468,9 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM
 config ARCH_HAVE_NMI_SAFE_CMPXCHG
     bool

+config ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
+    bool
+
 config HAVE_ALIGNED_STRUCT_PAGE
     bool
     help
......
@@ -31,6 +31,7 @@ config ARM64
     select ARCH_HAS_KCOV
     select ARCH_HAS_KEEPINITRD
     select ARCH_HAS_MEMBARRIER_SYNC_CORE
+    select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
     select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
     select ARCH_HAS_PTE_DEVMAP
     select ARCH_HAS_PTE_SPECIAL
......
@@ -10,6 +10,7 @@ config LOONGARCH
     select ARCH_ENABLE_MEMORY_HOTPLUG
     select ARCH_ENABLE_MEMORY_HOTREMOVE
     select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
+    select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
     select ARCH_HAS_PTE_SPECIAL
     select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
     select ARCH_INLINE_READ_LOCK if !PREEMPTION
......
@@ -73,6 +73,7 @@ config S390
     select ARCH_HAS_GIGANTIC_PAGE
     select ARCH_HAS_KCOV
     select ARCH_HAS_MEM_ENCRYPT
+    select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
     select ARCH_HAS_PTE_SPECIAL
     select ARCH_HAS_SCALED_CPUTIME
     select ARCH_HAS_SET_MEMORY
......
@@ -81,6 +81,7 @@ config X86
     select ARCH_HAS_KCOV if X86_64
     select ARCH_HAS_MEM_ENCRYPT
     select ARCH_HAS_MEMBARRIER_SYNC_CORE
+    select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
     select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
     select ARCH_HAS_PMEM_API if X86_64
     select ARCH_HAS_PTE_DEVMAP if X86_64
......
@@ -312,7 +312,7 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd)
      * Ensure that all tasks observe the host state change before the
      * host_failed change.
      */
-    call_rcu(&scmd->rcu, scsi_eh_inc_host_failed);
+    call_rcu_hurry(&scmd->rcu, scsi_eh_inc_host_failed);
 }

 /**
......
@@ -416,7 +416,7 @@ static __always_inline void guest_context_enter_irqoff(void)
      */
     if (!context_tracking_guest_enter()) {
         instrumentation_begin();
-        rcu_virt_note_context_switch(smp_processor_id());
+        rcu_virt_note_context_switch();
         instrumentation_end();
     }
 }
......
@@ -108,6 +108,15 @@ static inline int rcu_preempt_depth(void)

 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */

+#ifdef CONFIG_RCU_LAZY
+void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func);
+#else
+static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
+{
+    call_rcu(head, func);
+}
+#endif
+
 /* Internal to kernel */
 void rcu_init(void);
 extern int rcu_scheduler_active;
@@ -340,6 +349,11 @@ static inline int rcu_read_lock_any_held(void)
     return !preemptible();
 }

+static inline int debug_lockdep_rcu_enabled(void)
+{
+    return 0;
+}
+
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */

 #ifdef CONFIG_PROVE_RCU
......
@@ -142,12 +142,10 @@ static inline int rcu_needs_cpu(void)
  * Take advantage of the fact that there is only one CPU, which
  * allows us to ignore virtualization-based context switches.
  */
-static inline void rcu_virt_note_context_switch(int cpu) { }
+static inline void rcu_virt_note_context_switch(void) { }
 static inline void rcu_cpu_stall_reset(void) { }
 static inline int rcu_jiffies_till_stall_check(void) { return 21 * HZ; }
 static inline void rcu_irq_exit_check_preempt(void) { }
-#define rcu_is_idle_cpu(cpu) \
-    (is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq())
 static inline void exit_rcu(void) { }
 static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t)
 {
......
@@ -27,7 +27,7 @@ void rcu_cpu_stall_reset(void);
  * wrapper around rcu_note_context_switch(), which allows TINY_RCU
  * to save a few bytes. The caller must have disabled interrupts.
  */
-static inline void rcu_virt_note_context_switch(int cpu)
+static inline void rcu_virt_note_context_switch(void)
 {
     rcu_note_context_switch(false);
 }
@@ -87,8 +87,6 @@ bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp);
 void cond_synchronize_rcu(unsigned long oldstate);
 void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp);

-bool rcu_is_idle_cpu(int cpu);
-
 #ifdef CONFIG_PROVE_RCU
 void rcu_irq_exit_check_preempt(void);
 #else
......
@@ -76,6 +76,17 @@
  * rcu_read_lock before reading the address, then rcu_read_unlock after
  * taking the spinlock within the structure expected at that address.
  *
+ * Note that it is not possible to acquire a lock within a structure
+ * allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference
+ * as described above. The reason is that SLAB_TYPESAFE_BY_RCU pages
+ * are not zeroed before being given to the slab, which means that any
+ * locks must be initialized after each and every kmem_struct_alloc().
+ * Alternatively, make the ctor passed to kmem_cache_create() initialize
+ * the locks at page-allocation time, as is done in __i915_request_ctor(),
+ * sighand_ctor(), and anon_vma_ctor(). Such a ctor permits readers
+ * to safely acquire those ctor-initialized locks under rcu_read_lock()
+ * protection.
+ *
  * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
  */
 /* Defer freeing slabs to RCU */
......
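A minimal sketch of the ctor pattern the new slab.h comment describes, for illustration only: struct foo, foo_ctor(), foo_cachep, and foo_cache_init() are hypothetical names, not part of this commit. Because the ctor runs when the slab page is allocated rather than on every allocation, the spinlock stays valid across type-safe reuse of the object, so readers may acquire it under rcu_read_lock() as described above.

/* Hypothetical example, not part of this commit. */
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo {
    spinlock_t lock;    /* Initialized by the ctor, never by kmem_cache_alloc() callers. */
    int refcount;
};

static struct kmem_cache *foo_cachep;

/* Invoked when a new slab page is allocated for the cache, not on each allocation. */
static void foo_ctor(void *addr)
{
    struct foo *fp = addr;

    spin_lock_init(&fp->lock);
}

static int __init foo_cache_init(void)
{
    foo_cachep = kmem_cache_create("foo", sizeof(struct foo), 0,
                                   SLAB_TYPESAFE_BY_RCU, foo_ctor);
    return foo_cachep ? 0 : -ENOMEM;
}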
@@ -64,6 +64,20 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp);
 unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp);
 bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie);

+#ifdef CONFIG_NEED_SRCU_NMI_SAFE
+int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp);
+void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp);
+#else
+static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
+{
+    return __srcu_read_lock(ssp);
+}
+static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
+{
+    __srcu_read_unlock(ssp, idx);
+}
+#endif /* CONFIG_NEED_SRCU_NMI_SAFE */
+
 #ifdef CONFIG_SRCU
 void srcu_init(void);
 #else /* #ifdef CONFIG_SRCU */
@@ -104,6 +118,18 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp)

 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */

+#define SRCU_NMI_UNKNOWN	0x0
+#define SRCU_NMI_UNSAFE		0x1
+#define SRCU_NMI_SAFE		0x2
+
+#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU)
+void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe);
+#else
+static inline void srcu_check_nmi_safety(struct srcu_struct *ssp,
+					 bool nmi_safe) { }
+#endif
+
 /**
  * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing
  * @p: the pointer to fetch and protect for later dereferencing
@@ -161,17 +187,36 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
 {
     int retval;

+    srcu_check_nmi_safety(ssp, false);
     retval = __srcu_read_lock(ssp);
     rcu_lock_acquire(&(ssp)->dep_map);
     return retval;
 }

+/**
+ * srcu_read_lock_nmisafe - register a new reader for an SRCU-protected structure.
+ * @ssp: srcu_struct in which to register the new reader.
+ *
+ * Enter an SRCU read-side critical section, but in an NMI-safe manner.
+ * See srcu_read_lock() for more information.
+ */
+static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp)
+{
+    int retval;
+
+    srcu_check_nmi_safety(ssp, true);
+    retval = __srcu_read_lock_nmisafe(ssp);
+    rcu_lock_acquire(&(ssp)->dep_map);
+    return retval;
+}
+
 /* Used by tracing, cannot be traced and cannot invoke lockdep. */
 static inline notrace int
 srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
 {
     int retval;

+    srcu_check_nmi_safety(ssp, false);
     retval = __srcu_read_lock(ssp);
     return retval;
 }
@@ -187,14 +232,32 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
     __releases(ssp)
 {
     WARN_ON_ONCE(idx & ~0x1);
+    srcu_check_nmi_safety(ssp, false);
     rcu_lock_release(&(ssp)->dep_map);
     __srcu_read_unlock(ssp, idx);
 }

+/**
+ * srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure.
+ * @ssp: srcu_struct in which to unregister the old reader.
+ * @idx: return value from corresponding srcu_read_lock().
+ *
+ * Exit an SRCU read-side critical section, but in an NMI-safe manner.
+ */
+static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
+    __releases(ssp)
+{
+    WARN_ON_ONCE(idx & ~0x1);
+    srcu_check_nmi_safety(ssp, true);
+    rcu_lock_release(&(ssp)->dep_map);
+    __srcu_read_unlock_nmisafe(ssp, idx);
+}
+
 /* Used by tracing, cannot be traced and cannot call lockdep. */
 static inline notrace void
 srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp)
 {
+    srcu_check_nmi_safety(ssp, false);
     __srcu_read_unlock(ssp, idx);
 }
......
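A sketch of how the new NMI-safe SRCU reader API might be used; my_srcu, my_data, my_ptr, and my_nmi_handler() are made-up names, not part of this commit. Note that a given srcu_struct should be read either only with the _nmisafe flavor or only with the ordinary flavor; the srcu_check_nmi_safety() calls added above warn about mixing the two under CONFIG_PROVE_RCU.

/* Hypothetical example, not part of this commit. */
#include <linux/srcu.h>

DEFINE_STATIC_SRCU(my_srcu);

struct my_data {
    int value;
};

static struct my_data __rcu *my_ptr;

/* Reader that can run in NMI context. */
static int my_nmi_handler(void)
{
    struct my_data *p;
    int idx;
    int ret = 0;

    idx = srcu_read_lock_nmisafe(&my_srcu);
    p = srcu_dereference(my_ptr, &my_srcu);
    if (p)
        ret = READ_ONCE(p->value);
    srcu_read_unlock_nmisafe(&my_srcu, idx);
    return ret;
}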
@@ -23,8 +23,9 @@ struct srcu_struct;
  */
 struct srcu_data {
     /* Read-side state. */
-    unsigned long srcu_lock_count[2];	/* Locks per CPU. */
-    unsigned long srcu_unlock_count[2];	/* Unlocks per CPU. */
+    atomic_long_t srcu_lock_count[2];	/* Locks per CPU. */
+    atomic_long_t srcu_unlock_count[2];	/* Unlocks per CPU. */
+    int srcu_nmi_safety;		/* NMI-safe srcu_struct structure? */

     /* Update-side state. */
     spinlock_t __private lock ____cacheline_internodealigned_in_smp;
......
@@ -72,6 +72,9 @@ config TREE_SRCU
     help
       This option selects the full-fledged version of SRCU.

+config NEED_SRCU_NMI_SAFE
+    def_bool HAVE_NMI && !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !TINY_SRCU
+
 config TASKS_RCU_GENERIC
     def_bool TASKS_RCU || TASKS_RUDE_RCU || TASKS_TRACE_RCU
     select SRCU
@@ -311,4 +314,12 @@ config TASKS_TRACE_RCU_READ_MB
       Say N here if you hate read-side memory barriers.
       Take the default if you are unsure.

+config RCU_LAZY
+    bool "RCU callback lazy invocation functionality"
+    depends on RCU_NOCB_CPU
+    default n
+    help
+      To save power, batch RCU callbacks and flush after delay, memory
+      pressure, or callback list growing too big.
+
 endmenu # "RCU Subsystem"
@@ -474,6 +474,14 @@ enum rcutorture_type {
     INVALID_RCU_FLAVOR
 };

+#if defined(CONFIG_RCU_LAZY)
+unsigned long rcu_lazy_get_jiffies_till_flush(void);
+void rcu_lazy_set_jiffies_till_flush(unsigned long j);
+#else
+static inline unsigned long rcu_lazy_get_jiffies_till_flush(void) { return 0; }
+static inline void rcu_lazy_set_jiffies_till_flush(unsigned long j) { }
+#endif
+
 #if defined(CONFIG_TREE_RCU)
 void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
                             unsigned long *gp_seq);
......
@@ -95,6 +95,7 @@ torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
 torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
 torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() scale test?");
 torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
+torture_param(int, kfree_by_call_rcu, 0, "Use call_rcu() to emulate kfree_rcu()?");

 static char *scale_type = "rcu";
 module_param(scale_type, charp, 0444);
@@ -175,7 +176,7 @@ static struct rcu_scale_ops rcu_ops = {
     .get_gp_seq = rcu_get_gp_seq,
     .gp_diff = rcu_seq_diff,
     .exp_completed = rcu_exp_batches_completed,
-    .async = call_rcu,
+    .async = call_rcu_hurry,
     .gp_barrier = rcu_barrier,
     .sync = synchronize_rcu,
     .exp_sync = synchronize_rcu_expedited,
@@ -659,6 +660,14 @@ struct kfree_obj {
     struct rcu_head rh;
 };

+/* Used if doing RCU-kfree'ing via call_rcu(). */
+static void kfree_call_rcu(struct rcu_head *rh)
+{
+    struct kfree_obj *obj = container_of(rh, struct kfree_obj, rh);
+
+    kfree(obj);
+}
+
 static int
 kfree_scale_thread(void *arg)
 {
@@ -696,6 +705,11 @@ kfree_scale_thread(void *arg)
             if (!alloc_ptr)
                 return -ENOMEM;

+            if (kfree_by_call_rcu) {
+                call_rcu(&(alloc_ptr->rh), kfree_call_rcu);
+                continue;
+            }
+
             // By default kfree_rcu_test_single and kfree_rcu_test_double are
             // initialized to false. If both have the same value (false or true)
             // both are randomly tested, otherwise only the one with value true
@@ -767,11 +781,58 @@ kfree_scale_shutdown(void *arg)
     return -EINVAL;
 }

+// Used if doing RCU-kfree'ing via call_rcu.
+static unsigned long jiffies_at_lazy_cb;
+static struct rcu_head lazy_test1_rh;
+static int rcu_lazy_test1_cb_called;
+static void call_rcu_lazy_test1(struct rcu_head *rh)
+{
+    jiffies_at_lazy_cb = jiffies;
+    WRITE_ONCE(rcu_lazy_test1_cb_called, 1);
+}
+
 static int __init
 kfree_scale_init(void)
 {
-    long i;
     int firsterr = 0;
+    long i;
+    unsigned long jif_start;
+    unsigned long orig_jif;
+
+    // Also, do a quick self-test to ensure laziness is as much as
+    // expected.
+    if (kfree_by_call_rcu && !IS_ENABLED(CONFIG_RCU_LAZY)) {
+        pr_alert("CONFIG_RCU_LAZY is disabled, falling back to kfree_rcu() for delayed RCU kfree'ing\n");
+        kfree_by_call_rcu = 0;
+    }
+
+    if (kfree_by_call_rcu) {
+        /* do a test to check the timeout. */
+        orig_jif = rcu_lazy_get_jiffies_till_flush();
+
+        rcu_lazy_set_jiffies_till_flush(2 * HZ);
+        rcu_barrier();
+
+        jif_start = jiffies;
+        jiffies_at_lazy_cb = 0;
+        call_rcu(&lazy_test1_rh, call_rcu_lazy_test1);
+
+        smp_cond_load_relaxed(&rcu_lazy_test1_cb_called, VAL == 1);
+
+        rcu_lazy_set_jiffies_till_flush(orig_jif);
+
+        if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start < 2 * HZ)) {
+            pr_alert("ERROR: call_rcu() CBs are not being lazy as expected!\n");
+            WARN_ON_ONCE(1);
+            return -1;
+        }
+
+        if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start > 3 * HZ)) {
+            pr_alert("ERROR: call_rcu() CBs are being too lazy!\n");
+            WARN_ON_ONCE(1);
+            return -1;
+        }
+    }
+
     kfree_nrealthreads = compute_real(kfree_nthreads);
     /* Start up the kthreads. */
@@ -784,7 +845,9 @@ kfree_scale_init(void)
         schedule_timeout_uninterruptible(1);
     }

-    pr_alert("kfree object size=%zu\n", kfree_mult * sizeof(struct kfree_obj));
+    pr_alert("kfree object size=%zu, kfree_by_call_rcu=%d\n",
+             kfree_mult * sizeof(struct kfree_obj),
+             kfree_by_call_rcu);

     kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]),
                                  GFP_KERNEL);
......
@@ -357,6 +357,10 @@ struct rcu_torture_ops {
     bool (*poll_gp_state_exp)(unsigned long oldstate);
     void (*cond_sync_exp)(unsigned long oldstate);
     void (*cond_sync_exp_full)(struct rcu_gp_oldstate *rgosp);
+    unsigned long (*get_comp_state)(void);
+    void (*get_comp_state_full)(struct rcu_gp_oldstate *rgosp);
+    bool (*same_gp_state)(unsigned long oldstate1, unsigned long oldstate2);
+    bool (*same_gp_state_full)(struct rcu_gp_oldstate *rgosp1, struct rcu_gp_oldstate *rgosp2);
     unsigned long (*get_gp_state)(void);
     void (*get_gp_state_full)(struct rcu_gp_oldstate *rgosp);
     unsigned long (*get_gp_completed)(void);
@@ -510,7 +514,7 @@ static unsigned long rcu_no_completed(void)

 static void rcu_torture_deferred_free(struct rcu_torture *p)
 {
-    call_rcu(&p->rtort_rcu, rcu_torture_cb);
+    call_rcu_hurry(&p->rtort_rcu, rcu_torture_cb);
 }

 static void rcu_sync_torture_init(void)
@@ -535,6 +539,10 @@ static struct rcu_torture_ops rcu_ops = {
     .deferred_free = rcu_torture_deferred_free,
     .sync = synchronize_rcu,
     .exp_sync = synchronize_rcu_expedited,
+    .same_gp_state = same_state_synchronize_rcu,
+    .same_gp_state_full = same_state_synchronize_rcu_full,
+    .get_comp_state = get_completed_synchronize_rcu,
+    .get_comp_state_full = get_completed_synchronize_rcu_full,
     .get_gp_state = get_state_synchronize_rcu,
     .get_gp_state_full = get_state_synchronize_rcu_full,
     .get_gp_completed = get_completed_synchronize_rcu,
@@ -551,7 +559,7 @@ static struct rcu_torture_ops rcu_ops = {
     .start_gp_poll_exp_full = start_poll_synchronize_rcu_expedited_full,
     .poll_gp_state_exp = poll_state_synchronize_rcu,
     .cond_sync_exp = cond_synchronize_rcu_expedited,
-    .call = call_rcu,
+    .call = call_rcu_hurry,
     .cb_barrier = rcu_barrier,
     .fqs = rcu_force_quiescent_state,
     .stats = NULL,
@@ -615,9 +623,13 @@ static struct rcu_torture_ops rcu_busted_ops = {
 DEFINE_STATIC_SRCU(srcu_ctl);
 static struct srcu_struct srcu_ctld;
 static struct srcu_struct *srcu_ctlp = &srcu_ctl;
+static struct rcu_torture_ops srcud_ops;

 static int srcu_torture_read_lock(void) __acquires(srcu_ctlp)
 {
-    return srcu_read_lock(srcu_ctlp);
+    if (cur_ops == &srcud_ops)
+        return srcu_read_lock_nmisafe(srcu_ctlp);
+    else
+        return srcu_read_lock(srcu_ctlp);
 }

@@ -642,6 +654,9 @@ srcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)

 static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp)
 {
-    srcu_read_unlock(srcu_ctlp, idx);
+    if (cur_ops == &srcud_ops)
+        srcu_read_unlock_nmisafe(srcu_ctlp, idx);
+    else
+        srcu_read_unlock(srcu_ctlp, idx);
 }

@@ -848,7 +863,7 @@ static void rcu_tasks_torture_deferred_free(struct rcu_torture *p)

 static void synchronize_rcu_mult_test(void)
 {
-    synchronize_rcu_mult(call_rcu_tasks, call_rcu);
+    synchronize_rcu_mult(call_rcu_tasks, call_rcu_hurry);
 }

 static struct rcu_torture_ops tasks_ops = {
@@ -1258,13 +1273,15 @@ static void rcu_torture_write_types(void)
     } else if (gp_normal && !cur_ops->deferred_free) {
         pr_alert("%s: gp_normal without primitives.\n", __func__);
     }
-    if (gp_poll1 && cur_ops->start_gp_poll && cur_ops->poll_gp_state) {
+    if (gp_poll1 && cur_ops->get_comp_state && cur_ops->same_gp_state &&
+        cur_ops->start_gp_poll && cur_ops->poll_gp_state) {
         synctype[nsynctypes++] = RTWS_POLL_GET;
         pr_info("%s: Testing polling GPs.\n", __func__);
     } else if (gp_poll && (!cur_ops->start_gp_poll || !cur_ops->poll_gp_state)) {
         pr_alert("%s: gp_poll without primitives.\n", __func__);
     }
-    if (gp_poll_full1 && cur_ops->start_gp_poll_full && cur_ops->poll_gp_state_full) {
+    if (gp_poll_full1 && cur_ops->get_comp_state_full && cur_ops->same_gp_state_full
+        && cur_ops->start_gp_poll_full && cur_ops->poll_gp_state_full) {
         synctype[nsynctypes++] = RTWS_POLL_GET_FULL;
         pr_info("%s: Testing polling full-state GPs.\n", __func__);
     } else if (gp_poll_full && (!cur_ops->start_gp_poll_full || !cur_ops->poll_gp_state_full)) {
@@ -1339,14 +1356,18 @@ rcu_torture_writer(void *arg)
     struct rcu_gp_oldstate cookie_full;
     int expediting = 0;
     unsigned long gp_snap;
+    unsigned long gp_snap1;
     struct rcu_gp_oldstate gp_snap_full;
+    struct rcu_gp_oldstate gp_snap1_full;
     int i;
     int idx;
     int oldnice = task_nice(current);
+    struct rcu_gp_oldstate rgo[NUM_ACTIVE_RCU_POLL_FULL_OLDSTATE];
     struct rcu_torture *rp;
     struct rcu_torture *old_rp;
     static DEFINE_TORTURE_RANDOM(rand);
     bool stutter_waited;
+    unsigned long ulo[NUM_ACTIVE_RCU_POLL_OLDSTATE];

     VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
     if (!can_expedite)
@@ -1463,20 +1484,43 @@ rcu_torture_writer(void *arg)
             break;
         case RTWS_POLL_GET:
             rcu_torture_writer_state = RTWS_POLL_GET;
+            for (i = 0; i < ARRAY_SIZE(ulo); i++)
+                ulo[i] = cur_ops->get_comp_state();
             gp_snap = cur_ops->start_gp_poll();
             rcu_torture_writer_state = RTWS_POLL_WAIT;
-            while (!cur_ops->poll_gp_state(gp_snap))
+            while (!cur_ops->poll_gp_state(gp_snap)) {
+                gp_snap1 = cur_ops->get_gp_state();
+                for (i = 0; i < ARRAY_SIZE(ulo); i++)
+                    if (cur_ops->poll_gp_state(ulo[i]) ||
+                        cur_ops->same_gp_state(ulo[i], gp_snap1)) {
+                        ulo[i] = gp_snap1;
+                        break;
+                    }
+                WARN_ON_ONCE(i >= ARRAY_SIZE(ulo));
                 torture_hrtimeout_jiffies(torture_random(&rand) % 16,
                                           &rand);
+            }
             rcu_torture_pipe_update(old_rp);
             break;
         case RTWS_POLL_GET_FULL:
             rcu_torture_writer_state = RTWS_POLL_GET_FULL;
+            for (i = 0; i < ARRAY_SIZE(rgo); i++)
+                cur_ops->get_comp_state_full(&rgo[i]);
             cur_ops->start_gp_poll_full(&gp_snap_full);
             rcu_torture_writer_state = RTWS_POLL_WAIT_FULL;
-            while (!cur_ops->poll_gp_state_full(&gp_snap_full))
+            while (!cur_ops->poll_gp_state_full(&gp_snap_full)) {
+                cur_ops->get_gp_state_full(&gp_snap1_full);
+                for (i = 0; i < ARRAY_SIZE(rgo); i++)
+                    if (cur_ops->poll_gp_state_full(&rgo[i]) ||
+                        cur_ops->same_gp_state_full(&rgo[i],
+                                                    &gp_snap1_full)) {
+                        rgo[i] = gp_snap1_full;
+                        break;
+                    }
+                WARN_ON_ONCE(i >= ARRAY_SIZE(rgo));
                 torture_hrtimeout_jiffies(torture_random(&rand) % 16,
                                           &rand);
+            }
             rcu_torture_pipe_update(old_rp);
             break;
         case RTWS_POLL_GET_EXP:
@@ -3388,13 +3432,13 @@ static void rcu_test_debug_objects(void)
     /* Try to queue the rh2 pair of callbacks for the same grace period. */
     preempt_disable(); /* Prevent preemption from interrupting test. */
     rcu_read_lock(); /* Make it impossible to finish a grace period. */
-    call_rcu(&rh1, rcu_torture_leak_cb); /* Start grace period. */
+    call_rcu_hurry(&rh1, rcu_torture_leak_cb); /* Start grace period. */
     local_irq_disable(); /* Make it harder to start a new grace period. */
-    call_rcu(&rh2, rcu_torture_leak_cb);
-    call_rcu(&rh2, rcu_torture_err_cb); /* Duplicate callback. */
+    call_rcu_hurry(&rh2, rcu_torture_leak_cb);
+    call_rcu_hurry(&rh2, rcu_torture_err_cb); /* Duplicate callback. */
     if (rhp) {
-        call_rcu(rhp, rcu_torture_leak_cb);
-        call_rcu(rhp, rcu_torture_err_cb); /* Another duplicate callback. */
+        call_rcu_hurry(rhp, rcu_torture_leak_cb);
+        call_rcu_hurry(rhp, rcu_torture_err_cb); /* Another duplicate callback. */
     }
     local_irq_enable();
     rcu_read_unlock();
......
@@ -417,7 +417,7 @@ static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
     for_each_possible_cpu(cpu) {
         struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu);

-        sum += READ_ONCE(cpuc->srcu_lock_count[idx]);
+        sum += atomic_long_read(&cpuc->srcu_lock_count[idx]);
     }
     return sum;
 }
@@ -429,13 +429,18 @@ static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
 static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
 {
     int cpu;
+    unsigned long mask = 0;
     unsigned long sum = 0;

     for_each_possible_cpu(cpu) {
         struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu);

-        sum += READ_ONCE(cpuc->srcu_unlock_count[idx]);
+        sum += atomic_long_read(&cpuc->srcu_unlock_count[idx]);
+        if (IS_ENABLED(CONFIG_PROVE_RCU))
+            mask = mask | READ_ONCE(cpuc->srcu_nmi_safety);
     }
+    WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask >> 1)),
+              "Mixed NMI-safe readers for srcu_struct at %ps.\n", ssp);
     return sum;
 }
@@ -503,10 +508,10 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
     for_each_possible_cpu(cpu) {
         struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu);

-        sum += READ_ONCE(cpuc->srcu_lock_count[0]);
-        sum += READ_ONCE(cpuc->srcu_lock_count[1]);
-        sum -= READ_ONCE(cpuc->srcu_unlock_count[0]);
-        sum -= READ_ONCE(cpuc->srcu_unlock_count[1]);
+        sum += atomic_long_read(&cpuc->srcu_lock_count[0]);
+        sum += atomic_long_read(&cpuc->srcu_lock_count[1]);
+        sum -= atomic_long_read(&cpuc->srcu_unlock_count[0]);
+        sum -= atomic_long_read(&cpuc->srcu_unlock_count[1]);
     }
     return sum;
 }
@@ -626,6 +631,29 @@ void cleanup_srcu_struct(struct srcu_struct *ssp)
 }
 EXPORT_SYMBOL_GPL(cleanup_srcu_struct);

+#ifdef CONFIG_PROVE_RCU
+/*
+ * Check for consistent NMI safety.
+ */
+void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe)
+{
+    int nmi_safe_mask = 1 << nmi_safe;
+    int old_nmi_safe_mask;
+    struct srcu_data *sdp;
+
+    /* NMI-unsafe use in NMI is a bad sign */
+    WARN_ON_ONCE(!nmi_safe && in_nmi());
+    sdp = raw_cpu_ptr(ssp->sda);
+    old_nmi_safe_mask = READ_ONCE(sdp->srcu_nmi_safety);
+    if (!old_nmi_safe_mask) {
+        WRITE_ONCE(sdp->srcu_nmi_safety, nmi_safe_mask);
+        return;
+    }
+    WARN_ONCE(old_nmi_safe_mask != nmi_safe_mask, "CPU %d old state %d new state %d\n", sdp->cpu, old_nmi_safe_mask, nmi_safe_mask);
+}
+EXPORT_SYMBOL_GPL(srcu_check_nmi_safety);
+#endif /* CONFIG_PROVE_RCU */
+
 /*
  * Counts the new reader in the appropriate per-CPU element of the
  * srcu_struct.
@@ -636,7 +664,7 @@ int __srcu_read_lock(struct srcu_struct *ssp)
     int idx;

     idx = READ_ONCE(ssp->srcu_idx) & 0x1;
-    this_cpu_inc(ssp->sda->srcu_lock_count[idx]);
+    this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter);
     smp_mb(); /* B */  /* Avoid leaking the critical section. */
     return idx;
 }
@@ -650,10 +678,45 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
 void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
 {
     smp_mb(); /* C */  /* Avoid leaking the critical section. */
-    this_cpu_inc(ssp->sda->srcu_unlock_count[idx]);
+    this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter);
 }
 EXPORT_SYMBOL_GPL(__srcu_read_unlock);

+#ifdef CONFIG_NEED_SRCU_NMI_SAFE
+
+/*
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct, but in an NMI-safe manner using RMW atomics.
+ * Returns an index that must be passed to the matching srcu_read_unlock().
+ */
+int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
+{
+    int idx;
+    struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
+
+    idx = READ_ONCE(ssp->srcu_idx) & 0x1;
+    atomic_long_inc(&sdp->srcu_lock_count[idx]);
+    smp_mb__after_atomic(); /* B */  /* Avoid leaking the critical section. */
+    return idx;
+}
+EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
+
+/*
+ * Removes the count for the old reader from the appropriate per-CPU
+ * element of the srcu_struct. Note that this may well be a different
+ * CPU than that which was incremented by the corresponding srcu_read_lock().
+ */
+void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
+{
+    struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
+
+    smp_mb__before_atomic(); /* C */  /* Avoid leaking the critical section. */
+    atomic_long_inc(&sdp->srcu_unlock_count[idx]);
+}
+EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe);
+
+#endif // CONFIG_NEED_SRCU_NMI_SAFE
+
 /*
  * Start an SRCU grace period.
  */
@@ -1090,7 +1153,12 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
     int ss_state;

     check_init_srcu_struct(ssp);
-    idx = srcu_read_lock(ssp);
+    /*
+     * While starting a new grace period, make sure we are in an
+     * SRCU read-side critical section so that the grace-period
+     * sequence number cannot wrap around in the meantime.
+     */
+    idx = __srcu_read_lock_nmisafe(ssp);
     ss_state = smp_load_acquire(&ssp->srcu_size_state);
     if (ss_state < SRCU_SIZE_WAIT_CALL)
         sdp = per_cpu_ptr(ssp->sda, 0);
@@ -1123,7 +1191,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
         srcu_funnel_gp_start(ssp, sdp, s, do_norm);
     else if (needexp)
         srcu_funnel_exp_start(ssp, sdp_mynode, s);
-    srcu_read_unlock(ssp, idx);
+    __srcu_read_unlock_nmisafe(ssp, idx);
     return s;
 }
@@ -1427,13 +1495,13 @@ void srcu_barrier(struct srcu_struct *ssp)
     /* Initial count prevents reaching zero until all CBs are posted. */
     atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);

-    idx = srcu_read_lock(ssp);
+    idx = __srcu_read_lock_nmisafe(ssp);
     if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
         srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0));
     else
         for_each_possible_cpu(cpu)
             srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu));
-    srcu_read_unlock(ssp, idx);
+    __srcu_read_unlock_nmisafe(ssp, idx);

     /* Remove the initial count, at which point reaching zero can happen. */
     if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))
@@ -1687,8 +1755,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
             struct srcu_data *sdp;

             sdp = per_cpu_ptr(ssp->sda, cpu);
-            u0 = data_race(sdp->srcu_unlock_count[!idx]);
-            u1 = data_race(sdp->srcu_unlock_count[idx]);
+            u0 = data_race(atomic_long_read(&sdp->srcu_unlock_count[!idx]));
+            u1 = data_race(atomic_long_read(&sdp->srcu_unlock_count[idx]));

             /*
              * Make sure that a lock is always counted if the corresponding
@@ -1696,8 +1764,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
              */
             smp_rmb();

-            l0 = data_race(sdp->srcu_lock_count[!idx]);
-            l1 = data_race(sdp->srcu_lock_count[idx]);
+            l0 = data_race(atomic_long_read(&sdp->srcu_lock_count[!idx]));
+            l1 = data_race(atomic_long_read(&sdp->srcu_lock_count[idx]));

             c0 = l0 - u0;
             c1 = l1 - u1;
......
@@ -44,7 +44,7 @@ static void rcu_sync_func(struct rcu_head *rhp);

 static void rcu_sync_call(struct rcu_sync *rsp)
 {
-    call_rcu(&rsp->cb_head, rcu_sync_func);
+    call_rcu_hurry(&rsp->cb_head, rcu_sync_func);
 }

 /**
......
@@ -728,7 +728,7 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
         if (rtsi > 0 && !reported && time_after(j, lastinfo + rtsi)) {
             lastinfo = j;
             rtsi = rtsi * rcu_task_stall_info_mult;
-            pr_info("%s: %s grace period %lu is %lu jiffies old.\n",
+            pr_info("%s: %s grace period number %lu (since boot) is %lu jiffies old.\n",
                     __func__, rtp->kname, rtp->tasks_gp_seq, j - rtp->gp_start);
         }
     }
......
@@ -44,7 +44,7 @@ static struct rcu_ctrlblk rcu_ctrlblk = {

 void rcu_barrier(void)
 {
-    wait_rcu_gp(call_rcu);
+    wait_rcu_gp(call_rcu_hurry);
 }
 EXPORT_SYMBOL(rcu_barrier);
......
@@ -301,12 +301,6 @@ static bool rcu_dynticks_in_eqs(int snap)
     return !(snap & RCU_DYNTICKS_IDX);
 }

-/* Return true if the specified CPU is currently idle from an RCU viewpoint. */
-bool rcu_is_idle_cpu(int cpu)
-{
-    return rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu));
-}
-
 /*
  * Return true if the CPU corresponding to the specified rcu_data
  * structure has spent some time in an extended quiescent state since
@@ -2106,7 +2100,7 @@ int rcutree_dying_cpu(unsigned int cpu)
     if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
         return 0;

-    blkd = !!(rnp->qsmask & rdp->grpmask);
+    blkd = !!(READ_ONCE(rnp->qsmask) & rdp->grpmask);
     trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),
                            blkd ? TPS("cpuofl-bgp") : TPS("cpuofl"));
     return 0;
@@ -2416,7 +2410,7 @@ void rcu_force_quiescent_state(void)
     struct rcu_node *rnp_old = NULL;

     /* Funnel through hierarchy to reduce memory contention. */
-    rnp = __this_cpu_read(rcu_data.mynode);
+    rnp = raw_cpu_read(rcu_data.mynode);
     for (; rnp != NULL; rnp = rnp->parent) {
         ret = (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) ||
               !raw_spin_trylock(&rnp->fqslock);
@@ -2728,47 +2722,8 @@ static void check_cb_ovld(struct rcu_data *rdp)
     raw_spin_unlock_rcu_node(rnp);
 }

-/**
- * call_rcu() - Queue an RCU callback for invocation after a grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all pre-existing RCU read-side
- * critical sections have completed. However, the callback function
- * might well execute concurrently with RCU read-side critical sections
- * that started after call_rcu() was invoked.
- *
- * RCU read-side critical sections are delimited by rcu_read_lock()
- * and rcu_read_unlock(), and may be nested. In addition, but only in
- * v5.0 and later, regions of code across which interrupts, preemption,
- * or softirqs have been disabled also serve as RCU read-side critical
- * sections. This includes hardware interrupt handlers, softirq handlers,
- * and NMI handlers.
- *
- * Note that all CPUs must agree that the grace period extended beyond
- * all pre-existing RCU read-side critical section. On systems with more
- * than one CPU, this means that when "func()" is invoked, each CPU is
- * guaranteed to have executed a full memory barrier since the end of its
- * last RCU read-side critical section whose beginning preceded the call
- * to call_rcu(). It also means that each CPU executing an RCU read-side
- * critical section that continues beyond the start of "func()" must have
- * executed a memory barrier after the call_rcu() but before the beginning
- * of that RCU read-side critical section. Note that these guarantees
- * include CPUs that are offline, idle, or executing in user mode, as
- * well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
- * resulting RCU callback function "func()", then both CPU A and CPU B are
- * guaranteed to execute a full memory barrier during the time interval
- * between the call to call_rcu() and the invocation of "func()" -- even
- * if CPU A and CPU B are the same CPU (but again only if the system has
- * more than one CPU).
- *
- * Implementation of these memory-ordering guarantees is described here:
- * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
- */
-void call_rcu(struct rcu_head *head, rcu_callback_t func)
+static void
+__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy)
 {
     static atomic_t doublefrees;
     unsigned long flags;
@@ -2809,7 +2764,7 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
     }

     check_cb_ovld(rdp);
-    if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags))
+    if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy))
         return; // Enqueued onto ->nocb_bypass, so just leave.
     // If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock.
     rcu_segcblist_enqueue(&rdp->cblist, head);
@@ -2831,8 +2786,84 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
         local_irq_restore(flags);
     }
 }
-EXPORT_SYMBOL_GPL(call_rcu);

+#ifdef CONFIG_RCU_LAZY
+/**
+ * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and
+ * flush all lazy callbacks (including the new one) to the main ->cblist while
+ * doing so.
+ *
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all pre-existing RCU read-side
+ * critical sections have completed.
+ *
+ * Use this API instead of call_rcu() if you don't want the callback to be
+ * invoked after very long periods of time, which can happen on systems without
+ * memory pressure and on systems which are lightly loaded or mostly idle.
+ * This function will cause callbacks to be invoked sooner than later at the
+ * expense of extra power. Other than that, this function is identical to, and
+ * reuses call_rcu()'s logic. Refer to call_rcu() for more details about memory
+ * ordering and other functionality.
+ */
+void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
+{
+	return __call_rcu_common(head, func, false);
+}
+EXPORT_SYMBOL_GPL(call_rcu_hurry);
+#endif
+
+/**
+ * call_rcu() - Queue an RCU callback for invocation after a grace period.
+ * By default the callbacks are 'lazy' and are kept hidden from the main
+ * ->cblist to prevent starting of grace periods too soon.
+ * If you desire grace periods to start very soon, use call_rcu_hurry().
+ *
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all pre-existing RCU read-side
+ * critical sections have completed. However, the callback function
+ * might well execute concurrently with RCU read-side critical sections
+ * that started after call_rcu() was invoked.
+ *
+ * RCU read-side critical sections are delimited by rcu_read_lock()
+ * and rcu_read_unlock(), and may be nested. In addition, but only in
+ * v5.0 and later, regions of code across which interrupts, preemption,
+ * or softirqs have been disabled also serve as RCU read-side critical
+ * sections. This includes hardware interrupt handlers, softirq handlers,
+ * and NMI handlers.
+ *
+ * Note that all CPUs must agree that the grace period extended beyond
+ * all pre-existing RCU read-side critical section. On systems with more
+ * than one CPU, this means that when "func()" is invoked, each CPU is
+ * guaranteed to have executed a full memory barrier since the end of its
+ * last RCU read-side critical section whose beginning preceded the call
+ * to call_rcu(). It also means that each CPU executing an RCU read-side
+ * critical section that continues beyond the start of "func()" must have
+ * executed a memory barrier after the call_rcu() but before the beginning
+ * of that RCU read-side critical section. Note that these guarantees
+ * include CPUs that are offline, idle, or executing in user mode, as
+ * well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ * resulting RCU callback function "func()", then both CPU A and CPU B are
+ * guaranteed to execute a full memory barrier during the time interval
+ * between the call to call_rcu() and the invocation of "func()" -- even
+ * if CPU A and CPU B are the same CPU (but again only if the system has
+ * more than one CPU).
+ *
+ * Implementation of these memory-ordering guarantees is described here:
+ * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
+ */
+void call_rcu(struct rcu_head *head, rcu_callback_t func)
+{
+	return __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY));
+}
+EXPORT_SYMBOL_GPL(call_rcu);

 /* Maximum number of jiffies to wait before draining a batch. */
 #define KFREE_DRAIN_JIFFIES (5 * HZ)
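As a usage illustration of the call_rcu()/call_rcu_hurry() split introduced above (not part of this commit): callers that only free memory keep using call_rcu() and may now be batched lazily under CONFIG_RCU_LAZY, while callers whose forward progress depends on the callback running soon switch to call_rcu_hurry(), mirroring the scsi_error.c and rcu_sync conversions in this merge. struct my_obj, my_done, and the helper names below are hypothetical.

/* Hypothetical example, not part of this commit. */
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/completion.h>

struct my_obj {
    struct rcu_head rh;
    int data;
};

static DECLARE_COMPLETION(my_done);

/* Memory reclaim only: laziness is fine, so plain call_rcu() is used. */
static void my_obj_free_cb(struct rcu_head *rh)
{
    kfree(container_of(rh, struct my_obj, rh));
}

static void my_obj_release(struct my_obj *obj)
{
    call_rcu(&obj->rh, my_obj_free_cb);
}

/* Something waits on this callback, so it must not linger in the lazy list. */
static void my_obj_done_cb(struct rcu_head *rh)
{
    complete(&my_done);
}

static void my_obj_finish(struct my_obj *obj)
{
    call_rcu_hurry(&obj->rh, my_obj_done_cb);
}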
@@ -3507,7 +3538,7 @@ void synchronize_rcu(void)
         if (rcu_gp_is_expedited())
             synchronize_rcu_expedited();
         else
-            wait_rcu_gp(call_rcu);
+            wait_rcu_gp(call_rcu_hurry);
         return;
     }
@@ -3894,6 +3925,8 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
 {
     unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence);
     unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap);
+    bool wake_nocb = false;
+    bool was_alldone = false;

     lockdep_assert_held(&rcu_state.barrier_lock);
     if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq))
@@ -3902,7 +3935,14 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
     rdp->barrier_head.func = rcu_barrier_callback;
     debug_rcu_head_queue(&rdp->barrier_head);
     rcu_nocb_lock(rdp);
-    WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
+    /*
+     * Flush bypass and wakeup rcuog if we add callbacks to an empty regular
+     * queue. This way we don't wait for bypass timer that can reach seconds
+     * if it's fully lazy.
+     */
+    was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist);
+    WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false));
+    wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist);
     if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) {
         atomic_inc(&rcu_state.barrier_cpu_count);
     } else {
@@ -3910,6 +3950,8 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
         rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence);
     }
     rcu_nocb_unlock(rdp);
+    if (wake_nocb)
+        wake_nocb_gp(rdp, false);
     smp_store_release(&rdp->barrier_seq_snap, gseq);
 }
@@ -4276,8 +4318,6 @@ void rcu_report_dead(unsigned int cpu)

     // Do any dangling deferred wakeups.
     do_nocb_deferred_wakeup(rdp);

-    /* QS for any half-done expedited grace period. */
-    rcu_report_exp_rdp(rdp);
     rcu_preempt_deferred_qs(current);

     /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
@@ -4325,7 +4365,7 @@ void rcutree_migrate_callbacks(int cpu)
     my_rdp = this_cpu_ptr(&rcu_data);
     my_rnp = my_rdp->mynode;
     rcu_nocb_lock(my_rdp); /* irqs already disabled. */
-    WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies));
+    WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies, false));
     raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */
     /* Leverage recent GPs and set GP for new callbacks. */
     needwake = rcu_advance_cbs(my_rnp, rdp) ||
......
@@ -263,14 +263,16 @@ struct rcu_data {
     unsigned long last_fqs_resched;	/* Time of last rcu_resched(). */
     unsigned long last_sched_clock;	/* Jiffies of last rcu_sched_clock_irq(). */

+    long lazy_len;			/* Length of buffered lazy callbacks. */
     int cpu;
 };

 /* Values for nocb_defer_wakeup field in struct rcu_data. */
 #define RCU_NOCB_WAKE_NOT	0
 #define RCU_NOCB_WAKE_BYPASS	1
-#define RCU_NOCB_WAKE		2
-#define RCU_NOCB_WAKE_FORCE	3
+#define RCU_NOCB_WAKE_LAZY	2
+#define RCU_NOCB_WAKE		3
+#define RCU_NOCB_WAKE_FORCE	4

 #define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
 					/* For jiffies_till_first_fqs and */
@@ -439,10 +441,12 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp);
 static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
 static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
 static void rcu_init_one_nocb(struct rcu_node *rnp);
+static bool wake_nocb_gp(struct rcu_data *rdp, bool force);
 static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
-                                  unsigned long j);
+                                  unsigned long j, bool lazy);
 static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
-                                bool *was_alldone, unsigned long flags);
+                                bool *was_alldone, unsigned long flags,
+                                bool lazy);
 static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
                                  unsigned long flags);
 static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level);
......
@@ -937,7 +937,7 @@ void synchronize_rcu_expedited(void)

     /* If expedited grace periods are prohibited, fall back to normal. */
     if (rcu_gp_is_normal()) {
-        wait_rcu_gp(call_rcu);
+        wait_rcu_gp(call_rcu_hurry);
         return;
     }
......
...@@ -256,6 +256,31 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force) ...@@ -256,6 +256,31 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
return __wake_nocb_gp(rdp_gp, rdp, force, flags); return __wake_nocb_gp(rdp_gp, rdp, force, flags);
} }
/*
* LAZY_FLUSH_JIFFIES decides the maximum amount of time that
* can elapse before lazy callbacks are flushed. Lazy callbacks
* could be flushed much earlier for a number of other reasons;
* however, LAZY_FLUSH_JIFFIES ensures that no lazy callback is
* left unsubmitted to RCU for longer than that many jiffies.
*/
#define LAZY_FLUSH_JIFFIES (10 * HZ)
static unsigned long jiffies_till_flush = LAZY_FLUSH_JIFFIES;
#ifdef CONFIG_RCU_LAZY
// To be called only from test code.
void rcu_lazy_set_jiffies_till_flush(unsigned long jif)
{
jiffies_till_flush = jif;
}
EXPORT_SYMBOL(rcu_lazy_set_jiffies_till_flush);
unsigned long rcu_lazy_get_jiffies_till_flush(void)
{
return jiffies_till_flush;
}
EXPORT_SYMBOL(rcu_lazy_get_jiffies_till_flush);
#endif
/* /*
* Arrange to wake the GP kthread for this NOCB group at some future * Arrange to wake the GP kthread for this NOCB group at some future
* time when it is safe to do so. * time when it is safe to do so.
...@@ -269,10 +294,14 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype, ...@@ -269,10 +294,14 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags); raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
/* /*
* Bypass wakeup overrides previous deferments. In case * Bypass wakeup overrides previous deferments. In case of
* of callback storm, no need to wake up too early. * callback storms, no need to wake up too early.
*/ */
if (waketype == RCU_NOCB_WAKE_BYPASS) { if (waketype == RCU_NOCB_WAKE_LAZY &&
rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) {
mod_timer(&rdp_gp->nocb_timer, jiffies + jiffies_till_flush);
WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
} else if (waketype == RCU_NOCB_WAKE_BYPASS) {
mod_timer(&rdp_gp->nocb_timer, jiffies + 2); mod_timer(&rdp_gp->nocb_timer, jiffies + 2);
WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype); WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
} else { } else {
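The hunk above gives wake_nocb_gp_defer() a third deferral flavor: a lazy wakeup arms a long timer (jiffies_till_flush) but only when no deferment is already pending, a bypass wakeup arms the short two-jiffy timer as before, and stronger requests keep upgrading the pending level. A minimal standalone sketch of that policy, with illustrative names (defer_state, defer_wake) and HZ fixed at 1000 purely for the arithmetic:

#include <assert.h>
#include <stdio.h>

#define HZ 1000
#define JIFFIES_TILL_FLUSH (10 * HZ)

enum wake_type { WAKE_NOT, WAKE_BYPASS, WAKE_LAZY, WAKE, WAKE_FORCE };

struct defer_state {
	enum wake_type level;         /* deferred wakeup currently armed */
	unsigned long timer_expires;  /* absolute "jiffies" of the wakeup timer */
};

/* Sketch of the deferral policy: lazy arms a long timer only when idle,
 * bypass arms a short one, anything stronger only upgrades the level. */
static void defer_wake(struct defer_state *s, enum wake_type want, unsigned long now)
{
	if (want == WAKE_LAZY && s->level == WAKE_NOT) {
		s->timer_expires = now + JIFFIES_TILL_FLUSH;
		s->level = want;
	} else if (want == WAKE_BYPASS) {
		s->timer_expires = now + 2;
		s->level = want;
	} else {
		if (s->level < WAKE)
			s->timer_expires = now + 1;
		if (s->level < want)
			s->level = want;
	}
}

int main(void)
{
	struct defer_state s = { WAKE_NOT, 0 };

	defer_wake(&s, WAKE_LAZY, 100);    /* idle: arm the long lazy timer */
	assert(s.level == WAKE_LAZY && s.timer_expires == 100 + JIFFIES_TILL_FLUSH);

	defer_wake(&s, WAKE_BYPASS, 150);  /* bypass overrides previous deferments */
	assert(s.level == WAKE_BYPASS && s.timer_expires == 152);

	defer_wake(&s, WAKE, 200);         /* stronger request: short timer, higher level */
	assert(s.level == WAKE && s.timer_expires == 201);

	printf("defer_wake sketch OK\n");
	return 0;
}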
...@@ -293,12 +322,16 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype, ...@@ -293,12 +322,16 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
* proves to be initially empty, just return false because the no-CB GP * proves to be initially empty, just return false because the no-CB GP
* kthread may need to be awakened in this case. * kthread may need to be awakened in this case.
* *
* Return true if there was something to be flushed and it succeeded, otherwise
* false.
*
* Note that this function always returns true if rhp is NULL. * Note that this function always returns true if rhp is NULL.
*/ */
static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp_in,
unsigned long j) unsigned long j, bool lazy)
{ {
struct rcu_cblist rcl; struct rcu_cblist rcl;
struct rcu_head *rhp = rhp_in;
WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp)); WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp));
rcu_lockdep_assert_cblist_protected(rdp); rcu_lockdep_assert_cblist_protected(rdp);
...@@ -310,7 +343,20 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, ...@@ -310,7 +343,20 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
/* Note: ->cblist.len already accounts for ->nocb_bypass contents. */ /* Note: ->cblist.len already accounts for ->nocb_bypass contents. */
if (rhp) if (rhp)
rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */ rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
/*
* If the newly requested CB is a lazy one, queue it onto the main
* ->cblist so that it can take advantage of the grace period that will
* happen regardless. But queue it onto the bypass list first so that
* the lazy CB stays ordered with the CBs already in the bypass list.
*/
if (lazy && rhp) {
rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);
rhp = NULL;
}
rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp); rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp);
WRITE_ONCE(rdp->lazy_len, 0);
rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl); rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl);
WRITE_ONCE(rdp->nocb_bypass_first, j); WRITE_ONCE(rdp->nocb_bypass_first, j);
rcu_nocb_bypass_unlock(rdp); rcu_nocb_bypass_unlock(rdp);
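rcu_nocb_do_flush_bypass() now takes a lazy flag: a lazy newcomer is first enqueued onto the bypass list so it stays ordered behind the callbacks already buffered there, then everything is spliced onto the main ->cblist and ->lazy_len is zeroed because no lazy callbacks remain buffered. A self-contained sketch of that ordering with simplified list types (cblist, do_flush_bypass here are stand-ins, not the kernel's rcu_cblist API):

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct cb {
	struct cb *next;
	int id;
};

struct cblist {
	struct cb *head, **tail;
	long len;
};

static void cblist_init(struct cblist *l)
{
	l->head = NULL;
	l->tail = &l->head;
	l->len = 0;
}

static void cblist_enqueue(struct cblist *l, struct cb *c)
{
	c->next = NULL;
	*l->tail = c;
	l->tail = &c->next;
	l->len++;
}

/* Splice all of @from onto the end of @to, then append @rhp (if any). */
static void flush_enqueue(struct cblist *to, struct cblist *from, struct cb *rhp)
{
	if (from->head) {
		*to->tail = from->head;
		to->tail = from->tail;
		to->len += from->len;
		cblist_init(from);
	}
	if (rhp)
		cblist_enqueue(to, rhp);
}

/* Sketch of the flush: a lazy newcomer goes onto the bypass list first so
 * it stays ordered behind the callbacks already buffered there. */
static void do_flush_bypass(struct cblist *cblist, struct cblist *bypass,
			    long *lazy_len, struct cb *rhp, int lazy)
{
	if (lazy && rhp) {
		cblist_enqueue(bypass, rhp);
		rhp = NULL;
	}
	flush_enqueue(cblist, bypass, rhp);
	*lazy_len = 0;
}

int main(void)
{
	struct cblist cblist, bypass;
	struct cb a = { .id = 1 }, b = { .id = 2 }, lazy_cb = { .id = 3 };
	long lazy_len;

	cblist_init(&cblist);
	cblist_init(&bypass);
	cblist_enqueue(&bypass, &a);
	cblist_enqueue(&bypass, &b);
	lazy_len = 2;

	do_flush_bypass(&cblist, &bypass, &lazy_len, &lazy_cb, 1);

	assert(lazy_len == 0);
	assert(bypass.len == 0);
	assert(cblist.len == 3);
	assert(cblist.head->id == 1 && cblist.head->next->id == 2 &&
	       cblist.head->next->next->id == 3);
	printf("flush ordering sketch OK\n");
	return 0;
}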
...@@ -326,13 +372,13 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, ...@@ -326,13 +372,13 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
* Note that this function always returns true if rhp is NULL. * Note that this function always returns true if rhp is NULL.
*/ */
static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
unsigned long j) unsigned long j, bool lazy)
{ {
if (!rcu_rdp_is_offloaded(rdp)) if (!rcu_rdp_is_offloaded(rdp))
return true; return true;
rcu_lockdep_assert_cblist_protected(rdp); rcu_lockdep_assert_cblist_protected(rdp);
rcu_nocb_bypass_lock(rdp); rcu_nocb_bypass_lock(rdp);
return rcu_nocb_do_flush_bypass(rdp, rhp, j); return rcu_nocb_do_flush_bypass(rdp, rhp, j, lazy);
} }
/* /*
...@@ -345,7 +391,7 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j) ...@@ -345,7 +391,7 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)
if (!rcu_rdp_is_offloaded(rdp) || if (!rcu_rdp_is_offloaded(rdp) ||
!rcu_nocb_bypass_trylock(rdp)) !rcu_nocb_bypass_trylock(rdp))
return; return;
WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j)); WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j, false));
} }
/* /*
...@@ -367,12 +413,14 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j) ...@@ -367,12 +413,14 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)
* there is only one CPU in operation. * there is only one CPU in operation.
*/ */
static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
bool *was_alldone, unsigned long flags) bool *was_alldone, unsigned long flags,
bool lazy)
{ {
unsigned long c; unsigned long c;
unsigned long cur_gp_seq; unsigned long cur_gp_seq;
unsigned long j = jiffies; unsigned long j = jiffies;
long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
bool bypass_is_lazy = (ncbs == READ_ONCE(rdp->lazy_len));
lockdep_assert_irqs_disabled(); lockdep_assert_irqs_disabled();
...@@ -417,24 +465,29 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, ...@@ -417,24 +465,29 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
// If there hasn't yet been all that many ->cblist enqueues // If there hasn't yet been all that many ->cblist enqueues
// this jiffy, tell the caller to enqueue onto ->cblist. But flush // this jiffy, tell the caller to enqueue onto ->cblist. But flush
// ->nocb_bypass first. // ->nocb_bypass first.
if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy) { // Lazy CBs throttle this back and do immediate bypass queuing.
if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy && !lazy) {
rcu_nocb_lock(rdp); rcu_nocb_lock(rdp);
*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
if (*was_alldone) if (*was_alldone)
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("FirstQ")); TPS("FirstQ"));
WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j));
WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j, false));
WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass)); WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
return false; // Caller must enqueue the callback. return false; // Caller must enqueue the callback.
} }
// If ->nocb_bypass has been used too long or is too full, // If ->nocb_bypass has been used too long or is too full,
// flush ->nocb_bypass to ->cblist. // flush ->nocb_bypass to ->cblist.
if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) || if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) ||
(ncbs && bypass_is_lazy &&
(time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush))) ||
ncbs >= qhimark) { ncbs >= qhimark) {
rcu_nocb_lock(rdp); rcu_nocb_lock(rdp);
if (!rcu_nocb_flush_bypass(rdp, rhp, j)) {
*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
if (!rcu_nocb_flush_bypass(rdp, rhp, j, lazy)) {
if (*was_alldone) if (*was_alldone)
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("FirstQ")); TPS("FirstQ"));
...@@ -447,7 +500,12 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, ...@@ -447,7 +500,12 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
rcu_advance_cbs_nowake(rdp->mynode, rdp); rcu_advance_cbs_nowake(rdp->mynode, rdp);
rdp->nocb_gp_adv_time = j; rdp->nocb_gp_adv_time = j;
} }
rcu_nocb_unlock_irqrestore(rdp, flags);
// The flush succeeded and we moved CBs into the regular list.
// Don't wait for the wakeup timer, as it may be too far in the future.
// Instead, wake up the GP kthread now if the cblist was empty.
__call_rcu_nocb_wake(rdp, *was_alldone, flags);
return true; // Callback already enqueued. return true; // Callback already enqueued.
} }
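The enqueue-time flush test above now distinguishes a bypass list holding only lazy callbacks (tolerated for jiffies_till_flush) from one holding any non-lazy callback (flushed as soon as a new jiffy starts), with qhimark capping both. A small sketch of that predicate; QHIMARK here stands in for the qhimark module parameter and bypass_needs_flush is an illustrative name:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define HZ 1000
#define JIFFIES_TILL_FLUSH (10 * HZ)   /* LAZY_FLUSH_JIFFIES in the patch */
#define QHIMARK 10000                  /* stand-in for the qhimark module parameter */

/* Wraparound-safe "a is after b", as the kernel's time_after() does it. */
static bool time_after(unsigned long a, unsigned long b)
{
	return (long)(b - a) < 0;
}

/*
 * Sketch of the enqueue-time flush decision: a bypass list holding only
 * lazy callbacks is tolerated for jiffies_till_flush, a mixed/non-lazy
 * one for at most the current jiffy, and qhimark caps both.
 */
static bool bypass_needs_flush(unsigned long now, unsigned long first,
			       long ncbs, bool bypass_is_lazy)
{
	if (!ncbs)
		return false;
	if (ncbs >= QHIMARK)
		return true;
	if (bypass_is_lazy)
		return time_after(now, first + JIFFIES_TILL_FLUSH);
	return now != first;
}

int main(void)
{
	/* Non-lazy bypass: flushed as soon as a new jiffy starts. */
	assert(!bypass_needs_flush(100, 100, 5, false));
	assert(bypass_needs_flush(101, 100, 5, false));

	/* Lazy-only bypass: left alone until jiffies_till_flush elapses. */
	assert(!bypass_needs_flush(100 + JIFFIES_TILL_FLUSH, 100, 5, true));
	assert(bypass_needs_flush(101 + JIFFIES_TILL_FLUSH, 100, 5, true));

	/* Either kind: flushed once it is qhimark-huge. */
	assert(bypass_needs_flush(100, 100, QHIMARK, true));

	printf("bypass flush threshold sketch OK\n");
	return 0;
}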
...@@ -457,13 +515,24 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, ...@@ -457,13 +515,24 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */ rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
rcu_cblist_enqueue(&rdp->nocb_bypass, rhp); rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);
if (lazy)
WRITE_ONCE(rdp->lazy_len, rdp->lazy_len + 1);
if (!ncbs) { if (!ncbs) {
WRITE_ONCE(rdp->nocb_bypass_first, j); WRITE_ONCE(rdp->nocb_bypass_first, j);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ")); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ"));
} }
rcu_nocb_bypass_unlock(rdp); rcu_nocb_bypass_unlock(rdp);
smp_mb(); /* Order enqueue before wake. */ smp_mb(); /* Order enqueue before wake. */
if (ncbs) { // A wake up of the grace period kthread or timer adjustment
// needs to be done only if:
// 1. Bypass list was fully empty before (this is the first
// bypass list entry), or:
// 2. Both of these conditions are met:
// a. The bypass list previously had only lazy CBs, and:
// b. The new CB is non-lazy.
if (ncbs && (!bypass_is_lazy || lazy)) {
local_irq_restore(flags); local_irq_restore(flags);
} else { } else {
// No-CBs GP kthread might be indefinitely asleep, if so, wake. // No-CBs GP kthread might be indefinitely asleep, if so, wake.
...@@ -491,8 +560,10 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, ...@@ -491,8 +560,10 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
unsigned long flags) unsigned long flags)
__releases(rdp->nocb_lock) __releases(rdp->nocb_lock)
{ {
long bypass_len;
unsigned long cur_gp_seq; unsigned long cur_gp_seq;
unsigned long j; unsigned long j;
long lazy_len;
long len; long len;
struct task_struct *t; struct task_struct *t;
...@@ -506,9 +577,16 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, ...@@ -506,9 +577,16 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
} }
// Need to actually do a wakeup. // Need to actually do a wakeup.
len = rcu_segcblist_n_cbs(&rdp->cblist); len = rcu_segcblist_n_cbs(&rdp->cblist);
bypass_len = rcu_cblist_n_cbs(&rdp->nocb_bypass);
lazy_len = READ_ONCE(rdp->lazy_len);
if (was_alldone) { if (was_alldone) {
rdp->qlen_last_fqs_check = len; rdp->qlen_last_fqs_check = len;
if (!irqs_disabled_flags(flags)) { // Only lazy CBs in bypass list
if (lazy_len && bypass_len == lazy_len) {
rcu_nocb_unlock_irqrestore(rdp, flags);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY,
TPS("WakeLazy"));
} else if (!irqs_disabled_flags(flags)) {
/* ... if queue was empty ... */ /* ... if queue was empty ... */
rcu_nocb_unlock_irqrestore(rdp, flags); rcu_nocb_unlock_irqrestore(rdp, flags);
wake_nocb_gp(rdp, false); wake_nocb_gp(rdp, false);
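The new branch in __call_rcu_nocb_wake() means that when the main list was empty and the bypass list holds nothing but lazy callbacks, the GP kthread is only nudged through the long lazy deferral rather than woken immediately. A compact sketch of that choice (choose_wake and the enum are illustrative; the !was_alldone side's queue-growth checks are omitted):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

enum wake_action { NO_WAKE, WAKE_NOW, DEFER_WAKE, DEFER_WAKE_LAZY };

/*
 * Sketch of the post-enqueue wakeup choice: if the main list was empty
 * and the bypass list holds nothing but lazy callbacks, the GP kthread
 * is only poked via the long lazy deferral; otherwise the pre-existing
 * immediate/deferred wakeup logic applies.
 */
static enum wake_action choose_wake(bool was_alldone, long bypass_len,
				    long lazy_len, bool irqs_disabled)
{
	if (!was_alldone)
		return NO_WAKE;                 /* (queue-growth checks omitted) */
	if (lazy_len && bypass_len == lazy_len)
		return DEFER_WAKE_LAZY;         /* only lazy CBs are pending */
	if (!irqs_disabled)
		return WAKE_NOW;
	return DEFER_WAKE;
}

int main(void)
{
	/* First callback is lazy: no immediate wake, just a long timer. */
	assert(choose_wake(true, 1, 1, false) == DEFER_WAKE_LAZY);

	/* A non-lazy callback among the lazy ones forces the usual wakeup. */
	assert(choose_wake(true, 2, 1, false) == WAKE_NOW);
	assert(choose_wake(true, 2, 1, true) == DEFER_WAKE);

	/* Main list was not empty: nothing new for the wakeup side to do. */
	assert(choose_wake(false, 3, 3, false) == NO_WAKE);

	printf("call_rcu_nocb wake sketch OK\n");
	return 0;
}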
...@@ -599,12 +677,12 @@ static void nocb_gp_sleep(struct rcu_data *my_rdp, int cpu) ...@@ -599,12 +677,12 @@ static void nocb_gp_sleep(struct rcu_data *my_rdp, int cpu)
static void nocb_gp_wait(struct rcu_data *my_rdp) static void nocb_gp_wait(struct rcu_data *my_rdp)
{ {
bool bypass = false; bool bypass = false;
long bypass_ncbs;
int __maybe_unused cpu = my_rdp->cpu; int __maybe_unused cpu = my_rdp->cpu;
unsigned long cur_gp_seq; unsigned long cur_gp_seq;
unsigned long flags; unsigned long flags;
bool gotcbs = false; bool gotcbs = false;
unsigned long j = jiffies; unsigned long j = jiffies;
bool lazy = false;
bool needwait_gp = false; // This prevents actual uninitialized use. bool needwait_gp = false; // This prevents actual uninitialized use.
bool needwake; bool needwake;
bool needwake_gp; bool needwake_gp;
...@@ -634,23 +712,42 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) ...@@ -634,23 +712,42 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
* won't be ignored for long. * won't be ignored for long.
*/ */
list_for_each_entry(rdp, &my_rdp->nocb_head_rdp, nocb_entry_rdp) { list_for_each_entry(rdp, &my_rdp->nocb_head_rdp, nocb_entry_rdp) {
long bypass_ncbs;
bool flush_bypass = false;
long lazy_ncbs;
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check")); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check"));
rcu_nocb_lock_irqsave(rdp, flags); rcu_nocb_lock_irqsave(rdp, flags);
lockdep_assert_held(&rdp->nocb_lock); lockdep_assert_held(&rdp->nocb_lock);
bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
if (bypass_ncbs && lazy_ncbs = READ_ONCE(rdp->lazy_len);
if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) &&
(time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush) ||
bypass_ncbs > 2 * qhimark)) {
flush_bypass = true;
} else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) &&
(time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) || (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) ||
bypass_ncbs > 2 * qhimark)) { bypass_ncbs > 2 * qhimark)) {
// Bypass full or old, so flush it. flush_bypass = true;
(void)rcu_nocb_try_flush_bypass(rdp, j);
bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
} else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) { } else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) {
rcu_nocb_unlock_irqrestore(rdp, flags); rcu_nocb_unlock_irqrestore(rdp, flags);
continue; /* No callbacks here, try next. */ continue; /* No callbacks here, try next. */
} }
if (flush_bypass) {
// Bypass full or old, so flush it.
(void)rcu_nocb_try_flush_bypass(rdp, j);
bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
lazy_ncbs = READ_ONCE(rdp->lazy_len);
}
if (bypass_ncbs) { if (bypass_ncbs) {
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("Bypass")); bypass_ncbs == lazy_ncbs ? TPS("Lazy") : TPS("Bypass"));
if (bypass_ncbs == lazy_ncbs)
lazy = true;
else
bypass = true; bypass = true;
} }
rnp = rdp->mynode; rnp = rdp->mynode;
...@@ -699,12 +796,20 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) ...@@ -699,12 +796,20 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
my_rdp->nocb_gp_gp = needwait_gp; my_rdp->nocb_gp_gp = needwait_gp;
my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0; my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0;
if (bypass && !rcu_nocb_poll) {
// At least one child with non-empty ->nocb_bypass, so set // At least one child with non-empty ->nocb_bypass, so set
// timer in order to avoid stranding its callbacks. // timer in order to avoid stranding its callbacks.
if (!rcu_nocb_poll) {
// If the bypass list has only lazy CBs, add a deferred lazy wakeup.
if (lazy && !bypass) {
wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_LAZY,
TPS("WakeLazyIsDeferred"));
// Otherwise, add a deferred bypass wakeup.
} else if (bypass) {
wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS, wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS,
TPS("WakeBypassIsDeferred")); TPS("WakeBypassIsDeferred"));
} }
}
if (rcu_nocb_poll) { if (rcu_nocb_poll) {
/* Polling, so trace if first poll in the series. */ /* Polling, so trace if first poll in the series. */
if (gotcbs) if (gotcbs)
...@@ -1030,7 +1135,7 @@ static long rcu_nocb_rdp_deoffload(void *arg) ...@@ -1030,7 +1135,7 @@ static long rcu_nocb_rdp_deoffload(void *arg)
* return false, which means that future calls to rcu_nocb_try_bypass() * return false, which means that future calls to rcu_nocb_try_bypass()
* will refuse to put anything into the bypass. * will refuse to put anything into the bypass.
*/ */
WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies)); WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false));
/* /*
* Start with invoking rcu_core() early. This way if the current thread * Start with invoking rcu_core() early. This way if the current thread
* happens to preempt an ongoing call to rcu_core() in the middle, * happens to preempt an ongoing call to rcu_core() in the middle,
...@@ -1207,47 +1312,87 @@ int rcu_nocb_cpu_offload(int cpu) ...@@ -1207,47 +1312,87 @@ int rcu_nocb_cpu_offload(int cpu)
} }
EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload); EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload);
void __init rcu_init_nohz(void) static unsigned long
lazy_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{ {
int cpu; int cpu;
bool need_rcu_nocb_mask = false; unsigned long count = 0;
bool offload_all = false;
struct rcu_data *rdp;
#if defined(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL) /* Snapshot count of all CPUs */
if (!rcu_state.nocb_is_setup) { for_each_possible_cpu(cpu) {
need_rcu_nocb_mask = true; struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
offload_all = true;
count += READ_ONCE(rdp->lazy_len);
} }
#endif /* #if defined(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL) */
#if defined(CONFIG_NO_HZ_FULL) return count ? count : SHRINK_EMPTY;
if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask)) { }
need_rcu_nocb_mask = true;
offload_all = false; /* NO_HZ_FULL has its own mask. */ static unsigned long
lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
int cpu;
unsigned long flags;
unsigned long count = 0;
/* Snapshot count of all CPUs */
for_each_possible_cpu(cpu) {
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
int _count = READ_ONCE(rdp->lazy_len);
if (_count == 0)
continue;
rcu_nocb_lock_irqsave(rdp, flags);
WRITE_ONCE(rdp->lazy_len, 0);
rcu_nocb_unlock_irqrestore(rdp, flags);
wake_nocb_gp(rdp, false);
sc->nr_to_scan -= _count;
count += _count;
if (sc->nr_to_scan <= 0)
break;
} }
#endif /* #if defined(CONFIG_NO_HZ_FULL) */ return count ? count : SHRINK_STOP;
}
static struct shrinker lazy_rcu_shrinker = {
.count_objects = lazy_rcu_shrink_count,
.scan_objects = lazy_rcu_shrink_scan,
.batch = 0,
.seeks = DEFAULT_SEEKS,
};
void __init rcu_init_nohz(void)
{
int cpu;
struct rcu_data *rdp;
const struct cpumask *cpumask = NULL;
#if defined(CONFIG_NO_HZ_FULL)
if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask))
cpumask = tick_nohz_full_mask;
#endif
if (need_rcu_nocb_mask) { if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL) &&
!rcu_state.nocb_is_setup && !cpumask)
cpumask = cpu_possible_mask;
if (cpumask) {
if (!cpumask_available(rcu_nocb_mask)) { if (!cpumask_available(rcu_nocb_mask)) {
if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) { if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n"); pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
return; return;
} }
} }
cpumask_or(rcu_nocb_mask, rcu_nocb_mask, cpumask);
rcu_state.nocb_is_setup = true; rcu_state.nocb_is_setup = true;
} }
if (!rcu_state.nocb_is_setup) if (!rcu_state.nocb_is_setup)
return; return;
#if defined(CONFIG_NO_HZ_FULL) if (register_shrinker(&lazy_rcu_shrinker, "rcu-lazy"))
if (tick_nohz_full_running) pr_err("Failed to register lazy_rcu shrinker!\n");
cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
#endif /* #if defined(CONFIG_NO_HZ_FULL) */
if (offload_all)
cpumask_setall(rcu_nocb_mask);
if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) { if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n"); pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n");
...@@ -1284,6 +1429,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) ...@@ -1284,6 +1429,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
raw_spin_lock_init(&rdp->nocb_gp_lock); raw_spin_lock_init(&rdp->nocb_gp_lock);
timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0); timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0);
rcu_cblist_init(&rdp->nocb_bypass); rcu_cblist_init(&rdp->nocb_bypass);
WRITE_ONCE(rdp->lazy_len, 0);
mutex_init(&rdp->nocb_gp_kthread_mutex); mutex_init(&rdp->nocb_gp_kthread_mutex);
} }
...@@ -1564,14 +1710,19 @@ static void rcu_init_one_nocb(struct rcu_node *rnp) ...@@ -1564,14 +1710,19 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
{ {
} }
static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
{
return false;
}
static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
unsigned long j) unsigned long j, bool lazy)
{ {
return true; return true;
} }
static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
bool *was_alldone, unsigned long flags) bool *was_alldone, unsigned long flags, bool lazy)
{ {
return false; return false;
} }
......
...@@ -1221,11 +1221,13 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) ...@@ -1221,11 +1221,13 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
* We don't include outgoingcpu in the affinity set, use -1 if there is * We don't include outgoingcpu in the affinity set, use -1 if there is
* no outgoing CPU. If there are no CPUs left in the affinity set, * no outgoing CPU. If there are no CPUs left in the affinity set,
* this function allows the kthread to execute on any CPU. * this function allows the kthread to execute on any CPU.
*
* Any future concurrent calls are serialized via ->boost_kthread_mutex.
*/ */
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
{ {
struct task_struct *t = rnp->boost_kthread_task; struct task_struct *t = rnp->boost_kthread_task;
unsigned long mask = rcu_rnp_online_cpus(rnp); unsigned long mask;
cpumask_var_t cm; cpumask_var_t cm;
int cpu; int cpu;
...@@ -1234,6 +1236,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) ...@@ -1234,6 +1236,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
if (!zalloc_cpumask_var(&cm, GFP_KERNEL)) if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
return; return;
mutex_lock(&rnp->boost_kthread_mutex); mutex_lock(&rnp->boost_kthread_mutex);
mask = rcu_rnp_online_cpus(rnp);
for_each_leaf_node_possible_cpu(rnp, cpu) for_each_leaf_node_possible_cpu(rnp, cpu)
if ((mask & leaf_node_cpu_bit(rnp, cpu)) && if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
cpu != outgoingcpu) cpu != outgoingcpu)
......
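The rcu_boost_kthread_setaffinity() change moves the read of the leaf node's online-CPU mask under ->boost_kthread_mutex, so concurrent hotplug-time callers cannot compute the affinity from a mask snapshot taken before serialization. A tiny pthread sketch of the corrected pattern (online_mask and set_boost_affinity are illustrative stand-ins):

#include <pthread.h>
#include <stdio.h>

/* Shared state standing in for the leaf rcu_node's online-CPU mask. */
static unsigned long online_mask = 0xf;
static pthread_mutex_t boost_kthread_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Sketch of the fix: snapshot the online mask only after acquiring the
 * mutex that serializes concurrent hotplug-time calls, so the affinity
 * computed below cannot be based on a stale mask.
 */
static void set_boost_affinity(int outgoingcpu)
{
	unsigned long mask;
	int cpu;

	pthread_mutex_lock(&boost_kthread_mutex);
	mask = online_mask;             /* snapshot under the lock, not before */
	for (cpu = 0; cpu < 8 * (int)sizeof(mask); cpu++) {
		if ((mask & (1UL << cpu)) && cpu != outgoingcpu)
			printf("keep CPU %d in the affinity set\n", cpu);
	}
	pthread_mutex_unlock(&boost_kthread_mutex);
}

int main(void)
{
	set_boost_affinity(1);
	return 0;
}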
...@@ -1771,7 +1771,7 @@ bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork) ...@@ -1771,7 +1771,7 @@ bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
rwork->wq = wq; rwork->wq = wq;
call_rcu(&rwork->rcu, rcu_work_rcufn); call_rcu_hurry(&rwork->rcu, rcu_work_rcufn);
return true; return true;
} }
......
...@@ -230,7 +230,8 @@ static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref, ...@@ -230,7 +230,8 @@ static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
percpu_ref_noop_confirm_switch; percpu_ref_noop_confirm_switch;
percpu_ref_get(ref); /* put after confirmation */ percpu_ref_get(ref); /* put after confirmation */
call_rcu(&ref->data->rcu, percpu_ref_switch_to_atomic_rcu); call_rcu_hurry(&ref->data->rcu,
percpu_ref_switch_to_atomic_rcu);
} }
static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
......
...@@ -174,7 +174,7 @@ void dst_release(struct dst_entry *dst) ...@@ -174,7 +174,7 @@ void dst_release(struct dst_entry *dst)
net_warn_ratelimited("%s: dst:%p refcnt:%d\n", net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
__func__, dst, newrefcnt); __func__, dst, newrefcnt);
if (!newrefcnt) if (!newrefcnt)
call_rcu(&dst->rcu_head, dst_destroy_rcu); call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu);
} }
} }
EXPORT_SYMBOL(dst_release); EXPORT_SYMBOL(dst_release);
......
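The workqueue, percpu-refcount, and dst conversions above follow one rule of thumb: if something waits on the callback's effect (a flushed rcu_work, a confirmed atomic-mode switch, a reference drop that gates teardown), the callback must not sit in a lazy batch, so call_rcu_hurry() is used; callbacks that merely free memory may stay lazy. A toy single-threaded model of the difference (cb_engine, toy_call_rcu, and the jiffy values are illustrative, not kernel APIs):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define HZ 1000
#define LAZY_FLUSH_JIFFIES (10 * HZ)

/* A toy deferred-callback engine: callbacks are batched until either a
 * hurried enqueue or the lazy deadline forces a flush. */
struct cb_engine {
	int pending;
	unsigned long deadline;
	bool armed;
	int invoked;
};

static void engine_flush(struct cb_engine *e)
{
	e->invoked += e->pending;
	e->pending = 0;
	e->armed = false;
}

static void toy_call_rcu(struct cb_engine *e, unsigned long now, bool hurry)
{
	e->pending++;
	if (hurry) {
		engine_flush(e);        /* wake the GP machinery right away */
		return;
	}
	if (!e->armed) {
		e->deadline = now + LAZY_FLUSH_JIFFIES;
		e->armed = true;
	}
}

static void engine_tick(struct cb_engine *e, unsigned long now)
{
	if (e->armed && (long)(now - e->deadline) >= 0)
		engine_flush(e);
}

int main(void)
{
	struct cb_engine e = { 0 };

	toy_call_rcu(&e, 100, false);            /* lazy: just batched */
	engine_tick(&e, 200);
	assert(e.invoked == 0 && e.pending == 1);

	toy_call_rcu(&e, 300, true);             /* hurried: flushes the batch */
	assert(e.invoked == 2 && e.pending == 0);

	toy_call_rcu(&e, 400, false);            /* lazy again, left to the deadline */
	engine_tick(&e, 400 + LAZY_FLUSH_JIFFIES);
	assert(e.invoked == 3);

	printf("call_rcu vs call_rcu_hurry sketch OK\n");
	return 0;
}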
...@@ -234,13 +234,20 @@ static void inet_free_ifa(struct in_ifaddr *ifa) ...@@ -234,13 +234,20 @@ static void inet_free_ifa(struct in_ifaddr *ifa)
call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
} }
static void in_dev_free_rcu(struct rcu_head *head)
{
struct in_device *idev = container_of(head, struct in_device, rcu_head);
kfree(rcu_dereference_protected(idev->mc_hash, 1));
kfree(idev);
}
void in_dev_finish_destroy(struct in_device *idev) void in_dev_finish_destroy(struct in_device *idev)
{ {
struct net_device *dev = idev->dev; struct net_device *dev = idev->dev;
WARN_ON(idev->ifa_list); WARN_ON(idev->ifa_list);
WARN_ON(idev->mc_list); WARN_ON(idev->mc_list);
kfree(rcu_dereference_protected(idev->mc_hash, 1));
#ifdef NET_REFCNT_DEBUG #ifdef NET_REFCNT_DEBUG
pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
#endif #endif
...@@ -248,7 +255,7 @@ void in_dev_finish_destroy(struct in_device *idev) ...@@ -248,7 +255,7 @@ void in_dev_finish_destroy(struct in_device *idev)
if (!idev->dead) if (!idev->dead)
pr_err("Freeing alive in_device %p\n", idev); pr_err("Freeing alive in_device %p\n", idev);
else else
kfree(idev); call_rcu(&idev->rcu_head, in_dev_free_rcu);
} }
EXPORT_SYMBOL(in_dev_finish_destroy); EXPORT_SYMBOL(in_dev_finish_destroy);
...@@ -298,12 +305,6 @@ static struct in_device *inetdev_init(struct net_device *dev) ...@@ -298,12 +305,6 @@ static struct in_device *inetdev_init(struct net_device *dev)
goto out; goto out;
} }
static void in_dev_rcu_put(struct rcu_head *head)
{
struct in_device *idev = container_of(head, struct in_device, rcu_head);
in_dev_put(idev);
}
static void inetdev_destroy(struct in_device *in_dev) static void inetdev_destroy(struct in_device *in_dev)
{ {
struct net_device *dev; struct net_device *dev;
...@@ -328,7 +329,7 @@ static void inetdev_destroy(struct in_device *in_dev) ...@@ -328,7 +329,7 @@ static void inetdev_destroy(struct in_device *in_dev)
neigh_parms_release(&arp_tbl, in_dev->arp_parms); neigh_parms_release(&arp_tbl, in_dev->arp_parms);
arp_ifdown(dev); arp_ifdown(dev);
call_rcu(&in_dev->rcu_head, in_dev_rcu_put); in_dev_put(in_dev);
} }
int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
......
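The devinet.c rework applies the same rule from the other side: deferring a reference drop through call_rcu() could, under lazy RCU, pin the in_device and its net_device reference for seconds, so inetdev_destroy() now drops the reference immediately and only the final kfree of the in_device and its mc_hash goes through an RCU callback, which is harmless to batch. A toy model of that shape (toy_in_dev, toy_put, fake_grace_period are illustrative only):

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy in_device: the reference drop is immediate; only the final free is
 * deferred, standing in for call_rcu(&idev->rcu_head, in_dev_free_rcu). */
struct toy_in_dev {
	int refcnt;
};

/* One-slot "RCU callback" queue standing in for call_rcu(). */
static struct toy_in_dev *deferred_free;

static void toy_put(struct toy_in_dev *idev)
{
	if (--idev->refcnt == 0)
		deferred_free = idev;   /* freeing may wait for a (lazy) grace period */
}

static void toy_destroy(struct toy_in_dev *idev)
{
	toy_put(idev);                  /* drop the reference now, not after a GP */
}

static void fake_grace_period(void)
{
	if (deferred_free) {
		free(deferred_free);
		deferred_free = NULL;
	}
}

int main(void)
{
	struct toy_in_dev *idev = calloc(1, sizeof(*idev));

	if (!idev)
		return 1;
	idev->refcnt = 1;
	toy_destroy(idev);
	assert(deferred_free == idev);  /* reference already gone, only the free waits */
	fake_grace_period();
	assert(deferred_free == NULL);
	printf("deferred-free sketch OK\n");
	return 0;
}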
...@@ -30,9 +30,8 @@ else ...@@ -30,9 +30,8 @@ else
fi fi
scenarios="`echo $scenariosarg | sed -e "s/\<CFLIST\>/$defaultconfigs/g"`" scenarios="`echo $scenariosarg | sed -e "s/\<CFLIST\>/$defaultconfigs/g"`"
T=/tmp/config2latex.sh.$$ T=`mktemp -d /tmp/config2latex.sh.XXXXXX`
trap 'rm -rf $T' 0 trap 'rm -rf $T' 0
mkdir $T
cat << '---EOF---' >> $T/p.awk cat << '---EOF---' >> $T/p.awk
END { END {
......
...@@ -29,9 +29,8 @@ else ...@@ -29,9 +29,8 @@ else
exit 1 exit 1
fi fi
T=${TMPDIR-/tmp}/config_override.sh.$$ T="`mktemp -d ${TMPDIR-/tmp}/config_override.sh.XXXXXX`"
trap 'rm -rf $T' 0 trap 'rm -rf $T' 0
mkdir $T
sed < $override -e 's/^/grep -v "/' -e 's/=.*$/="/' | sed < $override -e 's/^/grep -v "/' -e 's/=.*$/="/' |
awk ' awk '
......
...@@ -7,9 +7,8 @@ ...@@ -7,9 +7,8 @@
# #
# Authors: Paul E. McKenney <paulmck@linux.ibm.com> # Authors: Paul E. McKenney <paulmck@linux.ibm.com>
T=${TMPDIR-/tmp}/abat-chk-config.sh.$$ T="`mktemp -d ${TMPDIR-/tmp}/configcheck.sh.XXXXXX`"
trap 'rm -rf $T' 0 trap 'rm -rf $T' 0
mkdir $T
cat $1 > $T/.config cat $1 > $T/.config
......
...@@ -15,9 +15,8 @@ ...@@ -15,9 +15,8 @@
# #
# Authors: Paul E. McKenney <paulmck@linux.ibm.com> # Authors: Paul E. McKenney <paulmck@linux.ibm.com>
T=${TMPDIR-/tmp}/configinit.sh.$$ T="`mktemp -d ${TMPDIR-/tmp}/configinit.sh.XXXXXX`"
trap 'rm -rf $T' 0 trap 'rm -rf $T' 0
mkdir $T
# Capture config spec file. # Capture config spec file.
......
...@@ -12,9 +12,8 @@ ...@@ -12,9 +12,8 @@
scriptname=$0 scriptname=$0
args="$*" args="$*"
T=${TMPDIR-/tmp}/kvm-again.sh.$$ T="`mktemp -d ${TMPDIR-/tmp}/kvm-again.sh.XXXXXX`"
trap 'rm -rf $T' 0 trap 'rm -rf $T' 0
mkdir $T
if ! test -d tools/testing/selftests/rcutorture/bin if ! test -d tools/testing/selftests/rcutorture/bin
then then
...@@ -51,27 +50,56 @@ RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE ...@@ -51,27 +50,56 @@ RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
PATH=${RCUTORTURE}/bin:$PATH; export PATH PATH=${RCUTORTURE}/bin:$PATH; export PATH
. functions.sh . functions.sh
bootargs=
dryrun= dryrun=
dur= dur=
default_link="cp -R" default_link="cp -R"
rundir="`pwd`/tools/testing/selftests/rcutorture/res/`date +%Y.%m.%d-%H.%M.%S-again`" resdir="`pwd`/tools/testing/selftests/rcutorture/res"
rundir="$resdir/`date +%Y.%m.%d-%H.%M.%S-again`"
got_datestamp=
got_rundir=
startdate="`date`" startdate="`date`"
starttime="`get_starttime`" starttime="`get_starttime`"
usage () { usage () {
echo "Usage: $scriptname $oldrun [ arguments ]:" echo "Usage: $scriptname $oldrun [ arguments ]:"
echo " --bootargs kernel-boot-arguments"
echo " --datestamp string"
echo " --dryrun" echo " --dryrun"
echo " --duration minutes | <seconds>s | <hours>h | <days>d" echo " --duration minutes | <seconds>s | <hours>h | <days>d"
echo " --link hard|soft|copy" echo " --link hard|soft|copy"
echo " --remote" echo " --remote"
echo " --rundir /new/res/path" echo " --rundir /new/res/path"
echo "Command line: $scriptname $args"
exit 1 exit 1
} }
while test $# -gt 0 while test $# -gt 0
do do
case "$1" in case "$1" in
--bootargs|--bootarg)
checkarg --bootargs "(list of kernel boot arguments)" "$#" "$2" '.*' '^--'
bootargs="$bootargs $2"
shift
;;
--datestamp)
checkarg --datestamp "(relative pathname)" "$#" "$2" '^[a-zA-Z0-9._/-]*$' '^--'
if test -n "$got_rundir" || test -n "$got_datestamp"
then
echo Only one of --datestamp or --rundir may be specified
usage
fi
got_datestamp=y
ds=$2
rundir="$resdir/$ds"
if test -e "$rundir"
then
echo "--datestamp $2: Already exists."
usage
fi
shift
;;
--dryrun) --dryrun)
dryrun=1 dryrun=1
;; ;;
...@@ -113,6 +141,12 @@ do ...@@ -113,6 +141,12 @@ do
;; ;;
--rundir) --rundir)
checkarg --rundir "(absolute pathname)" "$#" "$2" '^/' '^error' checkarg --rundir "(absolute pathname)" "$#" "$2" '^/' '^error'
if test -n "$got_rundir" || test -n "$got_datestamp"
then
echo Only one of --datestamp or --rundir may be specified
usage
fi
got_rundir=y
rundir=$2 rundir=$2
if test -e "$rundir" if test -e "$rundir"
then then
...@@ -122,8 +156,11 @@ do ...@@ -122,8 +156,11 @@ do
shift shift
;; ;;
*) *)
if test -n "$1"
then
echo Unknown argument $1 echo Unknown argument $1
usage usage
fi
;; ;;
esac esac
shift shift
...@@ -156,7 +193,7 @@ do ...@@ -156,7 +193,7 @@ do
qemu_cmd_dir="`dirname "$i"`" qemu_cmd_dir="`dirname "$i"`"
kernel_dir="`echo $qemu_cmd_dir | sed -e 's/\.[0-9]\+$//'`" kernel_dir="`echo $qemu_cmd_dir | sed -e 's/\.[0-9]\+$//'`"
jitter_dir="`dirname "$kernel_dir"`" jitter_dir="`dirname "$kernel_dir"`"
kvm-transform.sh "$kernel_dir/bzImage" "$qemu_cmd_dir/console.log" "$jitter_dir" $dur < $T/qemu-cmd > $i kvm-transform.sh "$kernel_dir/bzImage" "$qemu_cmd_dir/console.log" "$jitter_dir" $dur "$bootargs" < $T/qemu-cmd > $i
if test -n "$arg_remote" if test -n "$arg_remote"
then then
echo "# TORTURE_KCONFIG_GDB_ARG=''" >> $i echo "# TORTURE_KCONFIG_GDB_ARG=''" >> $i
......
...@@ -7,9 +7,8 @@ ...@@ -7,9 +7,8 @@
# #
# Usage: kvm-assign-cpus.sh /path/to/sysfs # Usage: kvm-assign-cpus.sh /path/to/sysfs
T=/tmp/kvm-assign-cpus.sh.$$ T="`mktemp -d ${TMPDIR-/tmp}/kvm-assign-cpus.sh.XXXXXX`"
trap 'rm -rf $T' 0 2 trap 'rm -rf $T' 0 2
mkdir $T
sysfsdir=${1-/sys/devices/system/node} sysfsdir=${1-/sys/devices/system/node}
if ! cd "$sysfsdir" > $T/msg 2>&1 if ! cd "$sysfsdir" > $T/msg 2>&1
......
...@@ -23,9 +23,8 @@ then ...@@ -23,9 +23,8 @@ then
fi fi
resdir=${2} resdir=${2}
T=${TMPDIR-/tmp}/test-linux.sh.$$ T="`mktemp -d ${TMPDIR-/tmp}/kvm-build.sh.XXXXXX`"
trap 'rm -rf $T' 0 trap 'rm -rf $T' 0
mkdir $T
cp ${config_template} $T/config cp ${config_template} $T/config
cat << ___EOF___ >> $T/config cat << ___EOF___ >> $T/config
......
...@@ -18,9 +18,8 @@ then ...@@ -18,9 +18,8 @@ then
exit 1 exit 1
fi fi
T=${TMPDIR-/tmp}/kvm-end-run-stats.sh.$$ T="`mktemp -d ${TMPDIR-/tmp}/kvm-end-run-stats.sh.XXXXXX`"
trap 'rm -rf $T' 0 trap 'rm -rf $T' 0
mkdir $T
RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
PATH=${RCUTORTURE}/bin:$PATH; export PATH PATH=${RCUTORTURE}/bin:$PATH; export PATH
......
...@@ -30,7 +30,7 @@ do ...@@ -30,7 +30,7 @@ do
resdir=`echo $i | sed -e 's,/$,,' -e 's,/[^/]*$,,'` resdir=`echo $i | sed -e 's,/$,,' -e 's,/[^/]*$,,'`
head -1 $resdir/log head -1 $resdir/log
fi fi
TORTURE_SUITE="`cat $i/../torture_suite`" TORTURE_SUITE="`cat $i/../torture_suite`" ; export TORTURE_SUITE
configfile=`echo $i | sed -e 's,^.*/,,'` configfile=`echo $i | sed -e 's,^.*/,,'`
rm -f $i/console.log.*.diags rm -f $i/console.log.*.diags
case "${TORTURE_SUITE}" in case "${TORTURE_SUITE}" in
......
...@@ -34,19 +34,18 @@ fi ...@@ -34,19 +34,18 @@ fi
shift shift
# Pathnames: # Pathnames:
# T: /tmp/kvm-remote.sh.$$ # T: /tmp/kvm-remote.sh.NNNNNN where "NNNNNN" is set by mktemp
# resdir: /tmp/kvm-remote.sh.$$/res # resdir: /tmp/kvm-remote.sh.NNNNNN/res
# rundir: /tmp/kvm-remote.sh.$$/res/$ds ("-remote" suffix) # rundir: /tmp/kvm-remote.sh.NNNNNN/res/$ds ("-remote" suffix)
# oldrun: `pwd`/tools/testing/.../res/$otherds # oldrun: `pwd`/tools/testing/.../res/$otherds
# #
# Pathname segments: # Pathname segments:
# TD: kvm-remote.sh.$$ # TD: kvm-remote.sh.NNNNNN
# ds: yyyy.mm.dd-hh.mm.ss-remote # ds: yyyy.mm.dd-hh.mm.ss-remote
TD=kvm-remote.sh.$$ T="`mktemp -d ${TMPDIR-/tmp}/kvm-remote.sh.XXXXXX`"
T=${TMPDIR-/tmp}/$TD
trap 'rm -rf $T' 0 trap 'rm -rf $T' 0
mkdir $T TD="`basename "$T"`"
resdir="$T/res" resdir="$T/res"
ds=`date +%Y.%m.%d-%H.%M.%S`-remote ds=`date +%Y.%m.%d-%H.%M.%S`-remote
......
...@@ -13,9 +13,8 @@ ...@@ -13,9 +13,8 @@
# #
# Authors: Paul E. McKenney <paulmck@kernel.org> # Authors: Paul E. McKenney <paulmck@kernel.org>
T=${TMPDIR-/tmp}/kvm-test-1-run-batch.sh.$$ T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run-batch.sh.XXXXXX`"
trap 'rm -rf $T' 0 trap 'rm -rf $T' 0
mkdir $T
echo ---- Running batch $* echo ---- Running batch $*
# Check arguments # Check arguments
......
...@@ -17,9 +17,8 @@ ...@@ -17,9 +17,8 @@
# #
# Authors: Paul E. McKenney <paulmck@kernel.org> # Authors: Paul E. McKenney <paulmck@kernel.org>
T=${TMPDIR-/tmp}/kvm-test-1-run-qemu.sh.$$ T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run-qemu.sh.XXXXXX`"
trap 'rm -rf $T' 0 trap 'rm -rf $T' 0
mkdir $T
resdir="$1" resdir="$1"
if ! test -d "$resdir" if ! test -d "$resdir"
...@@ -109,7 +108,7 @@ do ...@@ -109,7 +108,7 @@ do
if test $kruntime -lt $seconds if test $kruntime -lt $seconds
then then
echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1 echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1
grep "^(qemu) qemu:" $resdir/kvm-test-1-run.sh.out >> $resdir/Warnings 2>&1 grep "^(qemu) qemu:" $resdir/kvm-test-1-run*.sh.out >> $resdir/Warnings 2>&1
killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`" killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`"
if test -n "$killpid" if test -n "$killpid"
then then
......
...@@ -25,9 +25,8 @@ ...@@ -25,9 +25,8 @@
# #
# Authors: Paul E. McKenney <paulmck@linux.ibm.com> # Authors: Paul E. McKenney <paulmck@linux.ibm.com>
T=${TMPDIR-/tmp}/kvm-test-1-run.sh.$$ T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run.sh.XXXXXX`"
trap 'rm -rf $T' 0 trap 'rm -rf $T' 0
mkdir $T
. functions.sh . functions.sh
. $CONFIGFRAG/ver_functions.sh . $CONFIGFRAG/ver_functions.sh
......
...@@ -3,10 +3,14 @@ ...@@ -3,10 +3,14 @@
# #
# Transform a qemu-cmd file to allow reuse. # Transform a qemu-cmd file to allow reuse.
# #
# Usage: kvm-transform.sh bzImage console.log jitter_dir [ seconds ] < qemu-cmd-in > qemu-cmd-out # Usage: kvm-transform.sh bzImage console.log jitter_dir seconds [ bootargs ] < qemu-cmd-in > qemu-cmd-out
# #
# bzImage: Kernel and initrd from the same prior kvm.sh run. # bzImage: Kernel and initrd from the same prior kvm.sh run.
# console.log: File into which to place console output. # console.log: File into which to place console output.
# jitter_dir: Jitter directory for TORTURE_JITTER_START and
# TORTURE_JITTER_STOP environment variables.
# seconds: Run duration for the *.shutdown_secs module parameter.
# bootargs: New kernel boot parameters. Beware of Robert Tables.
# #
# The original qemu-cmd file is provided on standard input. # The original qemu-cmd file is provided on standard input.
# The transformed qemu-cmd file is on standard output. # The transformed qemu-cmd file is on standard output.
...@@ -17,6 +21,9 @@ ...@@ -17,6 +21,9 @@
# #
# Authors: Paul E. McKenney <paulmck@kernel.org> # Authors: Paul E. McKenney <paulmck@kernel.org>
T=`mktemp -d /tmp/kvm-transform.sh.XXXXXXXXXX`
trap 'rm -rf $T' 0 2
image="$1" image="$1"
if test -z "$image" if test -z "$image"
then then
...@@ -41,9 +48,17 @@ then ...@@ -41,9 +48,17 @@ then
echo "Invalid duration, should be numeric in seconds: '$seconds'" echo "Invalid duration, should be numeric in seconds: '$seconds'"
exit 1 exit 1
fi fi
bootargs="$5"
# Build awk program.
echo "BEGIN {" > $T/bootarg.awk
echo $bootargs | tr -s ' ' '\012' |
awk -v dq='"' '/./ { print "\tbootarg[" NR "] = " dq $1 dq ";" }' >> $T/bootarg.awk
echo $bootargs | tr -s ' ' '\012' | sed -e 's/=.*$//' |
awk -v dq='"' '/./ { print "\tbootpar[" NR "] = " dq $1 dq ";" }' >> $T/bootarg.awk
cat >> $T/bootarg.awk << '___EOF___'
}
awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \
-v seconds="$seconds" '
/^# seconds=/ { /^# seconds=/ {
if (seconds == "") if (seconds == "")
print $0; print $0;
...@@ -70,13 +85,7 @@ awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \ ...@@ -70,13 +85,7 @@ awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \
{ {
line = ""; line = "";
for (i = 1; i <= NF; i++) { for (i = 1; i <= NF; i++) {
if ("" seconds != "" && $i ~ /\.shutdown_secs=[0-9]*$/) { if (line == "") {
sub(/[0-9]*$/, seconds, $i);
if (line == "")
line = $i;
else
line = line " " $i;
} else if (line == "") {
line = $i; line = $i;
} else { } else {
line = line " " $i; line = line " " $i;
...@@ -87,7 +96,44 @@ awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \ ...@@ -87,7 +96,44 @@ awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \
} else if ($i == "-kernel") { } else if ($i == "-kernel") {
i++; i++;
line = line " " image; line = line " " image;
} else if ($i == "-append") {
for (i++; i <= NF; i++) {
arg = $i;
lq = "";
rq = "";
if ("" seconds != "" && $i ~ /\.shutdown_secs=[0-9]*$/)
sub(/[0-9]*$/, seconds, arg);
if (arg ~ /^"/) {
lq = substr(arg, 1, 1);
arg = substr(arg, 2);
}
if (arg ~ /"$/) {
rq = substr(arg, length($i), 1);
arg = substr(arg, 1, length($i) - 1);
}
par = arg;
gsub(/=.*$/, "", par);
j = 1;
while (bootpar[j] != "") {
if (bootpar[j] == par) {
arg = "";
break;
}
j++;
}
if (line == "")
line = lq arg;
else
line = line " " lq arg;
}
for (j in bootarg)
line = line " " bootarg[j];
line = line rq;
} }
} }
print line; print line;
}' }
___EOF___
awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \
-v seconds="$seconds" -f $T/bootarg.awk
...@@ -14,9 +14,8 @@ ...@@ -14,9 +14,8 @@
scriptname=$0 scriptname=$0
args="$*" args="$*"
T=${TMPDIR-/tmp}/kvm.sh.$$ T="`mktemp -d ${TMPDIR-/tmp}/kvm.sh.XXXXXX`"
trap 'rm -rf $T' 0 trap 'rm -rf $T' 0
mkdir $T
cd `dirname $scriptname`/../../../../../ cd `dirname $scriptname`/../../../../../
......
...@@ -15,9 +15,8 @@ ...@@ -15,9 +15,8 @@
F=$1 F=$1
title=$2 title=$2
T=${TMPDIR-/tmp}/parse-build.sh.$$ T="`mktemp -d ${TMPDIR-/tmp}/parse-build.sh.XXXXXX`"
trap 'rm -rf $T' 0 trap 'rm -rf $T' 0
mkdir $T
. functions.sh . functions.sh
......
...@@ -206,9 +206,8 @@ ds="`date +%Y.%m.%d-%H.%M.%S`-torture" ...@@ -206,9 +206,8 @@ ds="`date +%Y.%m.%d-%H.%M.%S`-torture"
startdate="`date`" startdate="`date`"
starttime="`get_starttime`" starttime="`get_starttime`"
T=/tmp/torture.sh.$$ T="`mktemp -d ${TMPDIR-/tmp}/torture.sh.XXXXXX`"
trap 'rm -rf $T' 0 2 trap 'rm -rf $T' 0 2
mkdir $T
echo " --- " $scriptname $args | tee -a $T/log echo " --- " $scriptname $args | tee -a $T/log
echo " --- Results directory: " $ds | tee -a $T/log echo " --- Results directory: " $ds | tee -a $T/log
...@@ -278,6 +277,8 @@ function torture_one { ...@@ -278,6 +277,8 @@ function torture_one {
then then
cat $T/$curflavor.out | tee -a $T/log cat $T/$curflavor.out | tee -a $T/log
echo retcode=$retcode | tee -a $T/log echo retcode=$retcode | tee -a $T/log
else
echo $resdir > $T/last-resdir
fi fi
if test "$retcode" == 0 if test "$retcode" == 0
then then
...@@ -303,10 +304,12 @@ function torture_set { ...@@ -303,10 +304,12 @@ function torture_set {
shift shift
curflavor=$flavor curflavor=$flavor
torture_one "$@" torture_one "$@"
mv $T/last-resdir $T/last-resdir-nodebug || :
if test "$do_kasan" = "yes" if test "$do_kasan" = "yes"
then then
curflavor=${flavor}-kasan curflavor=${flavor}-kasan
torture_one "$@" --kasan torture_one "$@" --kasan
mv $T/last-resdir $T/last-resdir-kasan || :
fi fi
if test "$do_kcsan" = "yes" if test "$do_kcsan" = "yes"
then then
...@@ -317,6 +320,7 @@ function torture_set { ...@@ -317,6 +320,7 @@ function torture_set {
cur_kcsan_kmake_args="$kcsan_kmake_args" cur_kcsan_kmake_args="$kcsan_kmake_args"
fi fi
torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan
mv $T/last-resdir $T/last-resdir-kcsan || :
fi fi
} }
...@@ -326,20 +330,34 @@ then ...@@ -326,20 +330,34 @@ then
echo " --- allmodconfig:" Start `date` | tee -a $T/log echo " --- allmodconfig:" Start `date` | tee -a $T/log
amcdir="tools/testing/selftests/rcutorture/res/$ds/allmodconfig" amcdir="tools/testing/selftests/rcutorture/res/$ds/allmodconfig"
mkdir -p "$amcdir" mkdir -p "$amcdir"
echo " --- make clean" > "$amcdir/Make.out" 2>&1 echo " --- make clean" | tee $amcdir/log > "$amcdir/Make.out" 2>&1
make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1 make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1
echo " --- make allmodconfig" >> "$amcdir/Make.out" 2>&1 retcode=$?
buildphase='"make clean"'
if test "$retcode" -eq 0
then
echo " --- make allmodconfig" | tee -a $amcdir/log >> "$amcdir/Make.out" 2>&1
cp .config $amcdir cp .config $amcdir
make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1 make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1
echo " --- make " >> "$amcdir/Make.out" 2>&1 retcode=$?
buildphase='"make allmodconfig"'
fi
if test "$retcode" -eq 0
then
echo " --- make " | tee -a $amcdir/log >> "$amcdir/Make.out" 2>&1
make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1 make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1
retcode="$?" retcode="$?"
echo $retcode > "$amcdir/Make.exitcode" echo $retcode > "$amcdir/Make.exitcode"
if test "$retcode" == 0 buildphase='"make"'
fi
if test "$retcode" -eq 0
then then
echo "allmodconfig($retcode)" $amcdir >> $T/successes echo "allmodconfig($retcode)" $amcdir >> $T/successes
echo Success >> $amcdir/log
else else
echo "allmodconfig($retcode)" $amcdir >> $T/failures echo "allmodconfig($retcode)" $amcdir >> $T/failures
echo " --- allmodconfig Test summary:" >> $amcdir/log
echo " --- Summary: Exit code $retcode from $buildphase, see Make.out" >> $amcdir/log
fi fi
fi fi
...@@ -379,11 +397,48 @@ then ...@@ -379,11 +397,48 @@ then
else else
primlist= primlist=
fi fi
firsttime=1
do_kasan_save="$do_kasan"
do_kcsan_save="$do_kcsan"
for prim in $primlist for prim in $primlist
do do
if test -n "$firsttime"
then
torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot" torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot"
torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
mv $T/last-resdir-nodebug $T/first-resdir-nodebug || :
if test -f "$T/last-resdir-kasan"
then
mv $T/last-resdir-kasan $T/first-resdir-kasan || :
fi
if test -f "$T/last-resdir-kcsan"
then
mv $T/last-resdir-kcsan $T/first-resdir-kcsan || :
fi
firsttime=
do_kasan=
do_kcsan=
else
torture_bootargs=
for i in $T/first-resdir-*
do
case "$i" in
*-nodebug)
torture_suffix=
;;
*-kasan)
torture_suffix="-kasan"
;;
*-kcsan)
torture_suffix="-kcsan"
;;
esac
torture_set "refscale-$prim$torture_suffix" tools/testing/selftests/rcutorture/bin/kvm-again.sh "`cat "$i"`" --duration 5 --bootargs "refscale.scale_type=$prim"
done
fi
done done
do_kasan="$do_kasan_save"
do_kcsan="$do_kcsan_save"
if test "$do_rcuscale" = yes if test "$do_rcuscale" = yes
then then
...@@ -391,11 +446,48 @@ then ...@@ -391,11 +446,48 @@ then
else else
primlist= primlist=
fi fi
firsttime=1
do_kasan_save="$do_kasan"
do_kcsan_save="$do_kcsan"
for prim in $primlist for prim in $primlist
do do
if test -n "$firsttime"
then
torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot" torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot"
torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
mv $T/last-resdir-nodebug $T/first-resdir-nodebug || :
if test -f "$T/last-resdir-kasan"
then
mv $T/last-resdir-kasan $T/first-resdir-kasan || :
fi
if test -f "$T/last-resdir-kcsan"
then
mv $T/last-resdir-kcsan $T/first-resdir-kcsan || :
fi
firsttime=
do_kasan=
do_kcsan=
else
torture_bootargs=
for i in $T/first-resdir-*
do
case "$i" in
*-nodebug)
torture_suffix=
;;
*-kasan)
torture_suffix="-kasan"
;;
*-kcsan)
torture_suffix="-kcsan"
;;
esac
torture_set "rcuscale-$prim$torture_suffix" tools/testing/selftests/rcutorture/bin/kvm-again.sh "`cat "$i"`" --duration 5 --bootargs "rcuscale.scale_type=$prim"
done
fi
done done
do_kasan="$do_kasan_save"
do_kcsan="$do_kcsan_save"
if test "$do_kvfree" = "yes" if test "$do_kvfree" = "yes"
then then
...@@ -458,7 +550,10 @@ if test -n "$tdir" && test $compress_concurrency -gt 0 ...@@ -458,7 +550,10 @@ if test -n "$tdir" && test $compress_concurrency -gt 0
then then
# KASAN vmlinux files can approach 1GB in size, so compress them. # KASAN vmlinux files can approach 1GB in size, so compress them.
echo Looking for K[AC]SAN files to compress: `date` > "$tdir/log-xz" 2>&1 echo Looking for K[AC]SAN files to compress: `date` > "$tdir/log-xz" 2>&1
find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo-all
find "$tdir" -type f -name 're-run' -print | sed -e 's,/re-run,,' |
grep -e '-k[ac]san$' > $T/xz-todo-copy
sort $T/xz-todo-all $T/xz-todo-copy | uniq -u > $T/xz-todo
ncompresses=0 ncompresses=0
batchno=1 batchno=1
if test -s $T/xz-todo if test -s $T/xz-todo
...@@ -490,6 +585,24 @@ then ...@@ -490,6 +585,24 @@ then
echo Waiting for final batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log echo Waiting for final batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log
fi fi
wait wait
if test -s $T/xz-todo-copy
then
# The trick here is that we need corresponding
# vmlinux files from corresponding scenarios.
echo Linking vmlinux.xz files to re-use scenarios `date` | tee -a "$tdir/log-xz" | tee -a $T/log
dirstash="`pwd`"
for i in `cat $T/xz-todo-copy`
do
cd $i
find . -name vmlinux -print > $T/xz-todo-copy-vmlinux
for v in `cat $T/xz-todo-copy-vmlinux`
do
rm -f "$v"
cp -l `cat $i/re-run`/"$i/$v".xz "`dirname "$v"`"
done
cd "$dirstash"
done
fi
echo Size after compressing $n2compress files: `du -sh $tdir | awk '{ print $1 }'` `date` 2>&1 | tee -a "$tdir/log-xz" | tee -a $T/log echo Size after compressing $n2compress files: `du -sh $tdir | awk '{ print $1 }'` `date` 2>&1 | tee -a "$tdir/log-xz" | tee -a $T/log
echo Total duration `get_starttime_duration $starttime`. | tee -a $T/log echo Total duration `get_starttime_duration $starttime`. | tee -a $T/log
else else
......