Commit 7b8da4c7 authored by Christoph Lameter, committed by Linus Torvalds

vmstat: get rid of the ugly cpu_stat_off variable

The cpu_stat_off variable is unnecessary since we can simply check
whether a workqueue request is pending instead.  Removal of cpu_stat_off
makes it pretty easy for the vmstat shepherd to ensure that the proper
things happen.

Removing the state also removes all races related to it.  Should a
vmstat_update work item not be scheduled when needed, the shepherd will
notice and schedule it.  Should one be scheduled unnecessarily, the
vmstat updater will disable it.
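
In outline: the updater keeps re-queueing itself only while counters
keep changing, and the shepherd restarts any CPU whose worker is idle
but whose counters have drifted.  A condensed sketch of the two sides,
lifted from the diff below (surrounding context omitted):

	/* vmstat_update(), per CPU: keep running while there is work */
	if (refresh_cpu_vm_stats(true))
		queue_delayed_work_on(smp_processor_id(), vmstat_wq,
				this_cpu_ptr(&vmstat_work),
				round_jiffies_relative(sysctl_stat_interval));

	/* vmstat_shepherd(): re-arm any idle CPU with pending diffs */
	for_each_online_cpu(cpu) {
		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);

		if (!delayed_work_pending(dw) && need_update(cpu))
			queue_delayed_work_on(cpu, vmstat_wq, dw, 0);
	}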

[akpm@linux-foundation.org: fix indentation, per Michal]
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1605061306460.17934@east.gentwo.org
Signed-off-by: Christoph Lameter <cl@linux.com>
Cc: Tejun Heo <htejun@gmail.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 51038171
@@ -1352,7 +1352,6 @@ static const struct file_operations proc_vmstat_file_operations = {
 static struct workqueue_struct *vmstat_wq;
 static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
 int sysctl_stat_interval __read_mostly = HZ;
-static cpumask_var_t cpu_stat_off;
 
 #ifdef CONFIG_PROC_FS
 static void refresh_vm_stats(struct work_struct *work)
@@ -1421,25 +1420,11 @@ static void vmstat_update(struct work_struct *w)
 		 * Counters were updated so we expect more updates
 		 * to occur in the future. Keep on running the
 		 * update worker thread.
-		 * If we were marked on cpu_stat_off clear the flag
-		 * so that vmstat_shepherd doesn't schedule us again.
 		 */
-		if (!cpumask_test_and_clear_cpu(smp_processor_id(),
-						cpu_stat_off)) {
-			queue_delayed_work_on(smp_processor_id(), vmstat_wq,
+		queue_delayed_work_on(smp_processor_id(), vmstat_wq,
 				this_cpu_ptr(&vmstat_work),
 				round_jiffies_relative(sysctl_stat_interval));
-		}
-	} else {
-		/*
-		 * We did not update any counters so the app may be in
-		 * a mode where it does not cause counter updates.
-		 * We may be uselessly running vmstat_update.
-		 * Defer the checking for differentials to the
-		 * shepherd thread on a different processor.
-		 */
-		cpumask_set_cpu(smp_processor_id(), cpu_stat_off);
 	}
 }
 
@@ -1470,16 +1455,17 @@ static bool need_update(int cpu)
 	return false;
 }
 
+/*
+ * Switch off vmstat processing and then fold all the remaining differentials
+ * until the diffs stay at zero. The function is used by NOHZ and can only be
+ * invoked when tick processing is not active.
+ */
 void quiet_vmstat(void)
 {
 	if (system_state != SYSTEM_RUNNING)
 		return;
 
-	/*
-	 * If we are already in hands of the shepherd then there
-	 * is nothing for us to do here.
-	 */
-	if (cpumask_test_and_set_cpu(smp_processor_id(), cpu_stat_off))
+	if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
 		return;
 
 	if (!need_update(smp_processor_id()))
@@ -1494,7 +1480,6 @@ void quiet_vmstat(void)
 	refresh_cpu_vm_stats(false);
 }
 
-
 /*
  * Shepherd worker thread that checks the
  * differentials of processors that have their worker
@@ -1511,20 +1496,11 @@ static void vmstat_shepherd(struct work_struct *w)
 	get_online_cpus();
 	/* Check processors whose vmstat worker threads have been disabled */
-	for_each_cpu(cpu, cpu_stat_off) {
+	for_each_online_cpu(cpu) {
 		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
 
-		if (need_update(cpu)) {
-			if (cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
-				queue_delayed_work_on(cpu, vmstat_wq, dw, 0);
-		} else {
-			/*
-			 * Cancel the work if quiet_vmstat has put this
-			 * cpu on cpu_stat_off because the work item might
-			 * be still scheduled
-			 */
-			cancel_delayed_work(dw);
-		}
+		if (!delayed_work_pending(dw) && need_update(cpu))
+			queue_delayed_work_on(cpu, vmstat_wq, dw, 0);
 	}
 	put_online_cpus();
 
@@ -1540,10 +1516,6 @@ static void __init start_shepherd_timer(void)
 		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
 			vmstat_update);
 
-	if (!alloc_cpumask_var(&cpu_stat_off, GFP_KERNEL))
-		BUG();
-	cpumask_copy(cpu_stat_off, cpu_online_mask);
-
 	vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
 	schedule_delayed_work(&shepherd,
 		round_jiffies_relative(sysctl_stat_interval));
@@ -1578,16 +1550,13 @@ static int vmstat_cpuup_callback(struct notifier_block *nfb,
 	case CPU_ONLINE_FROZEN:
 		refresh_zone_stat_thresholds();
 		node_set_state(cpu_to_node(cpu), N_CPU);
-		cpumask_set_cpu(cpu, cpu_stat_off);
 		break;
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
 		cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
-		cpumask_clear_cpu(cpu, cpu_stat_off);
 		break;
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
-		cpumask_set_cpu(cpu, cpu_stat_off);
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN: