Commit 144060fe authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Ingo Molnar

perf: Add PM notifiers to fix CPU hotplug races

Francis reports that s2r gets him spurious NMIs; this is because the
suspend code leaves the boot cpu up and running.

Cure this by adding a suspend notifier. The problem is that hotplug
and suspend are completely un-serialized and the PM notifiers run
before the suspend cpu unplug of all but the boot cpu.

This leaves a window where the user can initiate another hotplug
operation (either remove or add a cpu) resulting in either one too
many or one too few hotplug ops. Thus we cannot use the hotplug code
for the suspend case.

There's another reason to not use the hotplug code, which is that the
hotplug code totally destroys the perf state, we can do better for
suspend and simply remove all counters from the PMU so that we can
re-instate them on resume.
Reported-by: default avatarFrancis Moreau <francis.moro@gmail.com>
Signed-off-by: default avatarPeter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-1cvevybkgmv4s6v5y37t4847@git.kernel.org
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parent 7fdba1ca
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <linux/hardirq.h> #include <linux/hardirq.h>
#include <linux/rculist.h> #include <linux/rculist.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/suspend.h>
#include <linux/syscalls.h> #include <linux/syscalls.h>
#include <linux/anon_inodes.h> #include <linux/anon_inodes.h>
#include <linux/kernel_stat.h> #include <linux/kernel_stat.h>
...@@ -6809,7 +6810,7 @@ static void __cpuinit perf_event_init_cpu(int cpu) ...@@ -6809,7 +6810,7 @@ static void __cpuinit perf_event_init_cpu(int cpu)
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
mutex_lock(&swhash->hlist_mutex); mutex_lock(&swhash->hlist_mutex);
if (swhash->hlist_refcount > 0) { if (swhash->hlist_refcount > 0 && !swhash->swevent_hlist) {
struct swevent_hlist *hlist; struct swevent_hlist *hlist;
hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu)); hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu));
...@@ -6898,7 +6899,14 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) ...@@ -6898,7 +6899,14 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
{ {
unsigned int cpu = (long)hcpu; unsigned int cpu = (long)hcpu;
switch (action & ~CPU_TASKS_FROZEN) { /*
* Ignore suspend/resume action, the perf_pm_notifier will
* take care of that.
*/
if (action & CPU_TASKS_FROZEN)
return NOTIFY_OK;
switch (action) {
case CPU_UP_PREPARE: case CPU_UP_PREPARE:
case CPU_DOWN_FAILED: case CPU_DOWN_FAILED:
...@@ -6917,6 +6925,90 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) ...@@ -6917,6 +6925,90 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
return NOTIFY_OK; return NOTIFY_OK;
} }
/*
 * Suspend/resume helpers, run on each CPU via IPI.
 *
 * BUG FIX: in the original patch the two bodies were cross-wired —
 * perf_pm_resume_cpu() scheduled all events *out* and
 * perf_pm_suspend_cpu() scheduled them *in*, the exact opposite of the
 * changelog's intent ("remove all counters from the PMU so that we can
 * re-instate them on resume") and of the perf_pm() dispatch
 * (PM_SUSPEND_PREPARE -> perf_suspend()).  The bodies are swapped here
 * so suspend quiesces the PMU and resume restores it.
 */

/*
 * Re-instate, on the local CPU, the events that perf_pm_suspend_cpu()
 * removed before the suspend transition.
 */
static void perf_pm_resume_cpu(void *unused)
{
	struct perf_cpu_context *cpuctx;
	struct perf_event_context *ctx;
	struct pmu *pmu;
	int idx;

	/* pmus list is SRCU-protected against concurrent PMU unregister */
	idx = srcu_read_lock(&pmus_srcu);
	list_for_each_entry_rcu(pmu, &pmus, entry) {
		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
		ctx = cpuctx->task_ctx;	/* may be NULL: no task context on this CPU */

		perf_ctx_lock(cpuctx, ctx);
		perf_pmu_disable(cpuctx->ctx.pmu);

		/* schedule both the cpu context and any task context back in */
		perf_event_sched_in(cpuctx, ctx, current);

		perf_pmu_enable(cpuctx->ctx.pmu);
		perf_ctx_unlock(cpuctx, ctx);
	}
	srcu_read_unlock(&pmus_srcu, idx);
}

/*
 * Remove, on the local CPU, all events from the PMU so the suspend
 * transition cannot trigger spurious PMU interrupts/NMIs.
 */
static void perf_pm_suspend_cpu(void *unused)
{
	struct perf_cpu_context *cpuctx;
	struct perf_event_context *ctx;
	struct pmu *pmu;
	int idx;

	/* pmus list is SRCU-protected against concurrent PMU unregister */
	idx = srcu_read_lock(&pmus_srcu);
	list_for_each_entry_rcu(pmu, &pmus, entry) {
		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
		ctx = cpuctx->task_ctx;	/* may be NULL: no task context on this CPU */

		perf_ctx_lock(cpuctx, ctx);
		perf_pmu_disable(cpuctx->ctx.pmu);

		/* stop every event: cpu context first, then any task context */
		cpu_ctx_sched_out(cpuctx, EVENT_ALL);
		if (ctx)
			ctx_sched_out(ctx, cpuctx, EVENT_ALL);

		perf_pmu_enable(cpuctx->ctx.pmu);
		perf_ctx_unlock(cpuctx, ctx);
	}
	srcu_read_unlock(&pmus_srcu, idx);
}
/*
 * PM_POST_SUSPEND / PM_POST_HIBERNATION: restore perf state on every
 * online CPU.
 *
 * BUG FIX: smp_call_function() runs the callback on all CPUs *except*
 * the calling one, so the CPU executing this notifier — normally the
 * boot CPU, the one the changelog says is left "up and running" — was
 * never touched.  on_each_cpu() includes the local CPU (callback runs
 * with IRQs disabled locally, same context as the IPI on remote CPUs).
 */
static int perf_resume(void)
{
	get_online_cpus();	/* hold off hotplug while we iterate CPUs */
	on_each_cpu(perf_pm_resume_cpu, NULL, 1);	/* 1: wait for completion */
	put_online_cpus();

	return NOTIFY_OK;
}
/*
 * PM_SUSPEND_PREPARE / PM_HIBERNATION_PREPARE: quiesce perf on every
 * online CPU before the suspend transition.
 *
 * BUG FIX: smp_call_function() skips the calling CPU, so the CPU
 * running this notifier (normally the boot CPU — the source of the
 * reported spurious NMIs) kept its counters armed across suspend.
 * on_each_cpu() covers the local CPU as well.
 */
static int perf_suspend(void)
{
	get_online_cpus();	/* hold off hotplug while we iterate CPUs */
	on_each_cpu(perf_pm_suspend_cpu, NULL, 1);	/* 1: wait for completion */
	put_online_cpus();

	return NOTIFY_OK;
}
/*
 * PM notifier callback: dispatch suspend/hibernate transitions to the
 * perf suspend/resume handlers; any other PM event is not ours.
 */
static int perf_pm(struct notifier_block *self, unsigned long action, void *ptr)
{
	if (action == PM_POST_HIBERNATION || action == PM_POST_SUSPEND)
		return perf_resume();

	if (action == PM_HIBERNATION_PREPARE || action == PM_SUSPEND_PREPARE)
		return perf_suspend();

	return NOTIFY_DONE;
}
/* PM transition hook; registered via register_pm_notifier() in perf_event_init() */
static struct notifier_block perf_pm_notifier = {
.notifier_call = perf_pm,
};
void __init perf_event_init(void) void __init perf_event_init(void)
{ {
int ret; int ret;
...@@ -6931,6 +7023,7 @@ void __init perf_event_init(void) ...@@ -6931,6 +7023,7 @@ void __init perf_event_init(void)
perf_tp_register(); perf_tp_register();
perf_cpu_notifier(perf_cpu_notify); perf_cpu_notifier(perf_cpu_notify);
register_reboot_notifier(&perf_reboot_notifier); register_reboot_notifier(&perf_reboot_notifier);
register_pm_notifier(&perf_pm_notifier);
ret = init_hw_breakpoint(); ret = init_hw_breakpoint();
WARN(ret, "hw_breakpoint initialization failed with: %d", ret); WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment