Commit de79f54f, authored by Markus Metzger, committed by Ingo Molnar

x86, bts, hw-branch-tracer: add _noirq variants to the debug store interface

The hw-branch-tracer uses debug store functions from an on_each_cpu()
context, which is simply wrong since the functions may sleep.

Add _noirq variants for most functions, which may be called with
interrupts disabled.

Separate per-cpu and per-task tracing and allow per-cpu tracing to be
controlled from any cpu.

Make the hw-branch-tracer use the new debug store interface, synchronize
with hotplug cpu event using get/put_online_cpus(), and remove the
unnecessary spinlock.

Make the ptrace bts and the ds selftest code use the new interface.

Defer the ds selftest.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144555.658136000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 35bb7600
...@@ -15,8 +15,8 @@ ...@@ -15,8 +15,8 @@
* - buffer allocation (memory accounting) * - buffer allocation (memory accounting)
* *
* *
* Copyright (C) 2007-2008 Intel Corporation. * Copyright (C) 2007-2009 Intel Corporation.
* Markus Metzger <markus.t.metzger@intel.com>, 2007-2008 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
*/ */
#ifndef _ASM_X86_DS_H #ifndef _ASM_X86_DS_H
...@@ -83,8 +83,10 @@ enum ds_feature { ...@@ -83,8 +83,10 @@ enum ds_feature {
* The interrupt threshold is independent from the overflow callback * The interrupt threshold is independent from the overflow callback
* to allow users to use their own overflow interrupt handling mechanism. * to allow users to use their own overflow interrupt handling mechanism.
* *
* task: the task to request recording for; * The function might sleep.
* NULL for per-cpu recording on the current cpu *
* task: the task to request recording for
* cpu: the cpu to request recording for
* base: the base pointer for the (non-pageable) buffer; * base: the base pointer for the (non-pageable) buffer;
* size: the size of the provided buffer in bytes * size: the size of the provided buffer in bytes
* ovfl: pointer to a function to be called on buffer overflow; * ovfl: pointer to a function to be called on buffer overflow;
...@@ -93,11 +95,18 @@ enum ds_feature { ...@@ -93,11 +95,18 @@ enum ds_feature {
* -1 if no interrupt threshold is requested. * -1 if no interrupt threshold is requested.
* flags: a bit-mask of the above flags * flags: a bit-mask of the above flags
*/ */
extern struct bts_tracer *ds_request_bts(struct task_struct *task, extern struct bts_tracer *ds_request_bts_task(struct task_struct *task,
void *base, size_t size, void *base, size_t size,
bts_ovfl_callback_t ovfl, bts_ovfl_callback_t ovfl,
size_t th, unsigned int flags); size_t th, unsigned int flags);
extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
bts_ovfl_callback_t ovfl,
size_t th, unsigned int flags);
extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
void *base, size_t size,
pebs_ovfl_callback_t ovfl,
size_t th, unsigned int flags);
extern struct pebs_tracer *ds_request_pebs_cpu(int cpu,
void *base, size_t size, void *base, size_t size,
pebs_ovfl_callback_t ovfl, pebs_ovfl_callback_t ovfl,
size_t th, unsigned int flags); size_t th, unsigned int flags);
...@@ -106,6 +115,8 @@ extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, ...@@ -106,6 +115,8 @@ extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
* Release BTS or PEBS resources * Release BTS or PEBS resources
* Suspend and resume BTS or PEBS tracing * Suspend and resume BTS or PEBS tracing
* *
* Must be called with irq's enabled.
*
* tracer: the tracer handle returned from ds_request_~() * tracer: the tracer handle returned from ds_request_~()
*/ */
extern void ds_release_bts(struct bts_tracer *tracer); extern void ds_release_bts(struct bts_tracer *tracer);
...@@ -115,6 +126,28 @@ extern void ds_release_pebs(struct pebs_tracer *tracer); ...@@ -115,6 +126,28 @@ extern void ds_release_pebs(struct pebs_tracer *tracer);
extern void ds_suspend_pebs(struct pebs_tracer *tracer); extern void ds_suspend_pebs(struct pebs_tracer *tracer);
extern void ds_resume_pebs(struct pebs_tracer *tracer); extern void ds_resume_pebs(struct pebs_tracer *tracer);
/*
* Release BTS or PEBS resources
* Suspend and resume BTS or PEBS tracing
*
* Cpu tracers must call this on the traced cpu.
* Task tracers must call ds_release_~_noirq() for themselves.
*
* May be called with irq's disabled.
*
* Returns 0 if successful;
* -EPERM if the cpu tracer does not trace the current cpu.
* -EPERM if the task tracer does not trace itself.
*
* tracer: the tracer handle returned from ds_request_~()
*/
extern int ds_release_bts_noirq(struct bts_tracer *tracer);
extern int ds_suspend_bts_noirq(struct bts_tracer *tracer);
extern int ds_resume_bts_noirq(struct bts_tracer *tracer);
extern int ds_release_pebs_noirq(struct pebs_tracer *tracer);
extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer);
extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer);
/* /*
* The raw DS buffer state as it is used for BTS and PEBS recording. * The raw DS buffer state as it is used for BTS and PEBS recording.
......
...@@ -250,55 +250,45 @@ struct ds_context { ...@@ -250,55 +250,45 @@ struct ds_context {
/* Pointer to the context pointer field: */ /* Pointer to the context pointer field: */
struct ds_context **this; struct ds_context **this;
/* The traced task; NULL for current cpu: */ /* The traced task; NULL for cpu tracing: */
struct task_struct *task; struct task_struct *task;
};
static DEFINE_PER_CPU(struct ds_context *, system_context_array); /* The traced cpu; only valid if task is NULL: */
int cpu;
};
#define system_context per_cpu(system_context_array, smp_processor_id()) static DEFINE_PER_CPU(struct ds_context *, cpu_context);
static inline struct ds_context *ds_get_context(struct task_struct *task) static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
{ {
struct ds_context **p_context = struct ds_context **p_context =
(task ? &task->thread.ds_ctx : &system_context); (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu));
struct ds_context *context = NULL; struct ds_context *context = NULL;
struct ds_context *new_context = NULL; struct ds_context *new_context = NULL;
unsigned long irq;
/* /* Chances are small that we already have a context. */
* Chances are small that we already have a context. new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
*
* Contexts for per-cpu tracing are allocated using
* smp_call_function(). We must not sleep.
*/
new_context = kzalloc(sizeof(*new_context), GFP_ATOMIC);
if (!new_context) if (!new_context)
return NULL; return NULL;
spin_lock_irqsave(&ds_lock, irq); spin_lock_irq(&ds_lock);
context = *p_context; context = *p_context;
if (!context) { if (likely(!context)) {
context = new_context; context = new_context;
context->this = p_context; context->this = p_context;
context->task = task; context->task = task;
context->cpu = cpu;
context->count = 0; context->count = 0;
if (task)
set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
if (!task || (task == current))
wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds);
*p_context = context; *p_context = context;
} }
context->count++; context->count++;
spin_unlock_irqrestore(&ds_lock, irq); spin_unlock_irq(&ds_lock);
if (context != new_context) if (context != new_context)
kfree(new_context); kfree(new_context);
...@@ -306,7 +296,7 @@ static inline struct ds_context *ds_get_context(struct task_struct *task) ...@@ -306,7 +296,7 @@ static inline struct ds_context *ds_get_context(struct task_struct *task)
return context; return context;
} }
static inline void ds_put_context(struct ds_context *context) static void ds_put_context(struct ds_context *context)
{ {
struct task_struct *task; struct task_struct *task;
unsigned long irq; unsigned long irq;
...@@ -328,8 +318,15 @@ static inline void ds_put_context(struct ds_context *context) ...@@ -328,8 +318,15 @@ static inline void ds_put_context(struct ds_context *context)
if (task) if (task)
clear_tsk_thread_flag(task, TIF_DS_AREA_MSR); clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);
if (!task || (task == current)) /*
wrmsrl(MSR_IA32_DS_AREA, 0); * We leave the (now dangling) pointer to the DS configuration in
* the DS_AREA msr. This is as good or as bad as replacing it with
* NULL - the hardware would crash if we enabled tracing.
*
* This saves us some problems with having to write an msr on a
* different cpu while preventing others from doing the same for the
* next context for that same cpu.
*/
spin_unlock_irqrestore(&ds_lock, irq); spin_unlock_irqrestore(&ds_lock, irq);
...@@ -340,6 +337,31 @@ static inline void ds_put_context(struct ds_context *context) ...@@ -340,6 +337,31 @@ static inline void ds_put_context(struct ds_context *context)
kfree(context); kfree(context);
} }
static void ds_install_ds_area(struct ds_context *context)
{
unsigned long ds;
ds = (unsigned long)context->ds;
/*
* There is a race between the bts master and the pebs master.
*
* The thread/cpu access is synchronized via get/put_cpu() for
* task tracing and via wrmsr_on_cpu for cpu tracing.
*
* If bts and pebs are collected for the same task or same cpu,
* the same configuration is written twice.
*/
if (context->task) {
get_cpu();
if (context->task == current)
wrmsrl(MSR_IA32_DS_AREA, ds);
set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
put_cpu();
} else
wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA,
(u32)((u64)ds), (u32)((u64)ds >> 32));
}
/* /*
* Call the tracer's callback on a buffer overflow. * Call the tracer's callback on a buffer overflow.
...@@ -622,6 +644,7 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, ...@@ -622,6 +644,7 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
* The value for 'no threshold' is -1, which will set the * The value for 'no threshold' is -1, which will set the
* threshold outside of the buffer, just like we want it. * threshold outside of the buffer, just like we want it.
*/ */
ith *= ds_cfg.sizeof_rec[qual];
trace->ith = (void *)(buffer + size - ith); trace->ith = (void *)(buffer + size - ith);
trace->flags = flags; trace->flags = flags;
...@@ -630,7 +653,7 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, ...@@ -630,7 +653,7 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
enum ds_qualifier qual, struct task_struct *task, enum ds_qualifier qual, struct task_struct *task,
void *base, size_t size, size_t th, unsigned int flags) int cpu, void *base, size_t size, size_t th)
{ {
struct ds_context *context; struct ds_context *context;
int error; int error;
...@@ -643,7 +666,7 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, ...@@ -643,7 +666,7 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
if (!base) if (!base)
goto out; goto out;
/* We require some space to do alignment adjustments below. */ /* We need space for alignment adjustments in ds_init_ds_trace(). */
error = -EINVAL; error = -EINVAL;
if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
goto out; goto out;
...@@ -660,25 +683,27 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, ...@@ -660,25 +683,27 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
tracer->size = size; tracer->size = size;
error = -ENOMEM; error = -ENOMEM;
context = ds_get_context(task); context = ds_get_context(task, cpu);
if (!context) if (!context)
goto out; goto out;
tracer->context = context; tracer->context = context;
ds_init_ds_trace(trace, qual, base, size, th, flags); /*
* Defer any tracer-specific initialization work for the context until
* context ownership has been clarified.
*/
error = 0; error = 0;
out: out:
return error; return error;
} }
struct bts_tracer *ds_request_bts(struct task_struct *task, static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu,
void *base, size_t size, void *base, size_t size,
bts_ovfl_callback_t ovfl, size_t th, bts_ovfl_callback_t ovfl, size_t th,
unsigned int flags) unsigned int flags)
{ {
struct bts_tracer *tracer; struct bts_tracer *tracer;
unsigned long irq;
int error; int error;
/* Buffer overflow notification is not yet implemented. */ /* Buffer overflow notification is not yet implemented. */
...@@ -690,42 +715,46 @@ struct bts_tracer *ds_request_bts(struct task_struct *task, ...@@ -690,42 +715,46 @@ struct bts_tracer *ds_request_bts(struct task_struct *task,
if (error < 0) if (error < 0)
goto out; goto out;
/*
* Per-cpu tracing is typically requested using smp_call_function().
* We must not sleep.
*/
error = -ENOMEM; error = -ENOMEM;
tracer = kzalloc(sizeof(*tracer), GFP_ATOMIC); tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
if (!tracer) if (!tracer)
goto out_put_tracer; goto out_put_tracer;
tracer->ovfl = ovfl; tracer->ovfl = ovfl;
/* Do some more error checking and acquire a tracing context. */
error = ds_request(&tracer->ds, &tracer->trace.ds, error = ds_request(&tracer->ds, &tracer->trace.ds,
ds_bts, task, base, size, th, flags); ds_bts, task, cpu, base, size, th);
if (error < 0) if (error < 0)
goto out_tracer; goto out_tracer;
/* Claim the bts part of the tracing context we acquired above. */
spin_lock_irqsave(&ds_lock, irq); spin_lock_irq(&ds_lock);
error = -EPERM; error = -EPERM;
if (tracer->ds.context->bts_master) if (tracer->ds.context->bts_master)
goto out_unlock; goto out_unlock;
tracer->ds.context->bts_master = tracer; tracer->ds.context->bts_master = tracer;
spin_unlock_irqrestore(&ds_lock, irq); spin_unlock_irq(&ds_lock);
/*
* Now that we own the bts part of the context, let's complete the
* initialization for that part.
*/
ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags);
ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
ds_install_ds_area(tracer->ds.context);
tracer->trace.read = bts_read; tracer->trace.read = bts_read;
tracer->trace.write = bts_write; tracer->trace.write = bts_write;
ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); /* Start tracing. */
ds_resume_bts(tracer); ds_resume_bts(tracer);
return tracer; return tracer;
out_unlock: out_unlock:
spin_unlock_irqrestore(&ds_lock, irq); spin_unlock_irq(&ds_lock);
ds_put_context(tracer->ds.context); ds_put_context(tracer->ds.context);
out_tracer: out_tracer:
kfree(tracer); kfree(tracer);
...@@ -735,13 +764,27 @@ struct bts_tracer *ds_request_bts(struct task_struct *task, ...@@ -735,13 +764,27 @@ struct bts_tracer *ds_request_bts(struct task_struct *task,
return ERR_PTR(error); return ERR_PTR(error);
} }
struct pebs_tracer *ds_request_pebs(struct task_struct *task, struct bts_tracer *ds_request_bts_task(struct task_struct *task,
void *base, size_t size,
bts_ovfl_callback_t ovfl,
size_t th, unsigned int flags)
{
return ds_request_bts(task, 0, base, size, ovfl, th, flags);
}
struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
bts_ovfl_callback_t ovfl,
size_t th, unsigned int flags)
{
return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags);
}
static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu,
void *base, size_t size, void *base, size_t size,
pebs_ovfl_callback_t ovfl, size_t th, pebs_ovfl_callback_t ovfl, size_t th,
unsigned int flags) unsigned int flags)
{ {
struct pebs_tracer *tracer; struct pebs_tracer *tracer;
unsigned long irq;
int error; int error;
/* Buffer overflow notification is not yet implemented. */ /* Buffer overflow notification is not yet implemented. */
...@@ -753,37 +796,43 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task, ...@@ -753,37 +796,43 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
if (error < 0) if (error < 0)
goto out; goto out;
/*
* Per-cpu tracing is typically requested using smp_call_function().
* We must not sleep.
*/
error = -ENOMEM; error = -ENOMEM;
tracer = kzalloc(sizeof(*tracer), GFP_ATOMIC); tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
if (!tracer) if (!tracer)
goto out_put_tracer; goto out_put_tracer;
tracer->ovfl = ovfl; tracer->ovfl = ovfl;
/* Do some more error checking and acquire a tracing context. */
error = ds_request(&tracer->ds, &tracer->trace.ds, error = ds_request(&tracer->ds, &tracer->trace.ds,
ds_pebs, task, base, size, th, flags); ds_pebs, task, cpu, base, size, th);
if (error < 0) if (error < 0)
goto out_tracer; goto out_tracer;
spin_lock_irqsave(&ds_lock, irq); /* Claim the pebs part of the tracing context we acquired above. */
spin_lock_irq(&ds_lock);
error = -EPERM; error = -EPERM;
if (tracer->ds.context->pebs_master) if (tracer->ds.context->pebs_master)
goto out_unlock; goto out_unlock;
tracer->ds.context->pebs_master = tracer; tracer->ds.context->pebs_master = tracer;
spin_unlock_irqrestore(&ds_lock, irq); spin_unlock_irq(&ds_lock);
/*
* Now that we own the pebs part of the context, let's complete the
* initialization for that part.
*/
ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags);
ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
ds_install_ds_area(tracer->ds.context);
/* Start tracing. */
ds_resume_pebs(tracer); ds_resume_pebs(tracer);
return tracer; return tracer;
out_unlock: out_unlock:
spin_unlock_irqrestore(&ds_lock, irq); spin_unlock_irq(&ds_lock);
ds_put_context(tracer->ds.context); ds_put_context(tracer->ds.context);
out_tracer: out_tracer:
kfree(tracer); kfree(tracer);
...@@ -793,16 +842,26 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task, ...@@ -793,16 +842,26 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
return ERR_PTR(error); return ERR_PTR(error);
} }
void ds_release_bts(struct bts_tracer *tracer) struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
void *base, size_t size,
pebs_ovfl_callback_t ovfl,
size_t th, unsigned int flags)
{ {
struct task_struct *task; return ds_request_pebs(task, 0, base, size, ovfl, th, flags);
}
if (!tracer) struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size,
return; pebs_ovfl_callback_t ovfl,
size_t th, unsigned int flags)
{
return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags);
}
task = tracer->ds.context->task; static void ds_free_bts(struct bts_tracer *tracer)
{
struct task_struct *task;
ds_suspend_bts(tracer); task = tracer->ds.context->task;
WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
tracer->ds.context->bts_master = NULL; tracer->ds.context->bts_master = NULL;
...@@ -817,9 +876,69 @@ void ds_release_bts(struct bts_tracer *tracer) ...@@ -817,9 +876,69 @@ void ds_release_bts(struct bts_tracer *tracer)
kfree(tracer); kfree(tracer);
} }
void ds_release_bts(struct bts_tracer *tracer)
{
might_sleep();
if (!tracer)
return;
ds_suspend_bts(tracer);
ds_free_bts(tracer);
}
int ds_release_bts_noirq(struct bts_tracer *tracer)
{
struct task_struct *task;
unsigned long irq;
int error;
if (!tracer)
return 0;
task = tracer->ds.context->task;
local_irq_save(irq);
error = -EPERM;
if (!task &&
(tracer->ds.context->cpu != smp_processor_id()))
goto out;
error = -EPERM;
if (task && (task != current))
goto out;
ds_suspend_bts_noirq(tracer);
ds_free_bts(tracer);
error = 0;
out:
local_irq_restore(irq);
return error;
}
static void update_task_debugctlmsr(struct task_struct *task,
unsigned long debugctlmsr)
{
task->thread.debugctlmsr = debugctlmsr;
get_cpu();
if (task == current)
update_debugctlmsr(debugctlmsr);
if (task->thread.debugctlmsr)
set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
else
clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
put_cpu();
}
void ds_suspend_bts(struct bts_tracer *tracer) void ds_suspend_bts(struct bts_tracer *tracer)
{ {
struct task_struct *task; struct task_struct *task;
unsigned long debugctlmsr;
int cpu;
if (!tracer) if (!tracer)
return; return;
...@@ -827,29 +946,60 @@ void ds_suspend_bts(struct bts_tracer *tracer) ...@@ -827,29 +946,60 @@ void ds_suspend_bts(struct bts_tracer *tracer)
tracer->flags = 0; tracer->flags = 0;
task = tracer->ds.context->task; task = tracer->ds.context->task;
cpu = tracer->ds.context->cpu;
if (!task || (task == current)) WARN_ON(!task && irqs_disabled());
update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
if (task) { debugctlmsr = (task ?
task->thread.debugctlmsr &= ~BTS_CONTROL; task->thread.debugctlmsr :
get_debugctlmsr_on_cpu(cpu));
debugctlmsr &= ~BTS_CONTROL;
if (!task->thread.debugctlmsr) if (task)
clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); update_task_debugctlmsr(task, debugctlmsr);
} else
update_debugctlmsr_on_cpu(cpu, debugctlmsr);
} }
void ds_resume_bts(struct bts_tracer *tracer) int ds_suspend_bts_noirq(struct bts_tracer *tracer)
{ {
struct task_struct *task; struct task_struct *task;
unsigned long control; unsigned long debugctlmsr, irq;
int cpu, error = 0;
if (!tracer) if (!tracer)
return; return 0;
tracer->flags = tracer->trace.ds.flags; tracer->flags = 0;
task = tracer->ds.context->task; task = tracer->ds.context->task;
cpu = tracer->ds.context->cpu;
local_irq_save(irq);
error = -EPERM;
if (!task && (cpu != smp_processor_id()))
goto out;
debugctlmsr = (task ?
task->thread.debugctlmsr :
get_debugctlmsr());
debugctlmsr &= ~BTS_CONTROL;
if (task)
update_task_debugctlmsr(task, debugctlmsr);
else
update_debugctlmsr(debugctlmsr);
error = 0;
out:
local_irq_restore(irq);
return error;
}
static unsigned long ds_bts_control(struct bts_tracer *tracer)
{
unsigned long control;
control = ds_cfg.ctl[dsf_bts]; control = ds_cfg.ctl[dsf_bts];
if (!(tracer->trace.ds.flags & BTS_KERNEL)) if (!(tracer->trace.ds.flags & BTS_KERNEL))
...@@ -857,25 +1007,77 @@ void ds_resume_bts(struct bts_tracer *tracer) ...@@ -857,25 +1007,77 @@ void ds_resume_bts(struct bts_tracer *tracer)
if (!(tracer->trace.ds.flags & BTS_USER)) if (!(tracer->trace.ds.flags & BTS_USER))
control |= ds_cfg.ctl[dsf_bts_user]; control |= ds_cfg.ctl[dsf_bts_user];
if (task) { return control;
task->thread.debugctlmsr |= control;
set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
}
if (!task || (task == current))
update_debugctlmsr(get_debugctlmsr() | control);
} }
void ds_release_pebs(struct pebs_tracer *tracer) void ds_resume_bts(struct bts_tracer *tracer)
{ {
struct task_struct *task; struct task_struct *task;
unsigned long debugctlmsr;
int cpu;
if (!tracer) if (!tracer)
return; return;
tracer->flags = tracer->trace.ds.flags;
task = tracer->ds.context->task; task = tracer->ds.context->task;
cpu = tracer->ds.context->cpu;
ds_suspend_pebs(tracer); WARN_ON(!task && irqs_disabled());
debugctlmsr = (task ?
task->thread.debugctlmsr :
get_debugctlmsr_on_cpu(cpu));
debugctlmsr |= ds_bts_control(tracer);
if (task)
update_task_debugctlmsr(task, debugctlmsr);
else
update_debugctlmsr_on_cpu(cpu, debugctlmsr);
}
int ds_resume_bts_noirq(struct bts_tracer *tracer)
{
struct task_struct *task;
unsigned long debugctlmsr, irq;
int cpu, error = 0;
if (!tracer)
return 0;
tracer->flags = tracer->trace.ds.flags;
task = tracer->ds.context->task;
cpu = tracer->ds.context->cpu;
local_irq_save(irq);
error = -EPERM;
if (!task && (cpu != smp_processor_id()))
goto out;
debugctlmsr = (task ?
task->thread.debugctlmsr :
get_debugctlmsr());
debugctlmsr |= ds_bts_control(tracer);
if (task)
update_task_debugctlmsr(task, debugctlmsr);
else
update_debugctlmsr(debugctlmsr);
error = 0;
out:
local_irq_restore(irq);
return error;
}
static void ds_free_pebs(struct pebs_tracer *tracer)
{
struct task_struct *task;
task = tracer->ds.context->task;
WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
tracer->ds.context->pebs_master = NULL; tracer->ds.context->pebs_master = NULL;
...@@ -886,16 +1088,68 @@ void ds_release_pebs(struct pebs_tracer *tracer) ...@@ -886,16 +1088,68 @@ void ds_release_pebs(struct pebs_tracer *tracer)
kfree(tracer); kfree(tracer);
} }
void ds_release_pebs(struct pebs_tracer *tracer)
{
might_sleep();
if (!tracer)
return;
ds_suspend_pebs(tracer);
ds_free_pebs(tracer);
}
int ds_release_pebs_noirq(struct pebs_tracer *tracer)
{
struct task_struct *task;
unsigned long irq;
int error;
if (!tracer)
return 0;
task = tracer->ds.context->task;
local_irq_save(irq);
error = -EPERM;
if (!task &&
(tracer->ds.context->cpu != smp_processor_id()))
goto out;
error = -EPERM;
if (task && (task != current))
goto out;
ds_suspend_pebs_noirq(tracer);
ds_free_pebs(tracer);
error = 0;
out:
local_irq_restore(irq);
return error;
}
void ds_suspend_pebs(struct pebs_tracer *tracer) void ds_suspend_pebs(struct pebs_tracer *tracer)
{ {
} }
int ds_suspend_pebs_noirq(struct pebs_tracer *tracer)
{
return 0;
}
void ds_resume_pebs(struct pebs_tracer *tracer) void ds_resume_pebs(struct pebs_tracer *tracer)
{ {
} }
int ds_resume_pebs_noirq(struct pebs_tracer *tracer)
{
return 0;
}
const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
{ {
if (!tracer) if (!tracer)
...@@ -1004,26 +1258,6 @@ ds_configure(const struct ds_configuration *cfg, ...@@ -1004,26 +1258,6 @@ ds_configure(const struct ds_configuration *cfg,
printk(KERN_INFO "[ds] pebs not available\n"); printk(KERN_INFO "[ds] pebs not available\n");
} }
if (ds_cfg.sizeof_rec[ds_bts]) {
int error;
error = ds_selftest_bts();
if (error) {
WARN(1, "[ds] selftest failed. disabling bts.\n");
ds_cfg.sizeof_rec[ds_bts] = 0;
}
}
if (ds_cfg.sizeof_rec[ds_pebs]) {
int error;
error = ds_selftest_pebs();
if (error) {
WARN(1, "[ds] selftest failed. disabling pebs.\n");
ds_cfg.sizeof_rec[ds_pebs] = 0;
}
}
printk(KERN_INFO "[ds] sizes: address: %u bit, ", printk(KERN_INFO "[ds] sizes: address: %u bit, ",
8 * ds_cfg.sizeof_ptr_field); 8 * ds_cfg.sizeof_ptr_field);
printk("bts/pebs record: %u/%u bytes\n", printk("bts/pebs record: %u/%u bytes\n",
...@@ -1127,3 +1361,29 @@ void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) ...@@ -1127,3 +1361,29 @@ void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
void ds_exit_thread(struct task_struct *tsk) void ds_exit_thread(struct task_struct *tsk)
{ {
} }
static __init int ds_selftest(void)
{
if (ds_cfg.sizeof_rec[ds_bts]) {
int error;
error = ds_selftest_bts();
if (error) {
WARN(1, "[ds] selftest failed. disabling bts.\n");
ds_cfg.sizeof_rec[ds_bts] = 0;
}
}
if (ds_cfg.sizeof_rec[ds_pebs]) {
int error;
error = ds_selftest_pebs();
if (error) {
WARN(1, "[ds] selftest failed. disabling pebs.\n");
ds_cfg.sizeof_rec[ds_pebs] = 0;
}
}
return 0;
}
device_initcall(ds_selftest);
...@@ -10,11 +10,12 @@ ...@@ -10,11 +10,12 @@
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/smp.h>
#include <asm/ds.h> #include <asm/ds.h>
#define DS_SELFTEST_BUFFER_SIZE 1021 /* Intentionally chose an odd size. */ #define BUFFER_SIZE 1021 /* Intentionally chose an odd size. */
static int ds_selftest_bts_consistency(const struct bts_trace *trace) static int ds_selftest_bts_consistency(const struct bts_trace *trace)
...@@ -125,11 +126,11 @@ int ds_selftest_bts(void) ...@@ -125,11 +126,11 @@ int ds_selftest_bts(void)
struct bts_tracer *tracer; struct bts_tracer *tracer;
int error = 0; int error = 0;
void *top; void *top;
unsigned char buffer[DS_SELFTEST_BUFFER_SIZE]; unsigned char buffer[BUFFER_SIZE];
printk(KERN_INFO "[ds] bts selftest..."); printk(KERN_INFO "[ds] bts selftest...");
tracer = ds_request_bts(NULL, buffer, DS_SELFTEST_BUFFER_SIZE, tracer = ds_request_bts_cpu(smp_processor_id(), buffer, BUFFER_SIZE,
NULL, (size_t)-1, BTS_KERNEL); NULL, (size_t)-1, BTS_KERNEL);
if (IS_ERR(tracer)) { if (IS_ERR(tracer)) {
error = PTR_ERR(tracer); error = PTR_ERR(tracer);
......
...@@ -800,7 +800,8 @@ static int ptrace_bts_config(struct task_struct *child, ...@@ -800,7 +800,8 @@ static int ptrace_bts_config(struct task_struct *child,
if (cfg.flags & PTRACE_BTS_O_SCHED) if (cfg.flags & PTRACE_BTS_O_SCHED)
flags |= BTS_TIMESTAMPS; flags |= BTS_TIMESTAMPS;
context->tracer = ds_request_bts(child, context->buffer, context->size, context->tracer =
ds_request_bts_task(child, context->buffer, context->size,
NULL, (size_t)-1, flags); NULL, (size_t)-1, flags);
if (unlikely(IS_ERR(context->tracer))) { if (unlikely(IS_ERR(context->tracer))) {
int error = PTR_ERR(context->tracer); int error = PTR_ERR(context->tracer);
......
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
* Copyright (C) 2008-2009 Intel Corporation. * Copyright (C) 2008-2009 Intel Corporation.
* Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009 * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
*/ */
#include <linux/spinlock.h>
#include <linux/kallsyms.h> #include <linux/kallsyms.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/ftrace.h> #include <linux/ftrace.h>
...@@ -21,168 +20,113 @@ ...@@ -21,168 +20,113 @@
#define BTS_BUFFER_SIZE (1 << 13) #define BTS_BUFFER_SIZE (1 << 13)
/*
* The tracer lock protects the below per-cpu tracer array.
* It needs to be held to:
* - start tracing on all cpus
* - stop tracing on all cpus
* - start tracing on a single hotplug cpu
* - stop tracing on a single hotplug cpu
* - read the trace from all cpus
* - read the trace from a single cpu
*/
static DEFINE_SPINLOCK(bts_tracer_lock);
static DEFINE_PER_CPU(struct bts_tracer *, tracer); static DEFINE_PER_CPU(struct bts_tracer *, tracer);
static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer); static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer);
#define this_tracer per_cpu(tracer, smp_processor_id()) #define this_tracer per_cpu(tracer, smp_processor_id())
#define this_buffer per_cpu(buffer, smp_processor_id())
static int trace_hw_branches_enabled __read_mostly; static int trace_hw_branches_enabled __read_mostly;
static int trace_hw_branches_suspended __read_mostly; static int trace_hw_branches_suspended __read_mostly;
static struct trace_array *hw_branch_trace __read_mostly; static struct trace_array *hw_branch_trace __read_mostly;
/* static void bts_trace_init_cpu(int cpu)
* Initialize the tracer for the current cpu.
* The argument is ignored.
*
* pre: bts_tracer_lock must be locked.
*/
static void bts_trace_init_cpu(void *arg)
{ {
if (this_tracer) per_cpu(tracer, cpu) =
ds_release_bts(this_tracer); ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE,
this_tracer = ds_request_bts(NULL, this_buffer, BTS_BUFFER_SIZE,
NULL, (size_t)-1, BTS_KERNEL); NULL, (size_t)-1, BTS_KERNEL);
if (IS_ERR(this_tracer)) {
this_tracer = NULL; if (IS_ERR(per_cpu(tracer, cpu)))
return; per_cpu(tracer, cpu) = NULL;
}
} }
static int bts_trace_init(struct trace_array *tr) static int bts_trace_init(struct trace_array *tr)
{ {
int cpu, avail; int cpu;
spin_lock(&bts_tracer_lock);
hw_branch_trace = tr; hw_branch_trace = tr;
trace_hw_branches_enabled = 0;
on_each_cpu(bts_trace_init_cpu, NULL, 1); get_online_cpus();
for_each_online_cpu(cpu) {
/* Check on how many cpus we could enable tracing */ bts_trace_init_cpu(cpu);
avail = 0;
for_each_online_cpu(cpu)
if (per_cpu(tracer, cpu))
avail++;
trace_hw_branches_enabled = (avail ? 1 : 0); if (likely(per_cpu(tracer, cpu)))
trace_hw_branches_enabled = 1;
}
trace_hw_branches_suspended = 0; trace_hw_branches_suspended = 0;
put_online_cpus();
spin_unlock(&bts_tracer_lock);
/* If we could not enable tracing on a single cpu, we fail. */ /* If we could not enable tracing on a single cpu, we fail. */
return avail ? 0 : -EOPNOTSUPP; return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP;
}
/*
* Release the tracer for the current cpu.
* The argument is ignored.
*
* pre: bts_tracer_lock must be locked.
*/
static void bts_trace_release_cpu(void *arg)
{
if (this_tracer) {
ds_release_bts(this_tracer);
this_tracer = NULL;
}
} }
static void bts_trace_reset(struct trace_array *tr) static void bts_trace_reset(struct trace_array *tr)
{ {
spin_lock(&bts_tracer_lock); int cpu;
on_each_cpu(bts_trace_release_cpu, NULL, 1); get_online_cpus();
for_each_online_cpu(cpu) {
if (likely(per_cpu(tracer, cpu))) {
ds_release_bts(per_cpu(tracer, cpu));
per_cpu(tracer, cpu) = NULL;
}
}
trace_hw_branches_enabled = 0; trace_hw_branches_enabled = 0;
trace_hw_branches_suspended = 0; trace_hw_branches_suspended = 0;
put_online_cpus();
spin_unlock(&bts_tracer_lock);
}
/*
* Resume tracing on the current cpu.
* The argument is ignored.
*
* pre: bts_tracer_lock must be locked.
*/
static void bts_trace_resume_cpu(void *arg)
{
if (this_tracer)
ds_resume_bts(this_tracer);
} }
static void bts_trace_start(struct trace_array *tr) static void bts_trace_start(struct trace_array *tr)
{ {
spin_lock(&bts_tracer_lock); int cpu;
on_each_cpu(bts_trace_resume_cpu, NULL, 1); get_online_cpus();
for_each_online_cpu(cpu)
if (likely(per_cpu(tracer, cpu)))
ds_resume_bts(per_cpu(tracer, cpu));
trace_hw_branches_suspended = 0; trace_hw_branches_suspended = 0;
put_online_cpus();
spin_unlock(&bts_tracer_lock);
}
/*
* Suspend tracing on the current cpu.
* The argument is ignored.
*
* pre: bts_tracer_lock must be locked.
*/
static void bts_trace_suspend_cpu(void *arg)
{
if (this_tracer)
ds_suspend_bts(this_tracer);
} }
static void bts_trace_stop(struct trace_array *tr) static void bts_trace_stop(struct trace_array *tr)
{ {
spin_lock(&bts_tracer_lock); int cpu;
on_each_cpu(bts_trace_suspend_cpu, NULL, 1); get_online_cpus();
for_each_online_cpu(cpu)
if (likely(per_cpu(tracer, cpu)))
ds_suspend_bts(per_cpu(tracer, cpu));
trace_hw_branches_suspended = 1; trace_hw_branches_suspended = 1;
put_online_cpus();
spin_unlock(&bts_tracer_lock);
} }
static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb, static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
unsigned long action, void *hcpu) unsigned long action, void *hcpu)
{ {
unsigned int cpu = (unsigned long)hcpu; int cpu = (long)hcpu;
spin_lock(&bts_tracer_lock);
if (!trace_hw_branches_enabled)
goto out;
switch (action) { switch (action) {
case CPU_ONLINE: case CPU_ONLINE:
case CPU_DOWN_FAILED: case CPU_DOWN_FAILED:
smp_call_function_single(cpu, bts_trace_init_cpu, NULL, 1); /* The notification is sent with interrupts enabled. */
if (trace_hw_branches_enabled) {
bts_trace_init_cpu(cpu);
if (trace_hw_branches_suspended) if (trace_hw_branches_suspended &&
smp_call_function_single(cpu, bts_trace_suspend_cpu, likely(per_cpu(tracer, cpu)))
NULL, 1); ds_suspend_bts(per_cpu(tracer, cpu));
}
break; break;
case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE:
smp_call_function_single(cpu, bts_trace_release_cpu, NULL, 1); /* The notification is sent with interrupts enabled. */
break; if (likely(per_cpu(tracer, cpu))) {
ds_release_bts(per_cpu(tracer, cpu));
per_cpu(tracer, cpu) = NULL;
}
} }
out:
spin_unlock(&bts_tracer_lock);
return NOTIFY_DONE; return NOTIFY_DONE;
} }
...@@ -274,7 +218,7 @@ static void trace_bts_at(const struct bts_trace *trace, void *at) ...@@ -274,7 +218,7 @@ static void trace_bts_at(const struct bts_trace *trace, void *at)
/* /*
* Collect the trace on the current cpu and write it into the ftrace buffer. * Collect the trace on the current cpu and write it into the ftrace buffer.
* *
* pre: bts_tracer_lock must be locked * pre: tracing must be suspended on the current cpu
*/ */
static void trace_bts_cpu(void *arg) static void trace_bts_cpu(void *arg)
{ {
...@@ -291,10 +235,9 @@ static void trace_bts_cpu(void *arg) ...@@ -291,10 +235,9 @@ static void trace_bts_cpu(void *arg)
if (unlikely(!this_tracer)) if (unlikely(!this_tracer))
return; return;
ds_suspend_bts(this_tracer);
trace = ds_read_bts(this_tracer); trace = ds_read_bts(this_tracer);
if (!trace) if (!trace)
goto out; return;
for (at = trace->ds.top; (void *)at < trace->ds.end; for (at = trace->ds.top; (void *)at < trace->ds.end;
at += trace->ds.size) at += trace->ds.size)
...@@ -303,18 +246,27 @@ static void trace_bts_cpu(void *arg) ...@@ -303,18 +246,27 @@ static void trace_bts_cpu(void *arg)
for (at = trace->ds.begin; (void *)at < trace->ds.top; for (at = trace->ds.begin; (void *)at < trace->ds.top;
at += trace->ds.size) at += trace->ds.size)
trace_bts_at(trace, at); trace_bts_at(trace, at);
out:
ds_resume_bts(this_tracer);
} }
static void trace_bts_prepare(struct trace_iterator *iter) static void trace_bts_prepare(struct trace_iterator *iter)
{ {
spin_lock(&bts_tracer_lock); int cpu;
get_online_cpus();
for_each_online_cpu(cpu)
if (likely(per_cpu(tracer, cpu)))
ds_suspend_bts(per_cpu(tracer, cpu));
/*
* We need to collect the trace on the respective cpu since ftrace
* implicitly adds the record for the current cpu.
* Once that is more flexible, we could collect the data from any cpu.
*/
on_each_cpu(trace_bts_cpu, iter->tr, 1); on_each_cpu(trace_bts_cpu, iter->tr, 1);
spin_unlock(&bts_tracer_lock); for_each_online_cpu(cpu)
if (likely(per_cpu(tracer, cpu)))
ds_resume_bts(per_cpu(tracer, cpu));
put_online_cpus();
} }
static void trace_bts_close(struct trace_iterator *iter) static void trace_bts_close(struct trace_iterator *iter)
...@@ -324,12 +276,11 @@ static void trace_bts_close(struct trace_iterator *iter) ...@@ -324,12 +276,11 @@ static void trace_bts_close(struct trace_iterator *iter)
void trace_hw_branch_oops(void) void trace_hw_branch_oops(void)
{ {
spin_lock(&bts_tracer_lock); if (this_tracer) {
ds_suspend_bts_noirq(this_tracer);
if (trace_hw_branches_enabled)
trace_bts_cpu(hw_branch_trace); trace_bts_cpu(hw_branch_trace);
ds_resume_bts_noirq(this_tracer);
spin_unlock(&bts_tracer_lock); }
} }
struct tracer bts_tracer __read_mostly = struct tracer bts_tracer __read_mostly =
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment