Commit be4bdbfb authored by Ingo Molnar

Merge branch 'tracing/core-v3' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing into tracing/urgent

parents fc537766 20ab4425
@@ -4,6 +4,7 @@
 #include <linux/ring_buffer.h>
 #include <linux/trace_seq.h>
 #include <linux/percpu.h>
+#include <linux/hardirq.h>
 
 struct trace_array;
 struct tracer;
@@ -130,10 +131,15 @@ struct ftrace_event_call {
 	void			*data;
 
 	atomic_t		profile_count;
-	int			(*profile_enable)(struct ftrace_event_call *);
-	void			(*profile_disable)(struct ftrace_event_call *);
+	int			(*profile_enable)(void);
+	void			(*profile_disable)(void);
 };
 
+#define FTRACE_MAX_PROFILE_SIZE	2048
+
+extern char *trace_profile_buf;
+extern char *trace_profile_buf_nmi;
+
 #define MAX_FILTER_PRED		32
 #define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
......
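The per-event refcount and the two buffer pointers added here are consumed by the generated profile handlers further down in this commit. Condensed into one place, the consumer pattern looks roughly like the sketch below; the function name is made up, the real callers are the macro-generated ftrace_profile_##call handlers and prof_syscall_enter/exit:

#include <linux/types.h>
#include <linux/smp.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/hardirq.h>		/* in_nmi() */
#include <linux/ftrace_event.h>		/* FTRACE_MAX_PROFILE_SIZE, trace_profile_buf* */

/*
 * Sketch only: "size" is the aligned sample size, already checked
 * against FTRACE_MAX_PROFILE_SIZE by the caller.
 */
static void example_profile_hit(int size)
{
	unsigned long flags;
	char *raw_data;

	/* irqs off also pins the sched-RCU read side used at teardown */
	local_irq_save(flags);

	if (in_nmi())
		raw_data = rcu_dereference(trace_profile_buf_nmi);
	else
		raw_data = rcu_dereference(trace_profile_buf);

	if (!raw_data)
		goto out;		/* profiling was just disabled */

	/* each CPU owns one FTRACE_MAX_PROFILE_SIZE slot */
	raw_data = per_cpu_ptr(raw_data, smp_processor_id());

	/* zero the alignment padding so stale bytes never reach userspace */
	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

	/* ... build the sample in raw_data and hand it to perf_tpcounter_event() ... */
out:
	local_irq_restore(flags);
}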
@@ -100,32 +100,24 @@ struct perf_counter_attr;
 #ifdef CONFIG_EVENT_PROFILE
 #define TRACE_SYS_ENTER_PROFILE(sname)					\
-static int prof_sysenter_enable_##sname(struct ftrace_event_call *event_call) \
+static int prof_sysenter_enable_##sname(void)				\
 {									\
-	int ret = 0;							\
-	if (!atomic_inc_return(&event_enter_##sname.profile_count))	\
-		ret = reg_prof_syscall_enter("sys"#sname);		\
-	return ret;							\
+	return reg_prof_syscall_enter("sys"#sname);			\
 }									\
 									\
-static void prof_sysenter_disable_##sname(struct ftrace_event_call *event_call)\
+static void prof_sysenter_disable_##sname(void)			\
 {									\
-	if (atomic_add_negative(-1, &event_enter_##sname.profile_count))\
 	unreg_prof_syscall_enter("sys"#sname);				\
 }
 
 #define TRACE_SYS_EXIT_PROFILE(sname)					\
-static int prof_sysexit_enable_##sname(struct ftrace_event_call *event_call) \
+static int prof_sysexit_enable_##sname(void)				\
 {									\
-	int ret = 0;							\
-	if (!atomic_inc_return(&event_exit_##sname.profile_count))	\
-		ret = reg_prof_syscall_exit("sys"#sname);		\
-	return ret;							\
+	return reg_prof_syscall_exit("sys"#sname);			\
 }									\
 									\
-static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \
+static void prof_sysexit_disable_##sname(void)				\
 {									\
-	if (atomic_add_negative(-1, &event_exit_##sname.profile_count))\
 	unreg_prof_syscall_exit("sys"#sname);				\
 }
......
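For a concrete view of what the slimmed-down macro now generates, here is roughly the expansion of TRACE_SYS_ENTER_PROFILE for a hypothetical sname of _getpid (the name is purely illustrative); the per-event atomic bookkeeping that used to live here is now done centrally in ftrace_profile_enable_event()/ftrace_profile_disable_event(), added further down in this commit:

static int prof_sysenter_enable__getpid(void)
{
	/* no profile_count juggling here anymore */
	return reg_prof_syscall_enter("sys_getpid");
}

static void prof_sysenter_disable__getpid(void)
{
	unreg_prof_syscall_enter("sys_getpid");
}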
@@ -382,19 +382,13 @@ static inline int ftrace_get_offsets_##call(		\
  *
  * NOTE: The insertion profile callback (ftrace_profile_<call>) is defined later
  *
- * static int ftrace_profile_enable_<call>(struct ftrace_event_call *event_call)
+ * static int ftrace_profile_enable_<call>(void)
  * {
- *	int ret = 0;
- *
- *	if (!atomic_inc_return(&event_call->profile_count))
- *		ret = register_trace_<call>(ftrace_profile_<call>);
- *
- *	return ret;
+ *	return register_trace_<call>(ftrace_profile_<call>);
  * }
  *
- * static void ftrace_profile_disable_<call>(struct ftrace_event_call *event_call)
+ * static void ftrace_profile_disable_<call>(void)
  * {
- *	if (atomic_add_negative(-1, &event->call->profile_count))
  *	unregister_trace_<call>(ftrace_profile_<call>);
  * }
  *
@@ -405,19 +399,13 @@ static inline int ftrace_get_offsets_##call(		\
 									\
 static void ftrace_profile_##call(proto);				\
 									\
-static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
+static int ftrace_profile_enable_##call(void)				\
 {									\
-	int ret = 0;							\
-									\
-	if (!atomic_inc_return(&event_call->profile_count))		\
-		ret = register_trace_##call(ftrace_profile_##call);	\
-									\
-	return ret;							\
+	return register_trace_##call(ftrace_profile_##call);		\
 }									\
 									\
-static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
+static void ftrace_profile_disable_##call(void)			\
 {									\
-	if (atomic_add_negative(-1, &event_call->profile_count))	\
 	unregister_trace_##call(ftrace_profile_##call);			\
 }
@@ -660,11 +648,12 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
  *	struct ftrace_raw_##call *entry;
  *	u64 __addr = 0, __count = 1;
  *	unsigned long irq_flags;
+ *	struct trace_entry *ent;
  *	int __entry_size;
  *	int __data_size;
+ *	int __cpu
  *	int pc;
  *
- *	local_save_flags(irq_flags);
  *	pc = preempt_count();
  *
  *	__data_size = ftrace_get_offsets_<call>(&__data_offsets, args);
@@ -675,12 +664,22 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
  *			     sizeof(u64));
  *	__entry_size -= sizeof(u32);
  *
- *	do {
- *		char raw_data[__entry_size]; <- allocate our sample in the stack
- *		struct trace_entry *ent;
+ *	// Protect the non nmi buffer
+ *	// This also protects the rcu read side
+ *	local_irq_save(irq_flags);
+ *	__cpu = smp_processor_id();
+ *
+ *	if (in_nmi())
+ *		raw_data = rcu_dereference(trace_profile_buf_nmi);
+ *	else
+ *		raw_data = rcu_dereference(trace_profile_buf);
  *
- *		zero dead bytes from alignment to avoid stack leak to userspace:
+ *	if (!raw_data)
+ *		goto end;
  *
+ *	raw_data = per_cpu_ptr(raw_data, __cpu);
+ *
+ *	//zero dead bytes from alignment to avoid stack leak to userspace:
  *	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
  *	entry = (struct ftrace_raw_<call> *)raw_data;
  *	ent = &entry->ent;
@@ -693,7 +692,6 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
  *
  *	perf_tpcounter_event(event_call->id, __addr, __count, entry,
  *		     __entry_size);  <- submit them to perf counter
- *	} while (0);
  *
  * }
  */
@@ -716,11 +714,13 @@ static void ftrace_profile_##call(proto)		\
 	struct ftrace_raw_##call *entry;				\
 	u64 __addr = 0, __count = 1;					\
 	unsigned long irq_flags;					\
+	struct trace_entry *ent;					\
 	int __entry_size;						\
 	int __data_size;						\
+	char *raw_data;							\
+	int __cpu;							\
 	int pc;								\
 									\
-	local_save_flags(irq_flags);					\
 	pc = preempt_count();						\
 									\
 	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
@@ -728,9 +728,22 @@ static void ftrace_profile_##call(proto)		\
 			     sizeof(u64));				\
 	__entry_size -= sizeof(u32);					\
 									\
-	do {								\
-		char raw_data[__entry_size];				\
-		struct trace_entry *ent;				\
+	if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE,		\
+		      "profile buffer not large enough"))		\
+		return;							\
+									\
+	local_irq_save(irq_flags);					\
+	__cpu = smp_processor_id();					\
+									\
+	if (in_nmi())							\
+		raw_data = rcu_dereference(trace_profile_buf_nmi);	\
+	else								\
+		raw_data = rcu_dereference(trace_profile_buf);		\
+									\
+	if (!raw_data)							\
+		goto end;						\
+									\
+	raw_data = per_cpu_ptr(raw_data, __cpu);			\
 									\
 	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;	\
 	entry = (struct ftrace_raw_##call *)raw_data;			\
@@ -742,9 +755,11 @@ static void ftrace_profile_##call(proto)		\
 									\
 	{ assign; }							\
 									\
-	perf_tpcounter_event(event_call->id, __addr, __count, entry,\
+	perf_tpcounter_event(event_call->id, __addr, __count, entry,	\
 			     __entry_size);				\
-	} while (0);							\
+									\
+end:									\
+	local_irq_restore(irq_flags);					\
 									\
 }
......
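A note on the __entry_size arithmetic in the handler above (it reappears in the syscall handlers below): perf's raw samples carry a u32 size field in front of the payload, so the payload is sized so that the u32 header plus the payload ends on a u64 boundary, and the last u64 of the payload is zeroed because the rounding can leave a few padding bytes that must not leak kernel memory to userspace. A small userspace illustration of the same arithmetic (the numbers are made up):

#include <stdio.h>
#include <stdint.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((size_t)(a) - 1))

int main(void)
{
	size_t data_size = 34;	/* pretend sizeof(*entry) + dynamic payload */

	/* align including the u32 size header perf prepends... */
	size_t entry_size = ALIGN(data_size + sizeof(uint32_t), sizeof(uint64_t));
	/* ...then drop that header again: this is the part we fill in */
	entry_size -= sizeof(uint32_t);

	/*
	 * 34 + 4 = 38 -> rounded up to 40 -> 36 bytes of payload,
	 * i.e. 2 padding bytes, which is why the real code does
	 * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
	 */
	printf("data=%zu payload=%zu pad=%zu\n",
	       data_size, entry_size, entry_size - data_size);
	return 0;
}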
@@ -8,6 +8,54 @@
 #include <linux/module.h>
 #include "trace.h"
 
+/*
+ * We can't use a size but a type in alloc_percpu()
+ * So let's create a dummy type that matches the desired size
+ */
+typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
+
+char *trace_profile_buf;
+char *trace_profile_buf_nmi;
+
+/* Count the events in use (per event id, not per instance) */
+static int	total_profile_count;
+
+static int ftrace_profile_enable_event(struct ftrace_event_call *event)
+{
+	char *buf;
+	int ret = -ENOMEM;
+
+	if (atomic_inc_return(&event->profile_count))
+		return 0;
+
+	if (!total_profile_count++) {
+		buf = (char *)alloc_percpu(profile_buf_t);
+		if (!buf)
+			goto fail_buf;
+
+		rcu_assign_pointer(trace_profile_buf, buf);
+
+		buf = (char *)alloc_percpu(profile_buf_t);
+		if (!buf)
+			goto fail_buf_nmi;
+
+		rcu_assign_pointer(trace_profile_buf_nmi, buf);
+	}
+
+	ret = event->profile_enable();
+	if (!ret)
+		return 0;
+
+	kfree(trace_profile_buf_nmi);
+fail_buf_nmi:
+	kfree(trace_profile_buf);
+fail_buf:
+	total_profile_count--;
+	atomic_dec(&event->profile_count);
+
+	return ret;
+}
+
 int ftrace_profile_enable(int event_id)
 {
 	struct ftrace_event_call *event;
@@ -17,7 +65,7 @@ int ftrace_profile_enable(int event_id)
 	list_for_each_entry(event, &ftrace_events, list) {
 		if (event->id == event_id && event->profile_enable &&
 		    try_module_get(event->mod)) {
-			ret = event->profile_enable(event);
+			ret = ftrace_profile_enable_event(event);
 			break;
 		}
 	}
@@ -26,6 +74,33 @@ int ftrace_profile_enable(int event_id)
 	return ret;
 }
 
+static void ftrace_profile_disable_event(struct ftrace_event_call *event)
+{
+	char *buf, *nmi_buf;
+
+	if (!atomic_add_negative(-1, &event->profile_count))
+		return;
+
+	event->profile_disable();
+
+	if (!--total_profile_count) {
+		buf = trace_profile_buf;
+		rcu_assign_pointer(trace_profile_buf, NULL);
+
+		nmi_buf = trace_profile_buf_nmi;
+		rcu_assign_pointer(trace_profile_buf_nmi, NULL);
+
+		/*
+		 * Ensure every events in profiling have finished before
+		 * releasing the buffers
+		 */
+		synchronize_sched();
+
+		free_percpu(buf);
+		free_percpu(nmi_buf);
+	}
+}
+
 void ftrace_profile_disable(int event_id)
 {
 	struct ftrace_event_call *event;
@@ -33,7 +108,7 @@ void ftrace_profile_disable(int event_id)
 	mutex_lock(&event_mutex);
 	list_for_each_entry(event, &ftrace_events, list) {
 		if (event->id == event_id) {
-			event->profile_disable(event);
+			ftrace_profile_disable_event(event);
 			module_put(event->mod);
 			break;
 		}
......
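A note on the bookkeeping above: event->profile_count starts at -1 in the event definitions (not visible in these hunks), so the first atomic_inc_return() yields 0 and actually enables the event while later enables only bump the count, and total_profile_count makes the two per-cpu buffers get allocated on the first enabled event and freed after the last one. Teardown relies on all readers dereferencing the buffers with interrupts disabled, which is why one sched-RCU grace period is enough before freeing. A minimal standalone sketch of that publish/unpublish pattern (hypothetical names, not code from this commit):

#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>

/* dummy type: alloc_percpu() wants a type, not a size */
typedef struct { char buf[2048]; } scratch_buf_t;

static char *scratch_buf;	/* published to readers via RCU */

static int scratch_publish(void)
{
	char *buf = (char *)alloc_percpu(scratch_buf_t);

	if (!buf)
		return -ENOMEM;

	/* pairs with rcu_dereference() on the (irqs-off) reader side */
	rcu_assign_pointer(scratch_buf, buf);
	return 0;
}

static void scratch_unpublish(void)
{
	char *buf = scratch_buf;

	rcu_assign_pointer(scratch_buf, NULL);

	/*
	 * Readers run with irqs (and thus preemption) disabled, so a
	 * sched-RCU grace period guarantees they are all done before
	 * the memory is handed back.
	 */
	synchronize_sched();
	free_percpu(buf);
}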
@@ -384,10 +384,13 @@ static int sys_prof_refcount_exit;
 
 static void prof_syscall_enter(struct pt_regs *regs, long id)
 {
-	struct syscall_trace_enter *rec;
 	struct syscall_metadata *sys_data;
+	struct syscall_trace_enter *rec;
+	unsigned long flags;
+	char *raw_data;
 	int syscall_nr;
 	int size;
+	int cpu;
 
 	syscall_nr = syscall_get_nr(current, regs);
 	if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@@ -402,8 +405,24 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	size = ALIGN(size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
 
-	do {
-		char raw_data[size];
+	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+		      "profile buffer not large enough"))
+		return;
+
+	/* Protect the per cpu buffer, begin the rcu read side */
+	local_irq_save(flags);
+	cpu = smp_processor_id();
+
+	if (in_nmi())
+		raw_data = rcu_dereference(trace_profile_buf_nmi);
+	else
+		raw_data = rcu_dereference(trace_profile_buf);
+
+	if (!raw_data)
+		goto end;
+
+	raw_data = per_cpu_ptr(raw_data, cpu);
 
 	/* zero the dead bytes from align to not leak stack to user */
 	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -415,7 +434,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
 			       (unsigned long *)&rec->args);
 
 	perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
-	} while(0);
+
+end:
+	local_irq_restore(flags);
 }
 
 int reg_prof_syscall_enter(char *name)
@@ -460,8 +481,12 @@ void unreg_prof_syscall_enter(char *name)
 static void prof_syscall_exit(struct pt_regs *regs, long ret)
 {
 	struct syscall_metadata *sys_data;
-	struct syscall_trace_exit rec;
+	struct syscall_trace_exit *rec;
+	unsigned long flags;
 	int syscall_nr;
+	char *raw_data;
+	int size;
+	int cpu;
 
 	syscall_nr = syscall_get_nr(current, regs);
 	if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@@ -471,12 +496,46 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	if (!sys_data)
 		return;
 
-	tracing_generic_entry_update(&rec.ent, 0, 0);
-	rec.ent.type = sys_data->exit_id;
-	rec.nr = syscall_nr;
-	rec.ret = syscall_get_return_value(current, regs);
+	/* We can probably do that at build time */
+	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
+	size -= sizeof(u32);
 
-	perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec));
+	/*
+	 * Impossible, but be paranoid with the future
+	 * How to put this check outside runtime?
+	 */
+	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+		      "exit event has grown above profile buffer size"))
+		return;
+
+	/* Protect the per cpu buffer, begin the rcu read side */
+	local_irq_save(flags);
+	cpu = smp_processor_id();
+
+	if (in_nmi())
+		raw_data = rcu_dereference(trace_profile_buf_nmi);
+	else
+		raw_data = rcu_dereference(trace_profile_buf);
+
+	if (!raw_data)
+		goto end;
+
+	raw_data = per_cpu_ptr(raw_data, cpu);
+
+	/* zero the dead bytes from align to not leak stack to user */
+	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+	rec = (struct syscall_trace_exit *)raw_data;
+
+	tracing_generic_entry_update(&rec->ent, 0, 0);
+	rec->ent.type = sys_data->exit_id;
+	rec->nr = syscall_nr;
+	rec->ret = syscall_get_return_value(current, regs);
+
+	perf_tpcounter_event(sys_data->exit_id, 0, 1, rec, size);
+
+end:
+	local_irq_restore(flags);
 }
 
 int reg_prof_syscall_exit(char *name)
......