Commit 430ad5a6 authored by Xiao Guangrong, committed by Frederic Weisbecker

perf: Factorize trace events raw sample buffer operations

Introduce ftrace_perf_buf_prepare() and ftrace_perf_buf_submit() to
gather the common code that operates on raw events sampling buffer.
This cleans up redundant code between regular trace events, syscall
events and kprobe events.

Changelog v1->v2:
- Rename function name as per Masami and Frederic's suggestion
- Add __kprobes for ftrace_perf_buf_prepare() and make
  ftrace_perf_buf_submit() inline as per Masami's suggestion
- Export ftrace_perf_buf_prepare since modules will use it
Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <4B60E92D.9000808@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
parent 339ce1a4
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include <linux/trace_seq.h> #include <linux/trace_seq.h>
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/hardirq.h> #include <linux/hardirq.h>
#include <linux/perf_event.h>
struct trace_array; struct trace_array;
struct tracer; struct tracer;
...@@ -138,9 +139,6 @@ struct ftrace_event_call { ...@@ -138,9 +139,6 @@ struct ftrace_event_call {
#define FTRACE_MAX_PROFILE_SIZE 2048 #define FTRACE_MAX_PROFILE_SIZE 2048
extern char *perf_trace_buf;
extern char *perf_trace_buf_nmi;
#define MAX_FILTER_PRED 32 #define MAX_FILTER_PRED 32
#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */
...@@ -195,6 +193,20 @@ extern void ftrace_profile_disable(int event_id); ...@@ -195,6 +193,20 @@ extern void ftrace_profile_disable(int event_id);
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str); char *filter_str);
extern void ftrace_profile_free_filter(struct perf_event *event); extern void ftrace_profile_free_filter(struct perf_event *event);
extern void *
ftrace_perf_buf_prepare(int size, unsigned short type, int *rctxp,
unsigned long *irq_flags);
/*
 * ftrace_perf_buf_submit - submit a filled raw sample and release the buffer
 * @raw_data:  buffer returned by ftrace_perf_buf_prepare(); it begins with a
 *             struct trace_entry header
 * @size:      size in bytes of the sample stored in @raw_data
 * @rctx:      recursion context obtained by ftrace_perf_buf_prepare()
 * @addr:      address value forwarded to perf_tp_event()
 * @count:     count value forwarded to perf_tp_event()
 * @irq_flags: interrupt flags saved by ftrace_perf_buf_prepare()
 *
 * Counterpart of ftrace_perf_buf_prepare(): forwards the sample to
 * perf_tp_event(), then drops the recursion context and restores the
 * interrupt state, in that order.
 */
static inline void
ftrace_perf_buf_submit(void *raw_data, int size, int rctx, u64 addr,
		       u64 count, unsigned long irq_flags)
{
	/* The event id is read back from the header at the start of the buffer. */
	perf_tp_event(((struct trace_entry *)raw_data)->type, addr, count,
		      raw_data, size);

	perf_swevent_put_recursion_context(rctx);
	local_irq_restore(irq_flags);
}
#endif #endif
#endif /* _LINUX_FTRACE_EVENT_H */ #endif /* _LINUX_FTRACE_EVENT_H */
...@@ -850,22 +850,12 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ ...@@ -850,22 +850,12 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \
proto) \ proto) \
{ \ { \
struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
extern int perf_swevent_get_recursion_context(void); \
extern void perf_swevent_put_recursion_context(int rctx); \
extern void perf_tp_event(int, u64, u64, void *, int); \
struct ftrace_raw_##call *entry; \ struct ftrace_raw_##call *entry; \
u64 __addr = 0, __count = 1; \ u64 __addr = 0, __count = 1; \
unsigned long irq_flags; \ unsigned long irq_flags; \
struct trace_entry *ent; \
int __entry_size; \ int __entry_size; \
int __data_size; \ int __data_size; \
char *trace_buf; \
char *raw_data; \
int __cpu; \
int rctx; \ int rctx; \
int pc; \
\
pc = preempt_count(); \
\ \
__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
...@@ -875,42 +865,16 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ ...@@ -875,42 +865,16 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \
if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE, \ if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE, \
"profile buffer not large enough")) \ "profile buffer not large enough")) \
return; \ return; \
\ entry = (struct ftrace_raw_##call *)ftrace_perf_buf_prepare( \
local_irq_save(irq_flags); \ __entry_size, event_call->id, &rctx, &irq_flags); \
\ if (!entry) \
rctx = perf_swevent_get_recursion_context(); \ return; \
if (rctx < 0) \
goto end_recursion; \
\
__cpu = smp_processor_id(); \
\
if (in_nmi()) \
trace_buf = rcu_dereference(perf_trace_buf_nmi); \
else \
trace_buf = rcu_dereference(perf_trace_buf); \
\
if (!trace_buf) \
goto end; \
\
raw_data = per_cpu_ptr(trace_buf, __cpu); \
\
*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
entry = (struct ftrace_raw_##call *)raw_data; \
ent = &entry->ent; \
tracing_generic_entry_update(ent, irq_flags, pc); \
ent->type = event_call->id; \
\
tstruct \ tstruct \
\ \
{ assign; } \ { assign; } \
\ \
perf_tp_event(event_call->id, __addr, __count, entry, \ ftrace_perf_buf_submit(entry, __entry_size, rctx, __addr, \
__entry_size); \ __count, irq_flags); \
\
end: \
perf_swevent_put_recursion_context(rctx); \
end_recursion: \
local_irq_restore(irq_flags); \
} }
#undef DEFINE_EVENT #undef DEFINE_EVENT
......
...@@ -6,14 +6,12 @@ ...@@ -6,14 +6,12 @@
*/ */
#include <linux/module.h> #include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h" #include "trace.h"
char *perf_trace_buf; static char *perf_trace_buf;
EXPORT_SYMBOL_GPL(perf_trace_buf); static char *perf_trace_buf_nmi;
char *perf_trace_buf_nmi;
EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
...@@ -120,3 +118,47 @@ void ftrace_profile_disable(int event_id) ...@@ -120,3 +118,47 @@ void ftrace_profile_disable(int event_id)
} }
mutex_unlock(&event_mutex); mutex_unlock(&event_mutex);
} }
__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
int *rctxp, unsigned long *irq_flags)
{
struct trace_entry *entry;
char *trace_buf, *raw_data;
int pc, cpu;
pc = preempt_count();
/* Protect the per cpu buffer, begin the rcu read side */
local_irq_save(*irq_flags);
*rctxp = perf_swevent_get_recursion_context();
if (*rctxp < 0)
goto err_recursion;
cpu = smp_processor_id();
if (in_nmi())
trace_buf = rcu_dereference(perf_trace_buf_nmi);
else
trace_buf = rcu_dereference(perf_trace_buf);
if (!trace_buf)
goto err;
raw_data = per_cpu_ptr(trace_buf, cpu);
/* zero the dead bytes from align to not leak stack to user */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
entry = (struct trace_entry *)raw_data;
tracing_generic_entry_update(entry, *irq_flags, pc);
entry->type = type;
return raw_data;
err:
perf_swevent_put_recursion_context(*rctxp);
err_recursion:
local_irq_restore(*irq_flags);
return NULL;
}
EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare);
...@@ -1243,14 +1243,10 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, ...@@ -1243,14 +1243,10 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
struct ftrace_event_call *call = &tp->call; struct ftrace_event_call *call = &tp->call;
struct kprobe_trace_entry *entry; struct kprobe_trace_entry *entry;
struct trace_entry *ent; int size, __size, i;
int size, __size, i, pc, __cpu;
unsigned long irq_flags; unsigned long irq_flags;
char *trace_buf;
char *raw_data;
int rctx; int rctx;
pc = preempt_count();
__size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
size = ALIGN(__size + sizeof(u32), sizeof(u64)); size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32); size -= sizeof(u32);
...@@ -1258,45 +1254,16 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, ...@@ -1258,45 +1254,16 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
"profile buffer not large enough")) "profile buffer not large enough"))
return 0; return 0;
/* entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
* Protect the non nmi buffer if (!entry)
* This also protects the rcu read side return 0;
*/
local_irq_save(irq_flags);
rctx = perf_swevent_get_recursion_context();
if (rctx < 0)
goto end_recursion;
__cpu = smp_processor_id();
if (in_nmi())
trace_buf = rcu_dereference(perf_trace_buf_nmi);
else
trace_buf = rcu_dereference(perf_trace_buf);
if (!trace_buf)
goto end;
raw_data = per_cpu_ptr(trace_buf, __cpu);
/* Zero dead bytes from alignment to avoid buffer leak to userspace */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
entry = (struct kprobe_trace_entry *)raw_data;
ent = &entry->ent;
tracing_generic_entry_update(ent, irq_flags, pc);
ent->type = call->id;
entry->nargs = tp->nr_args; entry->nargs = tp->nr_args;
entry->ip = (unsigned long)kp->addr; entry->ip = (unsigned long)kp->addr;
for (i = 0; i < tp->nr_args; i++) for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i].fetch, regs); entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
perf_tp_event(call->id, entry->ip, 1, entry, size);
end: ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags);
perf_swevent_put_recursion_context(rctx);
end_recursion:
local_irq_restore(irq_flags);
return 0; return 0;
} }
...@@ -1308,14 +1275,10 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, ...@@ -1308,14 +1275,10 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
struct ftrace_event_call *call = &tp->call; struct ftrace_event_call *call = &tp->call;
struct kretprobe_trace_entry *entry; struct kretprobe_trace_entry *entry;
struct trace_entry *ent; int size, __size, i;
int size, __size, i, pc, __cpu;
unsigned long irq_flags; unsigned long irq_flags;
char *trace_buf;
char *raw_data;
int rctx; int rctx;
pc = preempt_count();
__size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
size = ALIGN(__size + sizeof(u32), sizeof(u64)); size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32); size -= sizeof(u32);
...@@ -1323,46 +1286,17 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, ...@@ -1323,46 +1286,17 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
"profile buffer not large enough")) "profile buffer not large enough"))
return 0; return 0;
/* entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
* Protect the non nmi buffer if (!entry)
* This also protects the rcu read side return 0;
*/
local_irq_save(irq_flags);
rctx = perf_swevent_get_recursion_context();
if (rctx < 0)
goto end_recursion;
__cpu = smp_processor_id();
if (in_nmi())
trace_buf = rcu_dereference(perf_trace_buf_nmi);
else
trace_buf = rcu_dereference(perf_trace_buf);
if (!trace_buf)
goto end;
raw_data = per_cpu_ptr(trace_buf, __cpu);
/* Zero dead bytes from alignment to avoid buffer leak to userspace */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
entry = (struct kretprobe_trace_entry *)raw_data;
ent = &entry->ent;
tracing_generic_entry_update(ent, irq_flags, pc);
ent->type = call->id;
entry->nargs = tp->nr_args; entry->nargs = tp->nr_args;
entry->func = (unsigned long)tp->rp.kp.addr; entry->func = (unsigned long)tp->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr; entry->ret_ip = (unsigned long)ri->ret_addr;
for (i = 0; i < tp->nr_args; i++) for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i].fetch, regs); entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
end: ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags);
perf_swevent_put_recursion_context(rctx);
end_recursion:
local_irq_restore(irq_flags);
return 0; return 0;
} }
......
...@@ -433,12 +433,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) ...@@ -433,12 +433,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
struct syscall_metadata *sys_data; struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec; struct syscall_trace_enter *rec;
unsigned long flags; unsigned long flags;
char *trace_buf;
char *raw_data;
int syscall_nr; int syscall_nr;
int rctx; int rctx;
int size; int size;
int cpu;
syscall_nr = syscall_get_nr(current, regs); syscall_nr = syscall_get_nr(current, regs);
if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
...@@ -457,37 +454,15 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) ...@@ -457,37 +454,15 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
"profile buffer not large enough")) "profile buffer not large enough"))
return; return;
/* Protect the per cpu buffer, begin the rcu read side */ rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size,
local_irq_save(flags); sys_data->enter_event->id, &rctx, &flags);
if (!rec)
rctx = perf_swevent_get_recursion_context(); return;
if (rctx < 0)
goto end_recursion;
cpu = smp_processor_id();
trace_buf = rcu_dereference(perf_trace_buf);
if (!trace_buf)
goto end;
raw_data = per_cpu_ptr(trace_buf, cpu);
/* zero the dead bytes from align to not leak stack to user */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
rec = (struct syscall_trace_enter *) raw_data;
tracing_generic_entry_update(&rec->ent, 0, 0);
rec->ent.type = sys_data->enter_event->id;
rec->nr = syscall_nr; rec->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args, syscall_get_arguments(current, regs, 0, sys_data->nb_args,
(unsigned long *)&rec->args); (unsigned long *)&rec->args);
perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size); ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
end:
perf_swevent_put_recursion_context(rctx);
end_recursion:
local_irq_restore(flags);
} }
int prof_sysenter_enable(struct ftrace_event_call *call) int prof_sysenter_enable(struct ftrace_event_call *call)
...@@ -531,11 +506,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) ...@@ -531,11 +506,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
struct syscall_trace_exit *rec; struct syscall_trace_exit *rec;
unsigned long flags; unsigned long flags;
int syscall_nr; int syscall_nr;
char *trace_buf;
char *raw_data;
int rctx; int rctx;
int size; int size;
int cpu;
syscall_nr = syscall_get_nr(current, regs); syscall_nr = syscall_get_nr(current, regs);
if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
...@@ -557,38 +529,15 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) ...@@ -557,38 +529,15 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
"exit event has grown above profile buffer size")) "exit event has grown above profile buffer size"))
return; return;
/* Protect the per cpu buffer, begin the rcu read side */ rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size,
local_irq_save(flags); sys_data->exit_event->id, &rctx, &flags);
if (!rec)
rctx = perf_swevent_get_recursion_context(); return;
if (rctx < 0)
goto end_recursion;
cpu = smp_processor_id();
trace_buf = rcu_dereference(perf_trace_buf);
if (!trace_buf)
goto end;
raw_data = per_cpu_ptr(trace_buf, cpu);
/* zero the dead bytes from align to not leak stack to user */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
rec = (struct syscall_trace_exit *)raw_data;
tracing_generic_entry_update(&rec->ent, 0, 0);
rec->ent.type = sys_data->exit_event->id;
rec->nr = syscall_nr; rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs); rec->ret = syscall_get_return_value(current, regs);
perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size); ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
end:
perf_swevent_put_recursion_context(rctx);
end_recursion:
local_irq_restore(flags);
} }
int prof_sysexit_enable(struct ftrace_event_call *call) int prof_sysexit_enable(struct ftrace_event_call *call)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment