Commit e6dab5ff authored by Andrew Vagin, committed by Ingo Molnar

perf/trace: Add ability to set a target task for events

Some events are of interest not only to the current task.
For example, sched_stat_* events are of interest to the task
which wakes up. For this reason, it would be good if such
events were delivered to the target task too.

Now a target task can be set by using __perf_task().

The original idea and a draft patch belong to Peter Zijlstra.

I need these events for profiling sleep times. sched_switch is used for
getting callchains and sched_stat_* is used for getting time periods.
These events are combined in user space, and then they can be analyzed
by perf tools.
Inspired-by: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Arun Sharma <asharma@fb.com>
Signed-off-by: Andrew Vagin <avagin@openvz.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1342016098-213063-1-git-send-email-avagin@openvz.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent d07bdfd3
...@@ -306,9 +306,10 @@ extern void *perf_trace_buf_prepare(int size, unsigned short type, ...@@ -306,9 +306,10 @@ extern void *perf_trace_buf_prepare(int size, unsigned short type,
static inline void static inline void
perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
u64 count, struct pt_regs *regs, void *head) u64 count, struct pt_regs *regs, void *head,
struct task_struct *task)
{ {
perf_tp_event(addr, count, raw_data, size, regs, head, rctx); perf_tp_event(addr, count, raw_data, size, regs, head, rctx, task);
} }
#endif #endif
......
...@@ -1272,7 +1272,8 @@ static inline bool perf_paranoid_kernel(void) ...@@ -1272,7 +1272,8 @@ static inline bool perf_paranoid_kernel(void)
extern void perf_event_init(void); extern void perf_event_init(void);
extern void perf_tp_event(u64 addr, u64 count, void *record, extern void perf_tp_event(u64 addr, u64 count, void *record,
int entry_size, struct pt_regs *regs, int entry_size, struct pt_regs *regs,
struct hlist_head *head, int rctx); struct hlist_head *head, int rctx,
struct task_struct *task);
extern void perf_bp_event(struct perf_event *event, void *data); extern void perf_bp_event(struct perf_event *event, void *data);
#ifndef perf_misc_flags #ifndef perf_misc_flags
......
...@@ -73,6 +73,9 @@ DECLARE_EVENT_CLASS(sched_wakeup_template, ...@@ -73,6 +73,9 @@ DECLARE_EVENT_CLASS(sched_wakeup_template,
__entry->prio = p->prio; __entry->prio = p->prio;
__entry->success = success; __entry->success = success;
__entry->target_cpu = task_cpu(p); __entry->target_cpu = task_cpu(p);
)
TP_perf_assign(
__perf_task(p);
), ),
TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
...@@ -325,6 +328,7 @@ DECLARE_EVENT_CLASS(sched_stat_template, ...@@ -325,6 +328,7 @@ DECLARE_EVENT_CLASS(sched_stat_template,
) )
TP_perf_assign( TP_perf_assign(
__perf_count(delay); __perf_count(delay);
__perf_task(tsk);
), ),
TP_printk("comm=%s pid=%d delay=%Lu [ns]", TP_printk("comm=%s pid=%d delay=%Lu [ns]",
......
...@@ -712,6 +712,9 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call ...@@ -712,6 +712,9 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call
#undef __perf_count #undef __perf_count
#define __perf_count(c) __count = (c) #define __perf_count(c) __count = (c)
#undef __perf_task
#define __perf_task(t) __task = (t)
#undef TP_perf_assign #undef TP_perf_assign
#define TP_perf_assign(args...) args #define TP_perf_assign(args...) args
...@@ -725,6 +728,7 @@ perf_trace_##call(void *__data, proto) \ ...@@ -725,6 +728,7 @@ perf_trace_##call(void *__data, proto) \
struct ftrace_raw_##call *entry; \ struct ftrace_raw_##call *entry; \
struct pt_regs __regs; \ struct pt_regs __regs; \
u64 __addr = 0, __count = 1; \ u64 __addr = 0, __count = 1; \
struct task_struct *__task = NULL; \
struct hlist_head *head; \ struct hlist_head *head; \
int __entry_size; \ int __entry_size; \
int __data_size; \ int __data_size; \
...@@ -752,7 +756,7 @@ perf_trace_##call(void *__data, proto) \ ...@@ -752,7 +756,7 @@ perf_trace_##call(void *__data, proto) \
\ \
head = this_cpu_ptr(event_call->perf_events); \ head = this_cpu_ptr(event_call->perf_events); \
perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \
__count, &__regs, head); \ __count, &__regs, head, __task); \
} }
/* /*
......
...@@ -153,7 +153,8 @@ put_callchain_entry(int rctx) ...@@ -153,7 +153,8 @@ put_callchain_entry(int rctx)
put_recursion_context(__get_cpu_var(callchain_recursion), rctx); put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
} }
struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) struct perf_callchain_entry *
perf_callchain(struct perf_event *event, struct pt_regs *regs)
{ {
int rctx; int rctx;
struct perf_callchain_entry *entry; struct perf_callchain_entry *entry;
...@@ -178,6 +179,12 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) ...@@ -178,6 +179,12 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
} }
if (regs) { if (regs) {
/*
* Disallow cross-task user callchains.
*/
if (event->ctx->task && event->ctx->task != current)
goto exit_put;
perf_callchain_store(entry, PERF_CONTEXT_USER); perf_callchain_store(entry, PERF_CONTEXT_USER);
perf_callchain_user(entry, regs); perf_callchain_user(entry, regs);
} }
......
...@@ -4039,7 +4039,7 @@ void perf_prepare_sample(struct perf_event_header *header, ...@@ -4039,7 +4039,7 @@ void perf_prepare_sample(struct perf_event_header *header,
if (sample_type & PERF_SAMPLE_CALLCHAIN) { if (sample_type & PERF_SAMPLE_CALLCHAIN) {
int size = 1; int size = 1;
data->callchain = perf_callchain(regs); data->callchain = perf_callchain(event, regs);
if (data->callchain) if (data->callchain)
size += data->callchain->nr; size += data->callchain->nr;
...@@ -5209,7 +5209,8 @@ static int perf_tp_event_match(struct perf_event *event, ...@@ -5209,7 +5209,8 @@ static int perf_tp_event_match(struct perf_event *event,
} }
void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
struct pt_regs *regs, struct hlist_head *head, int rctx) struct pt_regs *regs, struct hlist_head *head, int rctx,
struct task_struct *task)
{ {
struct perf_sample_data data; struct perf_sample_data data;
struct perf_event *event; struct perf_event *event;
...@@ -5228,6 +5229,31 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, ...@@ -5228,6 +5229,31 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
perf_swevent_event(event, count, &data, regs); perf_swevent_event(event, count, &data, regs);
} }
/*
* If we got specified a target task, also iterate its context and
* deliver this event there too.
*/
if (task && task != current) {
struct perf_event_context *ctx;
struct trace_entry *entry = record;
rcu_read_lock();
ctx = rcu_dereference(task->perf_event_ctxp[perf_sw_context]);
if (!ctx)
goto unlock;
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
if (event->attr.type != PERF_TYPE_TRACEPOINT)
continue;
if (event->attr.config != entry->type)
continue;
if (perf_tp_event_match(event, &data, regs))
perf_swevent_event(event, count, &data, regs);
}
unlock:
rcu_read_unlock();
}
perf_swevent_put_recursion_context(rctx); perf_swevent_put_recursion_context(rctx);
} }
EXPORT_SYMBOL_GPL(perf_tp_event); EXPORT_SYMBOL_GPL(perf_tp_event);
......
...@@ -101,7 +101,8 @@ __output_copy(struct perf_output_handle *handle, ...@@ -101,7 +101,8 @@ __output_copy(struct perf_output_handle *handle,
} }
/* Callchain handling */ /* Callchain handling */
extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); extern struct perf_callchain_entry *
perf_callchain(struct perf_event *event, struct pt_regs *regs);
extern int get_callchain_buffers(void); extern int get_callchain_buffers(void);
extern void put_callchain_buffers(void); extern void put_callchain_buffers(void);
......
...@@ -281,7 +281,7 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip) ...@@ -281,7 +281,7 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip)
head = this_cpu_ptr(event_function.perf_events); head = this_cpu_ptr(event_function.perf_events);
perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
1, &regs, head); 1, &regs, head, NULL);
#undef ENTRY_SIZE #undef ENTRY_SIZE
} }
......
...@@ -1002,7 +1002,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, ...@@ -1002,7 +1002,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
head = this_cpu_ptr(call->perf_events); head = this_cpu_ptr(call->perf_events);
perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); perf_trace_buf_submit(entry, size, rctx,
entry->ip, 1, regs, head, NULL);
} }
/* Kretprobe profile handler */ /* Kretprobe profile handler */
...@@ -1033,7 +1034,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, ...@@ -1033,7 +1034,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
head = this_cpu_ptr(call->perf_events); head = this_cpu_ptr(call->perf_events);
perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); perf_trace_buf_submit(entry, size, rctx,
entry->ret_ip, 1, regs, head, NULL);
} }
#endif /* CONFIG_PERF_EVENTS */ #endif /* CONFIG_PERF_EVENTS */
......
...@@ -532,7 +532,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) ...@@ -532,7 +532,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
(unsigned long *)&rec->args); (unsigned long *)&rec->args);
head = this_cpu_ptr(sys_data->enter_event->perf_events); head = this_cpu_ptr(sys_data->enter_event->perf_events);
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
} }
int perf_sysenter_enable(struct ftrace_event_call *call) int perf_sysenter_enable(struct ftrace_event_call *call)
...@@ -608,7 +608,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) ...@@ -608,7 +608,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
rec->ret = syscall_get_return_value(current, regs); rec->ret = syscall_get_return_value(current, regs);
head = this_cpu_ptr(sys_data->exit_event->perf_events); head = this_cpu_ptr(sys_data->exit_event->perf_events);
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
} }
int perf_sysexit_enable(struct ftrace_event_call *call) int perf_sysexit_enable(struct ftrace_event_call *call)
......
...@@ -670,7 +670,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) ...@@ -670,7 +670,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
head = this_cpu_ptr(call->perf_events); head = this_cpu_ptr(call->perf_events);
perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL);
out: out:
preempt_enable(); preempt_enable();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment