Commit 83b84503 authored by Ingo Molnar's avatar Ingo Molnar

Merge branch 'tip/perf/core' of...

Merge branch 'tip/perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace into perf/core
parents 4ff16c25 5500fa51
......@@ -31,16 +31,33 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
/*
* FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are
* set in the flags member.
*
* ENABLED - set/unset when ftrace_ops is registered/unregistered
* GLOBAL - set manually by ftrace_ops user to denote the ftrace_ops
* is part of the global tracers sharing the same filter
* via set_ftrace_* debugfs files.
* DYNAMIC - set when ftrace_ops is registered to denote dynamically
* allocated ftrace_ops which need special care
* CONTROL - set manually by ftrace_ops user to denote the ftrace_ops
* could be controlled by following calls:
* ftrace_function_local_enable
* ftrace_function_local_disable
*/
enum {
FTRACE_OPS_FL_ENABLED = 1 << 0,
FTRACE_OPS_FL_GLOBAL = 1 << 1,
FTRACE_OPS_FL_DYNAMIC = 1 << 2,
FTRACE_OPS_FL_CONTROL = 1 << 3,
};
struct ftrace_ops {
ftrace_func_t func;
struct ftrace_ops *next;
unsigned long flags;
int __percpu *disabled;
#ifdef CONFIG_DYNAMIC_FTRACE
struct ftrace_hash *notrace_hash;
struct ftrace_hash *filter_hash;
......@@ -97,6 +114,55 @@ int register_ftrace_function(struct ftrace_ops *ops);
int unregister_ftrace_function(struct ftrace_ops *ops);
void clear_ftrace_function(void);
/**
 * ftrace_function_local_enable - enable controlled ftrace_ops on current cpu
 * @ops: ftrace_ops carrying the FTRACE_OPS_FL_CONTROL flag
 *
 * Enables tracing for @ops on the current cpu by decrementing its per cpu
 * 'disabled' counter. If @ops does not have FTRACE_OPS_FL_CONTROL set, a
 * one-time warning is issued and the counter is left untouched.
 *
 * Must be called with preemption disabled; otherwise this_cpu_ptr will
 * complain when CONFIG_DEBUG_PREEMPT is enabled.
 */
static inline void ftrace_function_local_enable(struct ftrace_ops *ops)
{
	int *disabled;

	if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL)))
		return;

	disabled = this_cpu_ptr(ops->disabled);
	(*disabled)--;
}
/**
* ftrace_function_local_disable - disable controlled ftrace_ops on current cpu
*
* This function disables tracing on current cpu by increasing
* the per cpu control variable.
* It must be called with preemption disabled and only on ftrace_ops
* registered with FTRACE_OPS_FL_CONTROL. If called without preemption
* disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled.
*/
static inline void ftrace_function_local_disable(struct ftrace_ops *ops)
{
if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL)))
return;
(*this_cpu_ptr(ops->disabled))++;
}
/**
 * ftrace_function_local_disabled - read ftrace_ops disabled value
 *                                  on current cpu
 * @ops: ftrace_ops carrying the FTRACE_OPS_FL_CONTROL flag
 *
 * Returns the current cpu's value of ftrace_ops::disabled. A one-time
 * warning is issued when @ops lacks FTRACE_OPS_FL_CONTROL, but the value
 * is read and returned regardless.
 *
 * Must be called with preemption disabled; otherwise this_cpu_ptr will
 * complain when CONFIG_DEBUG_PREEMPT is enabled.
 */
static inline int ftrace_function_local_disabled(struct ftrace_ops *ops)
{
	int *disabled;

	WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL));

	disabled = this_cpu_ptr(ops->disabled);
	return *disabled;
}
extern void ftrace_stub(unsigned long a0, unsigned long a1);
#else /* !CONFIG_FUNCTION_TRACER */
......@@ -184,6 +250,7 @@ int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
int len, int reset);
void ftrace_set_global_filter(unsigned char *buf, int len, int reset);
void ftrace_set_global_notrace(unsigned char *buf, int len, int reset);
void ftrace_free_filter(struct ftrace_ops *ops);
int register_ftrace_command(struct ftrace_func_command *cmd);
int unregister_ftrace_command(struct ftrace_func_command *cmd);
......@@ -314,9 +381,6 @@ extern void ftrace_enable_daemon(void);
#else
static inline int skip_trace(unsigned long ip) { return 0; }
static inline int ftrace_force_update(void) { return 0; }
static inline void ftrace_set_filter(unsigned char *buf, int len, int reset)
{
}
static inline void ftrace_disable_daemon(void) { }
static inline void ftrace_enable_daemon(void) { }
static inline void ftrace_release_mod(struct module *mod) {}
......@@ -340,6 +404,9 @@ static inline int ftrace_text_reserved(void *start, void *end)
*/
#define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; })
#define ftrace_set_early_filter(ops, buf, enable) do { } while (0)
#define ftrace_set_filter(ops, buf, len, reset) ({ -ENODEV; })
#define ftrace_set_notrace(ops, buf, len, reset) ({ -ENODEV; })
#define ftrace_free_filter(ops) do { } while (0)
static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos) { return -ENODEV; }
......
......@@ -146,6 +146,10 @@ enum trace_reg {
TRACE_REG_UNREGISTER,
TRACE_REG_PERF_REGISTER,
TRACE_REG_PERF_UNREGISTER,
TRACE_REG_PERF_OPEN,
TRACE_REG_PERF_CLOSE,
TRACE_REG_PERF_ADD,
TRACE_REG_PERF_DEL,
};
struct ftrace_event_call;
......@@ -157,7 +161,7 @@ struct ftrace_event_class {
void *perf_probe;
#endif
int (*reg)(struct ftrace_event_call *event,
enum trace_reg type);
enum trace_reg type, void *data);
int (*define_fields)(struct ftrace_event_call *);
struct list_head *(*get_fields)(struct ftrace_event_call *);
struct list_head fields;
......@@ -165,7 +169,7 @@ struct ftrace_event_class {
};
extern int ftrace_event_reg(struct ftrace_event_call *event,
enum trace_reg type);
enum trace_reg type, void *data);
enum {
TRACE_EVENT_FL_ENABLED_BIT,
......@@ -241,6 +245,7 @@ enum {
FILTER_STATIC_STRING,
FILTER_DYN_STRING,
FILTER_PTR_STRING,
FILTER_TRACE_FN,
};
#define EVENT_STORAGE_SIZE 128
......
......@@ -859,6 +859,9 @@ struct perf_event {
#ifdef CONFIG_EVENT_TRACING
struct ftrace_event_call *tp_event;
struct event_filter *filter;
#ifdef CONFIG_FUNCTION_TRACER
struct ftrace_ops ftrace_ops;
#endif
#endif
#ifdef CONFIG_CGROUP_PERF
......
......@@ -62,6 +62,8 @@
#define FTRACE_HASH_DEFAULT_BITS 10
#define FTRACE_HASH_MAX_BITS 12
#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL)
/* ftrace_enabled is a method to turn ftrace on or off */
int ftrace_enabled __read_mostly;
static int last_ftrace_enabled;
......@@ -89,12 +91,14 @@ static struct ftrace_ops ftrace_list_end __read_mostly = {
};
static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;
static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub;
ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
static struct ftrace_ops global_ops;
static struct ftrace_ops control_ops;
static void
ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
......@@ -168,6 +172,32 @@ static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
}
#endif
static void control_ops_disable_all(struct ftrace_ops *ops)
{
int cpu;
for_each_possible_cpu(cpu)
*per_cpu_ptr(ops->disabled, cpu) = 1;
}
static int control_ops_alloc(struct ftrace_ops *ops)
{
int __percpu *disabled;
disabled = alloc_percpu(int);
if (!disabled)
return -ENOMEM;
ops->disabled = disabled;
control_ops_disable_all(ops);
return 0;
}
static void control_ops_free(struct ftrace_ops *ops)
{
free_percpu(ops->disabled);
}
static void update_global_ops(void)
{
ftrace_func_t func;
......@@ -259,6 +289,26 @@ static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
return 0;
}
/*
 * Add @ops to the sub-list @list. If @list was empty beforehand, also hook
 * @main_ops (the ops representing that sub-list) into ftrace_ops_list.
 */
static void add_ftrace_list_ops(struct ftrace_ops **list,
				struct ftrace_ops *main_ops,
				struct ftrace_ops *ops)
{
	/* Must be sampled before @ops is inserted. */
	int was_empty = (*list == &ftrace_list_end);

	add_ftrace_ops(list, ops);

	if (!was_empty)
		return;

	add_ftrace_ops(&ftrace_ops_list, main_ops);
}
/*
 * Remove @ops from the sub-list @list. If that leaves @list empty, also
 * remove @main_ops from ftrace_ops_list.
 *
 * Returns the result of the last remove_ftrace_ops() call performed:
 * non-zero if removing @ops failed, otherwise 0 or the result of removing
 * @main_ops.
 */
static int remove_ftrace_list_ops(struct ftrace_ops **list,
				  struct ftrace_ops *main_ops,
				  struct ftrace_ops *ops)
{
	int err = remove_ftrace_ops(list, ops);

	if (err)
		return err;

	/* Other ops still use this sub-list, keep main_ops registered. */
	if (*list != &ftrace_list_end)
		return 0;

	return remove_ftrace_ops(&ftrace_ops_list, main_ops);
}
static int __register_ftrace_function(struct ftrace_ops *ops)
{
if (ftrace_disabled)
......@@ -270,15 +320,20 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED))
return -EBUSY;
/* We don't support both control and global flags set. */
if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
return -EINVAL;
if (!core_kernel_data((unsigned long)ops))
ops->flags |= FTRACE_OPS_FL_DYNAMIC;
if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
int first = ftrace_global_list == &ftrace_list_end;
add_ftrace_ops(&ftrace_global_list, ops);
add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops);
ops->flags |= FTRACE_OPS_FL_ENABLED;
if (first)
add_ftrace_ops(&ftrace_ops_list, &global_ops);
} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
if (control_ops_alloc(ops))
return -ENOMEM;
add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops);
} else
add_ftrace_ops(&ftrace_ops_list, ops);
......@@ -302,11 +357,23 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
return -EINVAL;
if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
ret = remove_ftrace_ops(&ftrace_global_list, ops);
if (!ret && ftrace_global_list == &ftrace_list_end)
ret = remove_ftrace_ops(&ftrace_ops_list, &global_ops);
ret = remove_ftrace_list_ops(&ftrace_global_list,
&global_ops, ops);
if (!ret)
ops->flags &= ~FTRACE_OPS_FL_ENABLED;
} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
ret = remove_ftrace_list_ops(&ftrace_control_list,
&control_ops, ops);
if (!ret) {
/*
* The ftrace_ops is now removed from the list,
* so there'll be no new users. We must ensure
* all current users are done before we free
* the control data.
*/
synchronize_sched();
control_ops_free(ops);
}
} else
ret = remove_ftrace_ops(&ftrace_ops_list, ops);
......@@ -1119,6 +1186,12 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash)
call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu);
}
/*
 * Free both the filter and the notrace hash of @ops.
 * The hash pointers stored in @ops are not cleared here.
 */
void ftrace_free_filter(struct ftrace_ops *ops)
{
	struct ftrace_hash *filter = ops->filter_hash;
	struct ftrace_hash *notrace = ops->notrace_hash;

	free_ftrace_hash(filter);
	free_ftrace_hash(notrace);
}
static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
{
struct ftrace_hash *hash;
......@@ -3873,6 +3946,36 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
#endif /* CONFIG_DYNAMIC_FTRACE */
/*
 * Dispatcher for ops on ftrace_control_list: calls every op that is not
 * disabled on the current cpu and whose filter matches @ip.
 * TRACE_CONTROL_BIT guards against recursing into this function.
 */
static void
ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip)
{
	struct ftrace_ops *op;

	if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT)))
		return;

	/*
	 * Some of the ops may be dynamically allocated,
	 * they must be freed after a synchronize_sched().
	 */
	preempt_disable_notrace();
	trace_recursion_set(TRACE_CONTROL_BIT);

	for (op = rcu_dereference_raw(ftrace_control_list);
	     op != &ftrace_list_end;
	     op = rcu_dereference_raw(op->next)) {
		/* Skip ops switched off on this cpu. */
		if (ftrace_function_local_disabled(op))
			continue;
		if (!ftrace_ops_test(op, ip))
			continue;
		op->func(ip, parent_ip);
	}

	trace_recursion_clear(TRACE_CONTROL_BIT);
	preempt_enable_notrace();
}
/*
 * The ops standing in for the whole ftrace_control_list: it is passed as
 * main_ops to add_ftrace_list_ops()/remove_ftrace_list_ops() for
 * FTRACE_OPS_FL_CONTROL ops, and its callback walks ftrace_control_list.
 */
static struct ftrace_ops control_ops = {
.func = ftrace_ops_control_func,
};
static void
ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
{
......
......@@ -56,7 +56,7 @@ enum trace_type {
#define F_STRUCT(args...) args
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \
struct struct_name { \
struct trace_entry ent; \
tstruct \
......@@ -66,7 +66,13 @@ enum trace_type {
#define TP_ARGS(args...) args
#undef FTRACE_ENTRY_DUP
#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk)
#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk, filter)
#undef FTRACE_ENTRY_REG
#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \
filter, regfn) \
FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
filter)
#include "trace_entries.h"
......@@ -288,6 +294,8 @@ struct tracer {
/* for function tracing recursion */
#define TRACE_INTERNAL_BIT (1<<11)
#define TRACE_GLOBAL_BIT (1<<12)
#define TRACE_CONTROL_BIT (1<<13)
/*
* Abuse of the trace_recursion.
* As we need a way to maintain state if we are tracing the function
......@@ -589,6 +597,8 @@ static inline int ftrace_trace_task(struct task_struct *task)
static inline int ftrace_is_dead(void) { return 0; }
#endif
int ftrace_event_is_function(struct ftrace_event_call *call);
/*
* struct trace_parser - serves for reading the user input separated by spaces
* @cont: set if the input is not complete - no final space char was found
......@@ -766,9 +776,7 @@ struct filter_pred {
u64 val;
struct regex regex;
unsigned short *ops;
#ifdef CONFIG_FTRACE_STARTUP_TEST
struct ftrace_event_field *field;
#endif
int offset;
int not;
int op;
......@@ -818,12 +826,22 @@ extern const char *__start___trace_bprintk_fmt[];
extern const char *__stop___trace_bprintk_fmt[];
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \
#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \
extern struct ftrace_event_call \
__attribute__((__aligned__(4))) event_##call;
#undef FTRACE_ENTRY_DUP
#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \
FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter) \
FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \
filter)
#include "trace_entries.h"
#ifdef CONFIG_PERF_EVENTS
#ifdef CONFIG_FUNCTION_TRACER
int perf_ftrace_event_register(struct ftrace_event_call *call,
enum trace_reg type, void *data);
#else
#define perf_ftrace_event_register NULL
#endif /* CONFIG_FUNCTION_TRACER */
#endif /* CONFIG_PERF_EVENTS */
#endif /* _LINUX_KERNEL_TRACE_H */
......@@ -55,7 +55,7 @@
/*
* Function trace entry - function address and parent function address:
*/
FTRACE_ENTRY(function, ftrace_entry,
FTRACE_ENTRY_REG(function, ftrace_entry,
TRACE_FN,
......@@ -64,7 +64,11 @@ FTRACE_ENTRY(function, ftrace_entry,
__field( unsigned long, parent_ip )
),
F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip)
F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip),
FILTER_TRACE_FN,
perf_ftrace_event_register
);
/* Function call entry */
......@@ -78,7 +82,9 @@ FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
__field_desc( int, graph_ent, depth )
),
F_printk("--> %lx (%d)", __entry->func, __entry->depth)
F_printk("--> %lx (%d)", __entry->func, __entry->depth),
FILTER_OTHER
);
/* Function return entry */
......@@ -98,7 +104,9 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d",
__entry->func, __entry->depth,
__entry->calltime, __entry->rettime,
__entry->depth)
__entry->depth),
FILTER_OTHER
);
/*
......@@ -127,8 +135,9 @@ FTRACE_ENTRY(context_switch, ctx_switch_entry,
F_printk("%u:%u:%u ==> %u:%u:%u [%03u]",
__entry->prev_pid, __entry->prev_prio, __entry->prev_state,
__entry->next_pid, __entry->next_prio, __entry->next_state,
__entry->next_cpu
)
__entry->next_cpu),
FILTER_OTHER
);
/*
......@@ -146,8 +155,9 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]",
__entry->prev_pid, __entry->prev_prio, __entry->prev_state,
__entry->next_pid, __entry->next_prio, __entry->next_state,
__entry->next_cpu
)
__entry->next_cpu),
FILTER_OTHER
);
/*
......@@ -169,7 +179,9 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
"\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
__entry->caller[0], __entry->caller[1], __entry->caller[2],
__entry->caller[3], __entry->caller[4], __entry->caller[5],
__entry->caller[6], __entry->caller[7])
__entry->caller[6], __entry->caller[7]),
FILTER_OTHER
);
FTRACE_ENTRY(user_stack, userstack_entry,
......@@ -185,7 +197,9 @@ FTRACE_ENTRY(user_stack, userstack_entry,
"\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
__entry->caller[0], __entry->caller[1], __entry->caller[2],
__entry->caller[3], __entry->caller[4], __entry->caller[5],
__entry->caller[6], __entry->caller[7])
__entry->caller[6], __entry->caller[7]),
FILTER_OTHER
);
/*
......@@ -202,7 +216,9 @@ FTRACE_ENTRY(bprint, bprint_entry,
),
F_printk("%08lx fmt:%p",
__entry->ip, __entry->fmt)
__entry->ip, __entry->fmt),
FILTER_OTHER
);
FTRACE_ENTRY(print, print_entry,
......@@ -215,7 +231,9 @@ FTRACE_ENTRY(print, print_entry,
),
F_printk("%08lx %s",
__entry->ip, __entry->buf)
__entry->ip, __entry->buf),
FILTER_OTHER
);
FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
......@@ -234,7 +252,9 @@ FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
F_printk("%lx %lx %lx %d %x %x",
(unsigned long)__entry->phys, __entry->value, __entry->pc,
__entry->map_id, __entry->opcode, __entry->width)
__entry->map_id, __entry->opcode, __entry->width),
FILTER_OTHER
);
FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
......@@ -252,7 +272,9 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
F_printk("%lx %lx %lx %d %x",
(unsigned long)__entry->phys, __entry->virt, __entry->len,
__entry->map_id, __entry->opcode)
__entry->map_id, __entry->opcode),
FILTER_OTHER
);
......@@ -272,6 +294,8 @@ FTRACE_ENTRY(branch, trace_branch,
F_printk("%u:%s:%s (%u)",
__entry->line,
__entry->func, __entry->file, __entry->correct)
__entry->func, __entry->file, __entry->correct),
FILTER_OTHER
);
......@@ -24,6 +24,11 @@ static int total_ref_count;
static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
struct perf_event *p_event)
{
/* The ftrace function trace is allowed only for root. */
if (ftrace_event_is_function(tp_event) &&
perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
return -EPERM;
/* No tracing, just counting, so no obvious leak */
if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
return 0;
......@@ -44,23 +49,17 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
return 0;
}
static int perf_trace_event_init(struct ftrace_event_call *tp_event,
static int perf_trace_event_reg(struct ftrace_event_call *tp_event,
struct perf_event *p_event)
{
struct hlist_head __percpu *list;
int ret;
int ret = -ENOMEM;
int cpu;
ret = perf_trace_event_perm(tp_event, p_event);
if (ret)
return ret;
p_event->tp_event = tp_event;
if (tp_event->perf_refcount++ > 0)
return 0;
ret = -ENOMEM;
list = alloc_percpu(struct hlist_head);
if (!list)
goto fail;
......@@ -83,7 +82,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
}
}
ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER, NULL);
if (ret)
goto fail;
......@@ -108,6 +107,69 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
return ret;
}
/*
 * Drop one perf reference on the event behind @p_event.
 *
 * When the last reference goes away the event class is told to
 * TRACE_REG_PERF_UNREGISTER, the per cpu perf_events list is freed and,
 * if no perf trace event is left at all, the perf trace buffers are freed
 * too. The module reference is dropped in every case.
 */
static void perf_trace_event_unreg(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	int ctx;

	tp_event->perf_refcount--;
	if (tp_event->perf_refcount > 0)
		goto put_module;

	tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL);

	/*
	 * Ensure our callback won't be called anymore. The buffers
	 * will be freed after that.
	 */
	tracepoint_synchronize_unregister();

	free_percpu(tp_event->perf_events);
	tp_event->perf_events = NULL;

	total_ref_count--;
	if (total_ref_count == 0) {
		for (ctx = 0; ctx < PERF_NR_CONTEXTS; ctx++) {
			free_percpu(perf_trace_buf[ctx]);
			perf_trace_buf[ctx] = NULL;
		}
	}

put_module:
	module_put(tp_event->mod);
}
/*
 * Forward TRACE_REG_PERF_OPEN for @p_event to its event class and return
 * the class callback's result.
 */
static int perf_trace_event_open(struct perf_event *p_event)
{
	struct ftrace_event_call *call = p_event->tp_event;
	int ret;

	ret = call->class->reg(call, TRACE_REG_PERF_OPEN, p_event);

	return ret;
}
/*
 * Forward TRACE_REG_PERF_CLOSE for @p_event to its event class; the
 * callback's return value is ignored.
 */
static void perf_trace_event_close(struct perf_event *p_event)
{
	struct ftrace_event_call *call = p_event->tp_event;

	call->class->reg(call, TRACE_REG_PERF_CLOSE, p_event);
}
/*
 * Attach @p_event to @tp_event: permission check, registration, then open.
 * If the open step fails the registration is undone again.
 *
 * Returns 0 on success or the error of the first failing step.
 */
static int perf_trace_event_init(struct ftrace_event_call *tp_event,
				 struct perf_event *p_event)
{
	int err;

	err = perf_trace_event_perm(tp_event, p_event);
	if (err)
		return err;

	err = perf_trace_event_reg(tp_event, p_event);
	if (err)
		return err;

	err = perf_trace_event_open(p_event);
	if (err)
		perf_trace_event_unreg(p_event);

	return err;
}
int perf_trace_init(struct perf_event *p_event)
{
struct ftrace_event_call *tp_event;
......@@ -130,6 +192,14 @@ int perf_trace_init(struct perf_event *p_event)
return ret;
}
/*
 * Detach @p_event from its trace event: close it and drop its
 * registration, both under event_mutex.
 */
void perf_trace_destroy(struct perf_event *p_event)
{
mutex_lock(&event_mutex);
/* Reverse order of perf_trace_event_init(), which registers and then opens. */
perf_trace_event_close(p_event);
perf_trace_event_unreg(p_event);
mutex_unlock(&event_mutex);
}
int perf_trace_add(struct perf_event *p_event, int flags)
{
struct ftrace_event_call *tp_event = p_event->tp_event;
......@@ -146,43 +216,14 @@ int perf_trace_add(struct perf_event *p_event, int flags)
list = this_cpu_ptr(pcpu_list);
hlist_add_head_rcu(&p_event->hlist_entry, list);
return 0;
return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event);
}
void perf_trace_del(struct perf_event *p_event, int flags)
{
hlist_del_rcu(&p_event->hlist_entry);
}
void perf_trace_destroy(struct perf_event *p_event)
{
struct ftrace_event_call *tp_event = p_event->tp_event;
int i;
mutex_lock(&event_mutex);
if (--tp_event->perf_refcount > 0)
goto out;
tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
/*
* Ensure our callback won't be called anymore. The buffers
* will be freed after that.
*/
tracepoint_synchronize_unregister();
free_percpu(tp_event->perf_events);
tp_event->perf_events = NULL;
if (!--total_ref_count) {
for (i = 0; i < PERF_NR_CONTEXTS; i++) {
free_percpu(perf_trace_buf[i]);
perf_trace_buf[i] = NULL;
}
}
out:
module_put(tp_event->mod);
mutex_unlock(&event_mutex);
hlist_del_rcu(&p_event->hlist_entry);
tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
}
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
......@@ -214,3 +255,86 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
return raw_data;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
#ifdef CONFIG_FUNCTION_TRACER
/*
 * ftrace callback installed by perf_ftrace_function_register().
 *
 * Builds a TRACE_FN record holding @ip and @parent_ip in a perf trace
 * buffer and submits it to the current cpu's perf_events list of
 * event_function. Nothing is recorded if no buffer could be prepared.
 */
static void
perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip)
{
struct ftrace_entry *entry;
struct hlist_head *head;
struct pt_regs regs;
int rctx;
/*
 * Record size: ftrace_entry plus a u32, rounded up to u64 alignment, with
 * the u32 subtracted again.
 * NOTE(review): presumably the u32 is the size field perf puts in front of
 * raw sample data - confirm against perf_trace_buf_prepare() users.
 */
#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
sizeof(u64)) - sizeof(u32))
BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE);
perf_fetch_caller_regs(&regs);
entry = perf_trace_buf_prepare(ENTRY_SIZE, TRACE_FN, NULL, &rctx);
if (!entry)
return;
entry->ip = ip;
entry->parent_ip = parent_ip;
head = this_cpu_ptr(event_function.perf_events);
perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
1, &regs, head);
#undef ENTRY_SIZE
}
/*
 * Set up the ftrace_ops embedded in @event as a FTRACE_OPS_FL_CONTROL ops
 * calling perf_ftrace_function_call, and register it with ftrace.
 *
 * Returns the result of register_ftrace_function().
 */
static int perf_ftrace_function_register(struct perf_event *event)
{
	event->ftrace_ops.func = perf_ftrace_function_call;
	event->ftrace_ops.flags |= FTRACE_OPS_FL_CONTROL;

	return register_ftrace_function(&event->ftrace_ops);
}
/*
 * Unregister the ftrace_ops embedded in @event and free its filter hashes.
 * The filter is freed even if unregistering failed.
 *
 * Returns the result of unregister_ftrace_function().
 */
static int perf_ftrace_function_unregister(struct perf_event *event)
{
	int err;

	err = unregister_ftrace_function(&event->ftrace_ops);
	ftrace_free_filter(&event->ftrace_ops);

	return err;
}
/* Enable function tracing for @event's ftrace_ops on the current cpu. */
static void perf_ftrace_function_enable(struct perf_event *event)
{
	struct ftrace_ops *ops = &event->ftrace_ops;

	ftrace_function_local_enable(ops);
}
/* Disable function tracing for @event's ftrace_ops on the current cpu. */
static void perf_ftrace_function_disable(struct perf_event *event)
{
	struct ftrace_ops *ops = &event->ftrace_ops;

	ftrace_function_local_disable(ops);
}
/*
 * Event class 'reg' callback for the function trace event.
 *
 * @call: the event being acted on (unused here)
 * @type: requested operation
 * @data: the struct perf_event for the PERF_OPEN/CLOSE/ADD/DEL operations
 *
 * PERF_OPEN/PERF_CLOSE register/unregister the event's ftrace_ops,
 * PERF_ADD/PERF_DEL enable/disable it on the current cpu, and
 * PERF_REGISTER/PERF_UNREGISTER are accepted as no-ops. The non-perf
 * REGISTER/UNREGISTER operations are rejected with -EINVAL.
 */
int perf_ftrace_event_register(struct ftrace_event_call *call,
			       enum trace_reg type, void *data)
{
	struct perf_event *event = data;

	switch (type) {
	case TRACE_REG_PERF_OPEN:
		return perf_ftrace_function_register(event);
	case TRACE_REG_PERF_CLOSE:
		return perf_ftrace_function_unregister(event);
	case TRACE_REG_PERF_ADD:
		perf_ftrace_function_enable(event);
		return 0;
	case TRACE_REG_PERF_DEL:
		perf_ftrace_function_disable(event);
		return 0;
	case TRACE_REG_PERF_REGISTER:
	case TRACE_REG_PERF_UNREGISTER:
		return 0;
	case TRACE_REG_REGISTER:
	case TRACE_REG_UNREGISTER:
		break;
	}

	return -EINVAL;
}
#endif /* CONFIG_FUNCTION_TRACER */
......@@ -147,7 +147,8 @@ int trace_event_raw_init(struct ftrace_event_call *call)
}
EXPORT_SYMBOL_GPL(trace_event_raw_init);
int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
int ftrace_event_reg(struct ftrace_event_call *call,
enum trace_reg type, void *data)
{
switch (type) {
case TRACE_REG_REGISTER:
......@@ -170,6 +171,11 @@ int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
call->class->perf_probe,
call);
return 0;
case TRACE_REG_PERF_OPEN:
case TRACE_REG_PERF_CLOSE:
case TRACE_REG_PERF_ADD:
case TRACE_REG_PERF_DEL:
return 0;
#endif
}
return 0;
......@@ -209,7 +215,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
tracing_stop_cmdline_record();
call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
}
call->class->reg(call, TRACE_REG_UNREGISTER);
call->class->reg(call, TRACE_REG_UNREGISTER, NULL);
}
break;
case 1:
......@@ -218,7 +224,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
tracing_start_cmdline_record();
call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
}
ret = call->class->reg(call, TRACE_REG_REGISTER);
ret = call->class->reg(call, TRACE_REG_REGISTER, NULL);
if (ret) {
tracing_stop_cmdline_record();
pr_info("event trace: Could not enable event "
......
......@@ -81,6 +81,7 @@ enum {
FILT_ERR_TOO_MANY_PREDS,
FILT_ERR_MISSING_FIELD,
FILT_ERR_INVALID_FILTER,
FILT_ERR_IP_FIELD_ONLY,
};
static char *err_text[] = {
......@@ -96,6 +97,7 @@ static char *err_text[] = {
"Too many terms in predicate expression",
"Missing field name and/or value",
"Meaningless filter expression",
"Only 'ip' field is supported for function trace",
};
struct opstack_op {
......@@ -899,6 +901,11 @@ int filter_assign_type(const char *type)
return FILTER_OTHER;
}
static bool is_function_field(struct ftrace_event_field *field)
{
return field->filter_type == FILTER_TRACE_FN;
}
static bool is_string_field(struct ftrace_event_field *field)
{
return field->filter_type == FILTER_DYN_STRING ||
......@@ -986,6 +993,11 @@ static int init_pred(struct filter_parse_state *ps,
fn = filter_pred_strloc;
else
fn = filter_pred_pchar;
} else if (is_function_field(field)) {
if (strcmp(field->name, "ip")) {
parse_error(ps, FILT_ERR_IP_FIELD_ONLY, 0);
return -EINVAL;
}
} else {
if (field->is_signed)
ret = strict_strtoll(pred->regex.pattern, 0, &val);
......@@ -1333,10 +1345,7 @@ static struct filter_pred *create_pred(struct filter_parse_state *ps,
strcpy(pred.regex.pattern, operand2);
pred.regex.len = strlen(pred.regex.pattern);
#ifdef CONFIG_FTRACE_STARTUP_TEST
pred.field = field;
#endif
return init_pred(ps, field, &pred) ? NULL : &pred;
}
......@@ -1949,6 +1958,148 @@ void ftrace_profile_free_filter(struct perf_event *event)
__free_filter(filter);
}
/*
 * State carried through the predicate tree walk that converts an 'ip'
 * filter expression into ftrace filter/notrace settings.
 */
struct function_filter_data {
/* ftrace_ops the filter/notrace regexps are applied to */
struct ftrace_ops *ops;
/* passed as 'reset' to ftrace_set_filter(); cleared after the first regexp */
int first_filter;
/* passed as 'reset' to ftrace_set_notrace(); cleared after the first regexp */
int first_notrace;
};
#ifdef CONFIG_FUNCTION_TRACER
/*
 * Split the first @len bytes of @buf into separate patterns.
 *
 * Patterns may be separated by white space or commas. Returns the vector
 * produced by argv_split() (to be released with argv_free()) and stores
 * the number of patterns in @count; returns NULL if the working copy of
 * the string cannot be allocated or argv_split() fails.
 */
static char **
ftrace_function_filter_re(char *buf, int len, int *count)
{
	char *copy, *p, **argv;

	copy = kstrndup(buf, len, GFP_KERNEL);
	if (copy == NULL)
		return NULL;

	/*
	 * The argv_split function takes white space
	 * as a separator, so convert ',' into spaces.
	 */
	for (p = copy; *p != '\0'; p++) {
		if (*p == ',')
			*p = ' ';
	}

	argv = argv_split(GFP_KERNEL, copy, count);
	kfree(copy);

	return argv;
}
/*
 * Apply one pattern @re (of length @len) to @ops: as a filter entry when
 * @filter is non-zero, as a notrace entry otherwise. @reset is handed
 * through to the ftrace setter unchanged.
 */
static int ftrace_function_set_regexp(struct ftrace_ops *ops, int filter,
				      int reset, char *re, int len)
{
	if (filter)
		return ftrace_set_filter(ops, re, len, reset);

	return ftrace_set_notrace(ops, re, len, reset);
}
/*
 * Apply the patterns contained in the first @len bytes of @buf to
 * data->ops.
 *
 * @filter: non-zero to add the patterns to the filter hash, zero to add
 *          them to the notrace hash
 * @buf:    pattern string; patterns are separated by space or comma
 * @len:    number of bytes of @buf to use
 * @data:   walk state; the matching first_filter/first_notrace flag is
 *          used as the 'reset' argument and cleared once a pattern has
 *          been applied successfully
 *
 * Returns 0 on success, the error of the first pattern that could not be
 * set, or -EINVAL if @buf cannot be split or contains no pattern at all.
 */
static int __ftrace_function_set_filter(int filter, char *buf, int len,
					struct function_filter_data *data)
{
	/*
	 * ret must start out defined: with zero patterns the loop below
	 * never assigns it.
	 */
	int i, re_cnt, ret = -EINVAL;
	int *reset;
	char **re;

	reset = filter ? &data->first_filter : &data->first_notrace;

	/*
	 * The 'ip' field could have multiple filters set, separated
	 * either by space or comma. We first cut the filter and apply
	 * all pieces separately.
	 */
	re = ftrace_function_filter_re(buf, len, &re_cnt);
	if (!re)
		return -EINVAL;

	for (i = 0; i < re_cnt; i++) {
		ret = ftrace_function_set_regexp(data->ops, filter, *reset,
						 re[i], strlen(re[i]));
		if (ret)
			break;

		/* Only the first pattern may reset the existing hash. */
		if (*reset)
			*reset = 0;
	}

	argv_free(re);
	return ret;
}
/*
 * Validate one predicate of a function trace filter.
 *
 * @pred: predicate to check
 * @leaf: non-zero if @pred is a leaf of the predicate tree
 *
 * Returns 0 if the predicate is acceptable, -EINVAL otherwise.
 */
static int ftrace_function_check_pred(struct filter_pred *pred, int leaf)
{
	if (!leaf) {
		/*
		 * Check the non leaf predicate for function trace, verify:
		 *  - only '||' is used
		 */
		return (pred->op == OP_OR) ? 0 : -EINVAL;
	}

	/*
	 * Check the leaf predicate for function trace, verify:
	 *  - only '==' and '!=' is used
	 *  - the 'ip' field is used
	 */
	if (pred->op != OP_EQ && pred->op != OP_NE)
		return -EINVAL;

	if (strcmp(pred->field->name, "ip") != 0)
		return -EINVAL;

	return 0;
}
/*
 * walk_pred_tree() callback: validates every visited predicate and, for
 * leaf predicates, applies the pattern to the ftrace_ops found in @data
 * ('==' goes to the filter hash, anything else to the notrace hash).
 *
 * Stores the result in @err and returns WALK_PRED_ABORT on error,
 * WALK_PRED_DEFAULT otherwise.
 */
static int ftrace_function_set_filter_cb(enum move_type move,
					 struct filter_pred *pred,
					 int *err, void *data)
{
	/* A leaf is seen moving down and has no left child. */
	int is_leaf = (move == MOVE_DOWN) &&
		      (pred->left == FILTER_PRED_INVALID);

	/* Checking the node is valid for function trace. */
	*err = ftrace_function_check_pred(pred, is_leaf);
	if (*err)
		return WALK_PRED_ABORT;

	if (!is_leaf)
		return WALK_PRED_DEFAULT;

	*err = __ftrace_function_set_filter(pred->op == OP_EQ,
					    pred->regex.pattern,
					    pred->regex.len,
					    data);

	return *err ? WALK_PRED_ABORT : WALK_PRED_DEFAULT;
}
/*
 * Translate the parsed @filter into filter/notrace settings of the
 * ftrace_ops embedded in @event by walking its predicate tree.
 *
 * Returns the result of walk_pred_tree().
 */
static int ftrace_function_set_filter(struct perf_event *event,
				      struct event_filter *filter)
{
	struct function_filter_data data;

	data.ops = &event->ftrace_ops;
	/* The first filter and the first notrace pattern reset their hash. */
	data.first_filter = 1;
	data.first_notrace = 1;

	return walk_pred_tree(filter->preds, filter->root,
			      ftrace_function_set_filter_cb, &data);
}
#else
/*
 * Variant used when CONFIG_FUNCTION_TRACER is not set: function trace
 * filters cannot be applied, so always fail with -ENODEV.
 */
static int ftrace_function_set_filter(struct perf_event *event,
struct event_filter *filter)
{
return -ENODEV;
}
#endif /* CONFIG_FUNCTION_TRACER */
int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str)
{
......@@ -1969,9 +2120,16 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
goto out_unlock;
err = create_filter(call, filter_str, false, &filter);
if (!err)
event->filter = filter;
if (err)
goto free_filter;
if (ftrace_event_is_function(call))
err = ftrace_function_set_filter(event, filter);
else
event->filter = filter;
free_filter:
if (err || ftrace_event_is_function(call))
__free_filter(filter);
out_unlock:
......
......@@ -18,6 +18,16 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM ftrace
/*
* The FTRACE_ENTRY_REG macro allows ftrace entry to define register
* function and thus become accessible via perf.
*/
#undef FTRACE_ENTRY_REG
#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \
filter, regfn) \
FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
filter)
/* not needed for this file */
#undef __field_struct
#define __field_struct(type, item)
......@@ -44,7 +54,7 @@
#define F_printk(fmt, args...) fmt, args
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \
struct ____ftrace_##name { \
tstruct \
}; \
......@@ -57,8 +67,9 @@ static void __always_unused ____ftrace_check_##name(void) \
}
#undef FTRACE_ENTRY_DUP
#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \
FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print))
#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print, filter) \
FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
filter)
#include "trace_entries.h"
......@@ -67,7 +78,7 @@ static void __always_unused ____ftrace_check_##name(void) \
ret = trace_define_field(event_call, #type, #item, \
offsetof(typeof(field), item), \
sizeof(field.item), \
is_signed_type(type), FILTER_OTHER); \
is_signed_type(type), filter_type); \
if (ret) \
return ret;
......@@ -77,7 +88,7 @@ static void __always_unused ____ftrace_check_##name(void) \
offsetof(typeof(field), \
container.item), \
sizeof(field.container.item), \
is_signed_type(type), FILTER_OTHER); \
is_signed_type(type), filter_type); \
if (ret) \
return ret;
......@@ -91,7 +102,7 @@ static void __always_unused ____ftrace_check_##name(void) \
ret = trace_define_field(event_call, event_storage, #item, \
offsetof(typeof(field), item), \
sizeof(field.item), \
is_signed_type(type), FILTER_OTHER); \
is_signed_type(type), filter_type); \
mutex_unlock(&event_storage_mutex); \
if (ret) \
return ret; \
......@@ -104,7 +115,7 @@ static void __always_unused ____ftrace_check_##name(void) \
offsetof(typeof(field), \
container.item), \
sizeof(field.container.item), \
is_signed_type(type), FILTER_OTHER); \
is_signed_type(type), filter_type); \
if (ret) \
return ret;
......@@ -112,17 +123,18 @@ static void __always_unused ____ftrace_check_##name(void) \
#define __dynamic_array(type, item) \
ret = trace_define_field(event_call, #type, #item, \
offsetof(typeof(field), item), \
0, is_signed_type(type), FILTER_OTHER);\
0, is_signed_type(type), filter_type);\
if (ret) \
return ret;
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \
int \
ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
{ \
struct struct_name field; \
int ret; \
int filter_type = filter; \
\
tstruct; \
\
......@@ -152,13 +164,15 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
#undef F_printk
#define F_printk(fmt, args...) #fmt ", " __stringify(args)
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print) \
#undef FTRACE_ENTRY_REG
#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\
regfn) \
\
struct ftrace_event_class event_class_ftrace_##call = { \
.system = __stringify(TRACE_SYSTEM), \
.define_fields = ftrace_define_fields_##call, \
.fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
.reg = regfn, \
}; \
\
struct ftrace_event_call __used event_##call = { \
......@@ -170,4 +184,14 @@ struct ftrace_event_call __used event_##call = { \
struct ftrace_event_call __used \
__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
/*
 * Redefine FTRACE_ENTRY as a thin wrapper around FTRACE_ENTRY_REG that
 * passes NULL for the regfn argument. FTRACE_ENTRY_REG assigns regfn to
 * the .reg member of the generated event class, so entries declared with
 * plain FTRACE_ENTRY get a NULL .reg.
 */
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print, filter) \
FTRACE_ENTRY_REG(call, struct_name, etype, \
PARAMS(tstruct), PARAMS(print), filter, NULL)
/*
 * ftrace_event_is_function - test whether @call is event_function
 * @call: event call to compare
 *
 * Returns 1 when @call points at event_function, 0 otherwise.
 */
int ftrace_event_is_function(struct ftrace_event_call *call)
{
	struct ftrace_event_call *function_event = &event_function;

	return function_event == call;
}
#include "trace_entries.h"
......@@ -1892,7 +1892,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
#endif /* CONFIG_PERF_EVENTS */
static __kprobes
int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
int kprobe_register(struct ftrace_event_call *event,
enum trace_reg type, void *data)
{
struct trace_probe *tp = (struct trace_probe *)event->data;
......@@ -1909,6 +1910,11 @@ int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
case TRACE_REG_PERF_UNREGISTER:
disable_trace_probe(tp, TP_FLAG_PROFILE);
return 0;
case TRACE_REG_PERF_OPEN:
case TRACE_REG_PERF_CLOSE:
case TRACE_REG_PERF_ADD:
case TRACE_REG_PERF_DEL:
return 0;
#endif
}
return 0;
......
......@@ -300,7 +300,7 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
unsigned long mask;
const char *str;
const char *ret = p->buffer + p->len;
int i;
int i, first = 1;
for (i = 0; flag_array[i].name && flags; i++) {
......@@ -310,14 +310,16 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
str = flag_array[i].name;
flags &= ~mask;
if (p->len && delim)
if (!first && delim)
trace_seq_puts(p, delim);
else
first = 0;
trace_seq_puts(p, str);
}
/* check for left over flags */
if (flags) {
if (p->len && delim)
if (!first && delim)
trace_seq_puts(p, delim);
trace_seq_printf(p, "0x%lx", flags);
}
......@@ -344,7 +346,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
break;
}
if (!p->len)
if (ret == (const char *)(p->buffer + p->len))
trace_seq_printf(p, "0x%lx", val);
trace_seq_putc(p, 0);
......@@ -370,7 +372,7 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
break;
}
if (!p->len)
if (ret == (const char *)(p->buffer + p->len))
trace_seq_printf(p, "0x%llx", val);
trace_seq_putc(p, 0);
......
......@@ -17,9 +17,9 @@ static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
static int syscall_enter_register(struct ftrace_event_call *event,
enum trace_reg type);
enum trace_reg type, void *data);
static int syscall_exit_register(struct ftrace_event_call *event,
enum trace_reg type);
enum trace_reg type, void *data);
static int syscall_enter_define_fields(struct ftrace_event_call *call);
static int syscall_exit_define_fields(struct ftrace_event_call *call);
......@@ -649,7 +649,7 @@ void perf_sysexit_disable(struct ftrace_event_call *call)
#endif /* CONFIG_PERF_EVENTS */
static int syscall_enter_register(struct ftrace_event_call *event,
enum trace_reg type)
enum trace_reg type, void *data)
{
switch (type) {
case TRACE_REG_REGISTER:
......@@ -664,13 +664,18 @@ static int syscall_enter_register(struct ftrace_event_call *event,
case TRACE_REG_PERF_UNREGISTER:
perf_sysenter_disable(event);
return 0;
case TRACE_REG_PERF_OPEN:
case TRACE_REG_PERF_CLOSE:
case TRACE_REG_PERF_ADD:
case TRACE_REG_PERF_DEL:
return 0;
#endif
}
return 0;
}
static int syscall_exit_register(struct ftrace_event_call *event,
enum trace_reg type)
enum trace_reg type, void *data)
{
switch (type) {
case TRACE_REG_REGISTER:
......@@ -685,6 +690,11 @@ static int syscall_exit_register(struct ftrace_event_call *event,
case TRACE_REG_PERF_UNREGISTER:
perf_sysexit_disable(event);
return 0;
case TRACE_REG_PERF_OPEN:
case TRACE_REG_PERF_CLOSE:
case TRACE_REG_PERF_ADD:
case TRACE_REG_PERF_DEL:
return 0;
#endif
}
return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment