Commit 6fb2915d authored by Li Zefan, committed by Ingo Molnar

tracing/profile: Add filter support

- Add an ioctl to allocate a filter for a perf event.

- Free the filter when the associated perf event is to be freed.

- Do the filtering in perf_swevent_match().
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <4AD69546.8050401@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent b0f1a59a
...@@ -144,7 +144,7 @@ extern char *trace_profile_buf_nmi; ...@@ -144,7 +144,7 @@ extern char *trace_profile_buf_nmi;
#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */
extern void destroy_preds(struct ftrace_event_call *call); extern void destroy_preds(struct ftrace_event_call *call);
extern int filter_match_preds(struct ftrace_event_call *call, void *rec); extern int filter_match_preds(struct event_filter *filter, void *rec);
extern int filter_current_check_discard(struct ring_buffer *buffer, extern int filter_current_check_discard(struct ring_buffer *buffer,
struct ftrace_event_call *call, struct ftrace_event_call *call,
void *rec, void *rec,
...@@ -186,4 +186,13 @@ do { \ ...@@ -186,4 +186,13 @@ do { \
__trace_printk(ip, fmt, ##args); \ __trace_printk(ip, fmt, ##args); \
} while (0) } while (0)
#ifdef CONFIG_EVENT_PROFILE
/* Forward declaration: the prototypes below only take pointers to it. */
struct perf_event;
/* Profile enable/disable hooks, keyed by event id (implemented elsewhere). */
extern int ftrace_profile_enable(int event_id);
extern void ftrace_profile_disable(int event_id);
/*
 * Parse filter_str against the trace event identified by event_id and, on
 * success, attach the resulting filter to the perf event. Returns 0 or a
 * negative errno.
 */
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str);
/* Detach the perf event's filter pointer and free the filter. */
extern void ftrace_profile_free_filter(struct perf_event *event);
#endif
#endif /* _LINUX_FTRACE_EVENT_H */ #endif /* _LINUX_FTRACE_EVENT_H */
...@@ -225,6 +225,7 @@ struct perf_counter_attr { ...@@ -225,6 +225,7 @@ struct perf_counter_attr {
#define PERF_COUNTER_IOC_RESET _IO ('$', 3) #define PERF_COUNTER_IOC_RESET _IO ('$', 3)
#define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) #define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64)
#define PERF_COUNTER_IOC_SET_OUTPUT _IO ('$', 5) #define PERF_COUNTER_IOC_SET_OUTPUT _IO ('$', 5)
#define PERF_COUNTER_IOC_SET_FILTER _IOW('$', 6, char *)
enum perf_counter_ioc_flags { enum perf_counter_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0, PERF_IOC_FLAG_GROUP = 1U << 0,
......
...@@ -221,6 +221,7 @@ struct perf_event_attr { ...@@ -221,6 +221,7 @@ struct perf_event_attr {
#define PERF_EVENT_IOC_RESET _IO ('$', 3) #define PERF_EVENT_IOC_RESET _IO ('$', 3)
#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, u64) #define PERF_EVENT_IOC_PERIOD _IOW('$', 4, u64)
#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
enum perf_event_ioc_flags { enum perf_event_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0, PERF_IOC_FLAG_GROUP = 1U << 0,
...@@ -633,7 +634,12 @@ struct perf_event { ...@@ -633,7 +634,12 @@ struct perf_event {
struct pid_namespace *ns; struct pid_namespace *ns;
u64 id; u64 id;
#ifdef CONFIG_EVENT_PROFILE
struct event_filter *filter;
#endif #endif
#endif /* CONFIG_PERF_EVENTS */
}; };
/** /**
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <linux/anon_inodes.h> #include <linux/anon_inodes.h>
#include <linux/kernel_stat.h> #include <linux/kernel_stat.h>
#include <linux/perf_event.h> #include <linux/perf_event.h>
#include <linux/ftrace_event.h>
#include <asm/irq_regs.h> #include <asm/irq_regs.h>
...@@ -1658,6 +1659,8 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) ...@@ -1658,6 +1659,8 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
return ERR_PTR(err); return ERR_PTR(err);
} }
static void perf_event_free_filter(struct perf_event *event);
static void free_event_rcu(struct rcu_head *head) static void free_event_rcu(struct rcu_head *head)
{ {
struct perf_event *event; struct perf_event *event;
...@@ -1665,6 +1668,7 @@ static void free_event_rcu(struct rcu_head *head) ...@@ -1665,6 +1668,7 @@ static void free_event_rcu(struct rcu_head *head)
event = container_of(head, struct perf_event, rcu_head); event = container_of(head, struct perf_event, rcu_head);
if (event->ns) if (event->ns)
put_pid_ns(event->ns); put_pid_ns(event->ns);
perf_event_free_filter(event);
kfree(event); kfree(event);
} }
...@@ -1974,7 +1978,8 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) ...@@ -1974,7 +1978,8 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
return ret; return ret;
} }
int perf_event_set_output(struct perf_event *event, int output_fd); static int perf_event_set_output(struct perf_event *event, int output_fd);
static int perf_event_set_filter(struct perf_event *event, void __user *arg);
static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{ {
...@@ -2002,6 +2007,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ...@@ -2002,6 +2007,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case PERF_EVENT_IOC_SET_OUTPUT: case PERF_EVENT_IOC_SET_OUTPUT:
return perf_event_set_output(event, arg); return perf_event_set_output(event, arg);
case PERF_EVENT_IOC_SET_FILTER:
return perf_event_set_filter(event, (void __user *)arg);
default: default:
return -ENOTTY; return -ENOTTY;
} }
...@@ -3806,9 +3814,14 @@ static int perf_swevent_is_counting(struct perf_event *event) ...@@ -3806,9 +3814,14 @@ static int perf_swevent_is_counting(struct perf_event *event)
return 1; return 1;
} }
static int perf_tp_event_match(struct perf_event *event,
struct perf_sample_data *data);
static int perf_swevent_match(struct perf_event *event, static int perf_swevent_match(struct perf_event *event,
enum perf_type_id type, enum perf_type_id type,
u32 event_id, struct pt_regs *regs) u32 event_id,
struct perf_sample_data *data,
struct pt_regs *regs)
{ {
if (!perf_swevent_is_counting(event)) if (!perf_swevent_is_counting(event))
return 0; return 0;
...@@ -3826,6 +3839,10 @@ static int perf_swevent_match(struct perf_event *event, ...@@ -3826,6 +3839,10 @@ static int perf_swevent_match(struct perf_event *event,
return 0; return 0;
} }
if (event->attr.type == PERF_TYPE_TRACEPOINT &&
!perf_tp_event_match(event, data))
return 0;
return 1; return 1;
} }
...@@ -3842,7 +3859,7 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx, ...@@ -3842,7 +3859,7 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx,
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
if (perf_swevent_match(event, type, event_id, regs)) if (perf_swevent_match(event, type, event_id, data, regs))
perf_swevent_add(event, nr, nmi, data, regs); perf_swevent_add(event, nr, nmi, data, regs);
} }
rcu_read_unlock(); rcu_read_unlock();
...@@ -4086,6 +4103,7 @@ static const struct pmu perf_ops_task_clock = { ...@@ -4086,6 +4103,7 @@ static const struct pmu perf_ops_task_clock = {
}; };
#ifdef CONFIG_EVENT_PROFILE #ifdef CONFIG_EVENT_PROFILE
void perf_tp_event(int event_id, u64 addr, u64 count, void *record, void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
int entry_size) int entry_size)
{ {
...@@ -4109,8 +4127,15 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, ...@@ -4109,8 +4127,15 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
} }
EXPORT_SYMBOL_GPL(perf_tp_event); EXPORT_SYMBOL_GPL(perf_tp_event);
extern int ftrace_profile_enable(int); static int perf_tp_event_match(struct perf_event *event,
extern void ftrace_profile_disable(int); struct perf_sample_data *data)
{
void *record = data->raw->data;
if (likely(!event->filter) || filter_match_preds(event->filter, record))
return 1;
return 0;
}
static void tp_perf_event_destroy(struct perf_event *event) static void tp_perf_event_destroy(struct perf_event *event)
{ {
...@@ -4135,12 +4160,53 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event) ...@@ -4135,12 +4160,53 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
return &perf_ops_generic; return &perf_ops_generic;
} }
/*
 * Handler for PERF_EVENT_IOC_SET_FILTER.
 *
 * Only tracepoint events can carry a filter; anything else is rejected with
 * -EINVAL. The filter expression is copied in from user space (at most
 * PAGE_SIZE bytes) and handed to ftrace_profile_set_filter() together with
 * the event id stored in attr.config. The kernel copy is released before
 * returning, whatever the outcome.
 *
 * Returns 0 on success or a negative errno.
 */
static int perf_event_set_filter(struct perf_event *event, void __user *arg)
{
	char *str;
	int err;

	if (event->attr.type != PERF_TYPE_TRACEPOINT)
		return -EINVAL;

	str = strndup_user(arg, PAGE_SIZE);
	if (IS_ERR(str))
		return PTR_ERR(str);

	err = ftrace_profile_set_filter(event, event->attr.config, str);

	/* The callee does not keep our copy of the string. */
	kfree(str);

	return err;
}
/*
 * Release the filter attached to @event by delegating to the ftrace side,
 * which owns the filter's allocation.
 */
static void perf_event_free_filter(struct perf_event *event)
{
	ftrace_profile_free_filter(event);
}
#else #else
/*
 * Stub used when CONFIG_EVENT_PROFILE is off: there is no filter to
 * evaluate, so every event is reported as matching.
 */
static int perf_tp_event_match(struct perf_event *event,
			       struct perf_sample_data *data)
{
	return 1;
}
static const struct pmu *tp_perf_event_init(struct perf_event *event) static const struct pmu *tp_perf_event_init(struct perf_event *event)
{ {
return NULL; return NULL;
} }
#endif
/*
 * Stub used when CONFIG_EVENT_PROFILE is off: filters cannot be set, so the
 * ioctl always fails with -ENOENT.
 */
static int perf_event_set_filter(struct perf_event *event, void __user *arg)
{
	return -ENOENT;
}
/*
 * Stub used when CONFIG_EVENT_PROFILE is off: no filter can exist, so there
 * is nothing to free.
 */
static void perf_event_free_filter(struct perf_event *event)
{
}
#endif /* CONFIG_EVENT_PROFILE */
atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
...@@ -4394,7 +4460,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, ...@@ -4394,7 +4460,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
goto out; goto out;
} }
int perf_event_set_output(struct perf_event *event, int output_fd) static int perf_event_set_output(struct perf_event *event, int output_fd)
{ {
struct perf_event *output_event = NULL; struct perf_event *output_event = NULL;
struct file *output_file = NULL; struct file *output_file = NULL;
......
...@@ -743,7 +743,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, ...@@ -743,7 +743,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
struct ring_buffer *buffer, struct ring_buffer *buffer,
struct ring_buffer_event *event) struct ring_buffer_event *event)
{ {
if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) { if (unlikely(call->filter_active) &&
!filter_match_preds(call->filter, rec)) {
ring_buffer_discard_commit(buffer, event); ring_buffer_discard_commit(buffer, event);
return 1; return 1;
} }
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/ctype.h> #include <linux/ctype.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/perf_event.h>
#include "trace.h" #include "trace.h"
#include "trace_output.h" #include "trace_output.h"
...@@ -363,9 +364,8 @@ static void filter_build_regex(struct filter_pred *pred) ...@@ -363,9 +364,8 @@ static void filter_build_regex(struct filter_pred *pred)
} }
/* return 1 if event matches, 0 otherwise (discard) */ /* return 1 if event matches, 0 otherwise (discard) */
int filter_match_preds(struct ftrace_event_call *call, void *rec) int filter_match_preds(struct event_filter *filter, void *rec)
{ {
struct event_filter *filter = call->filter;
int match, top = 0, val1 = 0, val2 = 0; int match, top = 0, val1 = 0, val2 = 0;
int stack[MAX_FILTER_PRED]; int stack[MAX_FILTER_PRED];
struct filter_pred *pred; struct filter_pred *pred;
...@@ -538,9 +538,8 @@ static void filter_disable_preds(struct ftrace_event_call *call) ...@@ -538,9 +538,8 @@ static void filter_disable_preds(struct ftrace_event_call *call)
filter->preds[i]->fn = filter_pred_none; filter->preds[i]->fn = filter_pred_none;
} }
void destroy_preds(struct ftrace_event_call *call) static void __free_preds(struct event_filter *filter)
{ {
struct event_filter *filter = call->filter;
int i; int i;
if (!filter) if (!filter)
...@@ -553,21 +552,24 @@ void destroy_preds(struct ftrace_event_call *call) ...@@ -553,21 +552,24 @@ void destroy_preds(struct ftrace_event_call *call)
kfree(filter->preds); kfree(filter->preds);
kfree(filter->filter_string); kfree(filter->filter_string);
kfree(filter); kfree(filter);
}
void destroy_preds(struct ftrace_event_call *call)
{
__free_preds(call->filter);
call->filter = NULL; call->filter = NULL;
call->filter_active = 0;
} }
static int init_preds(struct ftrace_event_call *call) static struct event_filter *__alloc_preds(void)
{ {
struct event_filter *filter; struct event_filter *filter;
struct filter_pred *pred; struct filter_pred *pred;
int i; int i;
if (call->filter) filter = kzalloc(sizeof(*filter), GFP_KERNEL);
return 0; if (!filter)
return ERR_PTR(-ENOMEM);
filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
if (!call->filter)
return -ENOMEM;
filter->n_preds = 0; filter->n_preds = 0;
...@@ -583,12 +585,24 @@ static int init_preds(struct ftrace_event_call *call) ...@@ -583,12 +585,24 @@ static int init_preds(struct ftrace_event_call *call)
filter->preds[i] = pred; filter->preds[i] = pred;
} }
return 0; return filter;
oom: oom:
destroy_preds(call); __free_preds(filter);
return ERR_PTR(-ENOMEM);
}
return -ENOMEM; static int init_preds(struct ftrace_event_call *call)
{
if (call->filter)
return 0;
call->filter_active = 0;
call->filter = __alloc_preds();
if (IS_ERR(call->filter))
return PTR_ERR(call->filter);
return 0;
} }
static int init_subsystem_preds(struct event_subsystem *system) static int init_subsystem_preds(struct event_subsystem *system)
...@@ -629,10 +643,10 @@ static void filter_free_subsystem_preds(struct event_subsystem *system) ...@@ -629,10 +643,10 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
static int filter_add_pred_fn(struct filter_parse_state *ps, static int filter_add_pred_fn(struct filter_parse_state *ps,
struct ftrace_event_call *call, struct ftrace_event_call *call,
struct event_filter *filter,
struct filter_pred *pred, struct filter_pred *pred,
filter_pred_fn_t fn) filter_pred_fn_t fn)
{ {
struct event_filter *filter = call->filter;
int idx, err; int idx, err;
if (filter->n_preds == MAX_FILTER_PRED) { if (filter->n_preds == MAX_FILTER_PRED) {
...@@ -647,7 +661,6 @@ static int filter_add_pred_fn(struct filter_parse_state *ps, ...@@ -647,7 +661,6 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
return err; return err;
filter->n_preds++; filter->n_preds++;
call->filter_active = 1;
return 0; return 0;
} }
...@@ -726,6 +739,7 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size, ...@@ -726,6 +739,7 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
static int filter_add_pred(struct filter_parse_state *ps, static int filter_add_pred(struct filter_parse_state *ps,
struct ftrace_event_call *call, struct ftrace_event_call *call,
struct event_filter *filter,
struct filter_pred *pred, struct filter_pred *pred,
bool dry_run) bool dry_run)
{ {
...@@ -795,7 +809,7 @@ static int filter_add_pred(struct filter_parse_state *ps, ...@@ -795,7 +809,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
add_pred_fn: add_pred_fn:
if (!dry_run) if (!dry_run)
return filter_add_pred_fn(ps, call, pred, fn); return filter_add_pred_fn(ps, call, filter, pred, fn);
return 0; return 0;
} }
...@@ -1154,6 +1168,7 @@ static int check_preds(struct filter_parse_state *ps) ...@@ -1154,6 +1168,7 @@ static int check_preds(struct filter_parse_state *ps)
} }
static int replace_preds(struct ftrace_event_call *call, static int replace_preds(struct ftrace_event_call *call,
struct event_filter *filter,
struct filter_parse_state *ps, struct filter_parse_state *ps,
char *filter_string, char *filter_string,
bool dry_run) bool dry_run)
...@@ -1200,7 +1215,7 @@ static int replace_preds(struct ftrace_event_call *call, ...@@ -1200,7 +1215,7 @@ static int replace_preds(struct ftrace_event_call *call,
add_pred: add_pred:
if (!pred) if (!pred)
return -ENOMEM; return -ENOMEM;
err = filter_add_pred(ps, call, pred, dry_run); err = filter_add_pred(ps, call, filter, pred, dry_run);
filter_free_pred(pred); filter_free_pred(pred);
if (err) if (err)
return err; return err;
...@@ -1216,6 +1231,7 @@ static int replace_system_preds(struct event_subsystem *system, ...@@ -1216,6 +1231,7 @@ static int replace_system_preds(struct event_subsystem *system,
char *filter_string) char *filter_string)
{ {
struct ftrace_event_call *call; struct ftrace_event_call *call;
struct event_filter *filter;
int err; int err;
bool fail = true; bool fail = true;
...@@ -1228,17 +1244,19 @@ static int replace_system_preds(struct event_subsystem *system, ...@@ -1228,17 +1244,19 @@ static int replace_system_preds(struct event_subsystem *system,
continue; continue;
/* try to see if the filter can be applied */ /* try to see if the filter can be applied */
err = replace_preds(call, ps, filter_string, true); err = replace_preds(call, filter, ps, filter_string, true);
if (err) if (err)
continue; continue;
/* really apply the filter */ /* really apply the filter */
filter_disable_preds(call); filter_disable_preds(call);
err = replace_preds(call, ps, filter_string, false); err = replace_preds(call, filter, ps, filter_string, false);
if (err) if (err)
filter_disable_preds(call); filter_disable_preds(call);
else else {
replace_filter_string(call->filter, filter_string); call->filter_active = 1;
replace_filter_string(filter, filter_string);
}
fail = false; fail = false;
} }
...@@ -1252,7 +1270,6 @@ static int replace_system_preds(struct event_subsystem *system, ...@@ -1252,7 +1270,6 @@ static int replace_system_preds(struct event_subsystem *system,
int apply_event_filter(struct ftrace_event_call *call, char *filter_string) int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
{ {
int err; int err;
struct filter_parse_state *ps; struct filter_parse_state *ps;
mutex_lock(&event_mutex); mutex_lock(&event_mutex);
...@@ -1283,10 +1300,11 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) ...@@ -1283,10 +1300,11 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
goto out; goto out;
} }
err = replace_preds(call, ps, filter_string, false); err = replace_preds(call, call->filter, ps, filter_string, false);
if (err) if (err)
append_filter_err(ps, call->filter); append_filter_err(ps, call->filter);
else
call->filter_active = 1;
out: out:
filter_opstack_clear(ps); filter_opstack_clear(ps);
postfix_clear(ps); postfix_clear(ps);
...@@ -1301,7 +1319,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system, ...@@ -1301,7 +1319,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
char *filter_string) char *filter_string)
{ {
int err; int err;
struct filter_parse_state *ps; struct filter_parse_state *ps;
mutex_lock(&event_mutex); mutex_lock(&event_mutex);
...@@ -1345,3 +1362,67 @@ int apply_subsystem_event_filter(struct event_subsystem *system, ...@@ -1345,3 +1362,67 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
return err; return err;
} }
#ifdef CONFIG_EVENT_PROFILE
/*
 * Drop the filter attached to a perf event.
 *
 * The event's pointer is cleared before the old filter is handed to
 * __free_preds(), so the event never refers to freed memory.
 */
void ftrace_profile_free_filter(struct perf_event *event)
{
	struct event_filter *stale;

	stale = event->filter;
	event->filter = NULL;

	__free_preds(stale);
}
int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str)
{
int err;
struct event_filter *filter;
struct filter_parse_state *ps;
struct ftrace_event_call *call = NULL;
mutex_lock(&event_mutex);
list_for_each_entry(call, &ftrace_events, list) {
if (call->id == event_id)
break;
}
if (!call)
return -EINVAL;
if (event->filter)
return -EEXIST;
filter = __alloc_preds();
if (IS_ERR(filter))
return PTR_ERR(filter);
err = -ENOMEM;
ps = kzalloc(sizeof(*ps), GFP_KERNEL);
if (!ps)
goto free_preds;
parse_init(ps, filter_ops, filter_str);
err = filter_parse(ps);
if (err)
goto free_ps;
err = replace_preds(call, filter, ps, filter_str, false);
if (!err)
event->filter = filter;
free_ps:
filter_opstack_clear(ps);
postfix_clear(ps);
kfree(ps);
free_preds:
if (err)
__free_preds(filter);
mutex_unlock(&event_mutex);
return err;
}
#endif /* CONFIG_EVENT_PROFILE */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment