Commit fb34a08c authored by Jason Baron's avatar Jason Baron Committed by Frederic Weisbecker

tracing: Add trace events for each syscall entry/exit

Layer Frederic's syscall tracer on tracepoints. We create trace events
via hooking into the SYSCALL_DEFINE macros. This allows us to
individually toggle syscall entry and exit points on/off.
Signed-off-by: default avatarJason Baron <jbaron@redhat.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Jiaying Zhang <jiayingz@google.com>
Cc: Martin Bligh <mbligh@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: default avatarFrederic Weisbecker <fweisbec@gmail.com>
parent 69fd4f0e
...@@ -64,6 +64,7 @@ struct perf_counter_attr; ...@@ -64,6 +64,7 @@ struct perf_counter_attr;
#include <linux/sem.h> #include <linux/sem.h>
#include <asm/siginfo.h> #include <asm/siginfo.h>
#include <asm/signal.h> #include <asm/signal.h>
#include <linux/unistd.h>
#include <linux/quota.h> #include <linux/quota.h>
#include <linux/key.h> #include <linux/key.h>
#include <trace/syscall.h> #include <trace/syscall.h>
...@@ -112,6 +113,59 @@ struct perf_counter_attr; ...@@ -112,6 +113,59 @@ struct perf_counter_attr;
#define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__) #define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__)
#define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) #define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__)
#define SYSCALL_TRACE_ENTER_EVENT(sname) \
static struct ftrace_event_call event_enter_##sname; \
static int init_enter_##sname(void) \
{ \
int num; \
num = syscall_name_to_nr("sys"#sname); \
if (num < 0) \
return -ENOSYS; \
register_ftrace_event(&event_syscall_enter); \
INIT_LIST_HEAD(&event_enter_##sname.fields); \
init_preds(&event_enter_##sname); \
return 0; \
} \
static struct ftrace_event_call __used \
__attribute__((__aligned__(4))) \
__attribute__((section("_ftrace_events"))) \
event_enter_##sname = { \
.name = "sys_enter"#sname, \
.system = "syscalls", \
.event = &event_syscall_enter, \
.raw_init = init_enter_##sname, \
.regfunc = reg_event_syscall_enter, \
.unregfunc = unreg_event_syscall_enter, \
.data = "sys"#sname, \
}
#define SYSCALL_TRACE_EXIT_EVENT(sname) \
static struct ftrace_event_call event_exit_##sname; \
static int init_exit_##sname(void) \
{ \
int num; \
num = syscall_name_to_nr("sys"#sname); \
if (num < 0) \
return -ENOSYS; \
register_ftrace_event(&event_syscall_exit); \
INIT_LIST_HEAD(&event_exit_##sname.fields); \
init_preds(&event_exit_##sname); \
return 0; \
} \
static struct ftrace_event_call __used \
__attribute__((__aligned__(4))) \
__attribute__((section("_ftrace_events"))) \
event_exit_##sname = { \
.name = "sys_exit"#sname, \
.system = "syscalls", \
.event = &event_syscall_exit, \
.raw_init = init_exit_##sname, \
.regfunc = reg_event_syscall_exit, \
.unregfunc = unreg_event_syscall_exit, \
.data = "sys"#sname, \
}
#define SYSCALL_METADATA(sname, nb) \ #define SYSCALL_METADATA(sname, nb) \
static const struct syscall_metadata __used \ static const struct syscall_metadata __used \
__attribute__((__aligned__(4))) \ __attribute__((__aligned__(4))) \
...@@ -121,7 +175,9 @@ struct perf_counter_attr; ...@@ -121,7 +175,9 @@ struct perf_counter_attr;
.nb_args = nb, \ .nb_args = nb, \
.types = types_##sname, \ .types = types_##sname, \
.args = args_##sname, \ .args = args_##sname, \
} }; \
SYSCALL_TRACE_ENTER_EVENT(sname); \
SYSCALL_TRACE_EXIT_EVENT(sname);
#define SYSCALL_DEFINE0(sname) \ #define SYSCALL_DEFINE0(sname) \
static const struct syscall_metadata __used \ static const struct syscall_metadata __used \
...@@ -131,8 +187,9 @@ struct perf_counter_attr; ...@@ -131,8 +187,9 @@ struct perf_counter_attr;
.name = "sys_"#sname, \ .name = "sys_"#sname, \
.nb_args = 0, \ .nb_args = 0, \
}; \ }; \
SYSCALL_TRACE_ENTER_EVENT(_##sname); \
SYSCALL_TRACE_EXIT_EVENT(_##sname); \
asmlinkage long sys_##sname(void) asmlinkage long sys_##sname(void)
#else #else
#define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) #define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void)
#endif #endif
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
#define _TRACE_SYSCALL_H #define _TRACE_SYSCALL_H
#include <linux/tracepoint.h> #include <linux/tracepoint.h>
#include <linux/unistd.h>
#include <linux/ftrace_event.h>
#include <asm/ptrace.h> #include <asm/ptrace.h>
...@@ -40,15 +42,13 @@ struct syscall_metadata { ...@@ -40,15 +42,13 @@ struct syscall_metadata {
#ifdef CONFIG_FTRACE_SYSCALLS #ifdef CONFIG_FTRACE_SYSCALLS
extern struct syscall_metadata *syscall_nr_to_meta(int nr); extern struct syscall_metadata *syscall_nr_to_meta(int nr);
extern void start_ftrace_syscalls(void); extern int syscall_name_to_nr(char *name);
extern void stop_ftrace_syscalls(void); extern struct trace_event event_syscall_enter;
extern void ftrace_syscall_enter(struct pt_regs *regs); extern struct trace_event event_syscall_exit;
extern void ftrace_syscall_exit(struct pt_regs *regs); extern int reg_event_syscall_enter(void *ptr);
#else extern void unreg_event_syscall_enter(void *ptr);
static inline void start_ftrace_syscalls(void) { } extern int reg_event_syscall_exit(void *ptr);
static inline void stop_ftrace_syscalls(void) { } extern void unreg_event_syscall_exit(void *ptr);
static inline void ftrace_syscall_enter(struct pt_regs *regs) { }
static inline void ftrace_syscall_exit(struct pt_regs *regs) { }
#endif #endif
#endif /* _TRACE_SYSCALL_H */ #endif /* _TRACE_SYSCALL_H */
#include <trace/syscall.h> #include <trace/syscall.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/ftrace.h>
#include <asm/syscall.h> #include <asm/syscall.h>
#include "trace_output.h" #include "trace_output.h"
#include "trace.h" #include "trace.h"
/* Keep a counter of the syscall tracing users */
static int refcount;
/* Prevent from races on thread flags toggling */
static DEFINE_MUTEX(syscall_trace_lock); static DEFINE_MUTEX(syscall_trace_lock);
static int sys_refcount_enter;
static int sys_refcount_exit;
static DECLARE_BITMAP(enabled_enter_syscalls, FTRACE_SYSCALL_MAX);
static DECLARE_BITMAP(enabled_exit_syscalls, FTRACE_SYSCALL_MAX);
/* Option to display the parameters types */ /* Option to display the parameters types */
enum { enum {
...@@ -95,53 +96,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags) ...@@ -95,53 +96,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
return TRACE_TYPE_HANDLED; return TRACE_TYPE_HANDLED;
} }
void start_ftrace_syscalls(void) void ftrace_syscall_enter(struct pt_regs *regs, long id)
{
unsigned long flags;
struct task_struct *g, *t;
mutex_lock(&syscall_trace_lock);
/* Don't enable the flag on the tasks twice */
if (++refcount != 1)
goto unlock;
read_lock_irqsave(&tasklist_lock, flags);
do_each_thread(g, t) {
set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
} while_each_thread(g, t);
read_unlock_irqrestore(&tasklist_lock, flags);
unlock:
mutex_unlock(&syscall_trace_lock);
}
void stop_ftrace_syscalls(void)
{
unsigned long flags;
struct task_struct *g, *t;
mutex_lock(&syscall_trace_lock);
/* There are perhaps still some users */
if (--refcount)
goto unlock;
read_lock_irqsave(&tasklist_lock, flags);
do_each_thread(g, t) {
clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
} while_each_thread(g, t);
read_unlock_irqrestore(&tasklist_lock, flags);
unlock:
mutex_unlock(&syscall_trace_lock);
}
void ftrace_syscall_enter(struct pt_regs *regs)
{ {
struct syscall_trace_enter *entry; struct syscall_trace_enter *entry;
struct syscall_metadata *sys_data; struct syscall_metadata *sys_data;
...@@ -150,6 +105,8 @@ void ftrace_syscall_enter(struct pt_regs *regs) ...@@ -150,6 +105,8 @@ void ftrace_syscall_enter(struct pt_regs *regs)
int syscall_nr; int syscall_nr;
syscall_nr = syscall_get_nr(current, regs); syscall_nr = syscall_get_nr(current, regs);
if (!test_bit(syscall_nr, enabled_enter_syscalls))
return;
sys_data = syscall_nr_to_meta(syscall_nr); sys_data = syscall_nr_to_meta(syscall_nr);
if (!sys_data) if (!sys_data)
...@@ -170,7 +127,7 @@ void ftrace_syscall_enter(struct pt_regs *regs) ...@@ -170,7 +127,7 @@ void ftrace_syscall_enter(struct pt_regs *regs)
trace_wake_up(); trace_wake_up();
} }
void ftrace_syscall_exit(struct pt_regs *regs) void ftrace_syscall_exit(struct pt_regs *regs, long ret)
{ {
struct syscall_trace_exit *entry; struct syscall_trace_exit *entry;
struct syscall_metadata *sys_data; struct syscall_metadata *sys_data;
...@@ -178,6 +135,8 @@ void ftrace_syscall_exit(struct pt_regs *regs) ...@@ -178,6 +135,8 @@ void ftrace_syscall_exit(struct pt_regs *regs)
int syscall_nr; int syscall_nr;
syscall_nr = syscall_get_nr(current, regs); syscall_nr = syscall_get_nr(current, regs);
if (!test_bit(syscall_nr, enabled_exit_syscalls))
return;
sys_data = syscall_nr_to_meta(syscall_nr); sys_data = syscall_nr_to_meta(syscall_nr);
if (!sys_data) if (!sys_data)
...@@ -196,54 +155,94 @@ void ftrace_syscall_exit(struct pt_regs *regs) ...@@ -196,54 +155,94 @@ void ftrace_syscall_exit(struct pt_regs *regs)
trace_wake_up(); trace_wake_up();
} }
static int init_syscall_tracer(struct trace_array *tr) int reg_event_syscall_enter(void *ptr)
{ {
start_ftrace_syscalls(); int ret = 0;
int num;
return 0; char *name;
name = (char *)ptr;
num = syscall_name_to_nr(name);
if (num < 0 || num >= FTRACE_SYSCALL_MAX)
return -ENOSYS;
mutex_lock(&syscall_trace_lock);
if (!sys_refcount_enter)
ret = register_trace_syscall_enter(ftrace_syscall_enter);
if (ret) {
pr_info("event trace: Could not activate"
"syscall entry trace point");
} else {
set_bit(num, enabled_enter_syscalls);
sys_refcount_enter++;
}
mutex_unlock(&syscall_trace_lock);
return ret;
} }
static void reset_syscall_tracer(struct trace_array *tr) void unreg_event_syscall_enter(void *ptr)
{ {
stop_ftrace_syscalls(); int num;
tracing_reset_online_cpus(tr); char *name;
}
static struct trace_event syscall_enter_event = {
.type = TRACE_SYSCALL_ENTER,
.trace = print_syscall_enter,
};
static struct trace_event syscall_exit_event = {
.type = TRACE_SYSCALL_EXIT,
.trace = print_syscall_exit,
};
static struct tracer syscall_tracer __read_mostly = { name = (char *)ptr;
.name = "syscall", num = syscall_name_to_nr(name);
.init = init_syscall_tracer, if (num < 0 || num >= FTRACE_SYSCALL_MAX)
.reset = reset_syscall_tracer, return;
.flags = &syscalls_flags, mutex_lock(&syscall_trace_lock);
}; sys_refcount_enter--;
clear_bit(num, enabled_enter_syscalls);
if (!sys_refcount_enter)
unregister_trace_syscall_enter(ftrace_syscall_enter);
mutex_unlock(&syscall_trace_lock);
}
__init int register_ftrace_syscalls(void) int reg_event_syscall_exit(void *ptr)
{ {
int ret; int ret = 0;
int num;
ret = register_ftrace_event(&syscall_enter_event); char *name;
if (!ret) {
printk(KERN_WARNING "event %d failed to register\n", name = (char *)ptr;
syscall_enter_event.type); num = syscall_name_to_nr(name);
WARN_ON_ONCE(1); if (num < 0 || num >= FTRACE_SYSCALL_MAX)
return -ENOSYS;
mutex_lock(&syscall_trace_lock);
if (!sys_refcount_exit)
ret = register_trace_syscall_exit(ftrace_syscall_exit);
if (ret) {
pr_info("event trace: Could not activate"
"syscall exit trace point");
} else {
set_bit(num, enabled_exit_syscalls);
sys_refcount_exit++;
} }
mutex_unlock(&syscall_trace_lock);
return ret;
}
ret = register_ftrace_event(&syscall_exit_event); void unreg_event_syscall_exit(void *ptr)
if (!ret) { {
printk(KERN_WARNING "event %d failed to register\n", int num;
syscall_exit_event.type); char *name;
WARN_ON_ONCE(1);
}
return register_tracer(&syscall_tracer); name = (char *)ptr;
num = syscall_name_to_nr(name);
if (num < 0 || num >= FTRACE_SYSCALL_MAX)
return;
mutex_lock(&syscall_trace_lock);
sys_refcount_exit--;
clear_bit(num, enabled_exit_syscalls);
if (!sys_refcount_exit)
unregister_trace_syscall_exit(ftrace_syscall_exit);
mutex_unlock(&syscall_trace_lock);
} }
device_initcall(register_ftrace_syscalls);
struct trace_event event_syscall_enter = {
.trace = print_syscall_enter,
.type = TRACE_SYSCALL_ENTER
};
struct trace_event event_syscall_exit = {
.trace = print_syscall_exit,
.type = TRACE_SYSCALL_EXIT
};
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment