Commit 9e8529af authored by Linus Torvalds

Merge tag 'trace-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace

Pull tracing updates from Steven Rostedt:
 "Along with the usual minor fixes and clean ups there are a few major
  changes with this pull request.

   1) Multiple buffers for the ftrace facility

  This feature has been requested by many people over the last few
  years.  I even heard that Google was about to implement it themselves.
  I finally had time and cleaned up the code such that you can now
  create multiple instances of the ftrace buffer and have different
  events go to different buffers.  This way, a low frequency event will
  not be lost in the noise of a high frequency event.

  Note, currently only events can go to different buffers, the tracers
  (ie function, function_graph and the latency tracers) still can only
  be written to the main buffer.

   2) The function tracer triggers have now been extended.

  The function tracer had two triggers.  One to enable tracing when a
  function is hit, and one to disable tracing.  Now you can record a
  stack trace on a single (or many) function(s), take a snapshot of the
  buffer (copy it to the snapshot buffer), and you can enable or disable
  an event to be traced when a function is hit.

   3) A perf clock has been added.

  A "perf" clock can be chosen to be used when tracing.  This will cause
  ftrace to use the same clock as perf uses, and hopefully this will
  make it easier to interleave the perf and ftrace data for analysis."

* tag 'trace-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: (82 commits)
  tracepoints: Prevent null probe from being added
  tracing: Compare to 1 instead of zero for is_signed_type()
  tracing: Remove obsolete macro guard _TRACE_PROFILE_INIT
  ftrace: Get rid of ftrace_profile_bits
  tracing: Check return value of tracing_init_dentry()
  tracing: Get rid of unneeded key calculation in ftrace_hash_move()
  tracing: Reset ftrace_graph_filter_enabled if count is zero
  tracing: Fix off-by-one on allocating stat->pages
  kernel: tracing: Use strlcpy instead of strncpy
  tracing: Update debugfs README file
  tracing: Fix ftrace_dump()
  tracing: Rename trace_event_mutex to trace_event_sem
  tracing: Fix comment about prefix in arch_syscall_match_sym_name()
  tracing: Convert trace_destroy_fields() to static
  tracing: Move find_event_field() into trace_events.c
  tracing: Use TRACE_MAX_PRINT instead of constant
  tracing: Use pr_warn_once instead of open coded implementation
  ring-buffer: Add ring buffer startup selftest
  tracing: Bring Documentation/trace/ftrace.txt up to date
  tracing: Add "perf" trace_clock
  ...

Conflicts:
	kernel/trace/ftrace.c
	kernel/trace/trace.c
parents ec25e246 4c69e6ea
......@@ -320,6 +320,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
on: enable for both 32- and 64-bit processes
off: disable for both 32- and 64-bit processes
alloc_snapshot [FTRACE]
Allocate the ftrace snapshot buffer on boot up when the
main buffer is allocated. This is handy if debugging
and you need to use tracing_snapshot() on boot up, and
do not want to use tracing_snapshot_alloc() as it needs
to be done where GFP_KERNEL allocations are allowed.
amd_iommu= [HW,X86-64]
Pass parameters to the AMD IOMMU driver in the system.
Possible values are:
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -261,8 +261,10 @@ struct ftrace_probe_ops {
void (*func)(unsigned long ip,
unsigned long parent_ip,
void **data);
int (*callback)(unsigned long ip, void **data);
void (*free)(void **data);
int (*init)(struct ftrace_probe_ops *ops,
unsigned long ip, void **data);
void (*free)(struct ftrace_probe_ops *ops,
unsigned long ip, void **data);
int (*print)(struct seq_file *m,
unsigned long ip,
struct ftrace_probe_ops *ops,
......
......@@ -8,6 +8,7 @@
#include <linux/perf_event.h>
struct trace_array;
struct trace_buffer;
struct tracer;
struct dentry;
......@@ -38,6 +39,12 @@ const char *ftrace_print_symbols_seq_u64(struct trace_seq *p,
const char *ftrace_print_hex_seq(struct trace_seq *p,
const unsigned char *buf, int len);
struct trace_iterator;
struct trace_event;
int ftrace_raw_output_prep(struct trace_iterator *iter,
struct trace_event *event);
/*
* The trace entry - the most basic unit of tracing. This is what
* is printed in the end as a single line in the trace output, such as:
......@@ -61,6 +68,7 @@ struct trace_entry {
struct trace_iterator {
struct trace_array *tr;
struct tracer *trace;
struct trace_buffer *trace_buffer;
void *private;
int cpu_file;
struct mutex mutex;
......@@ -95,8 +103,6 @@ enum trace_iter_flags {
};
struct trace_event;
typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
int flags, struct trace_event *event);
......@@ -128,6 +134,13 @@ enum print_line_t {
void tracing_generic_entry_update(struct trace_entry *entry,
unsigned long flags,
int pc);
struct ftrace_event_file;
struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
struct ftrace_event_file *ftrace_file,
int type, unsigned long len,
unsigned long flags, int pc);
struct ring_buffer_event *
trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer,
int type, unsigned long len,
......@@ -182,53 +195,49 @@ extern int ftrace_event_reg(struct ftrace_event_call *event,
enum trace_reg type, void *data);
enum {
TRACE_EVENT_FL_ENABLED_BIT,
TRACE_EVENT_FL_FILTERED_BIT,
TRACE_EVENT_FL_RECORDED_CMD_BIT,
TRACE_EVENT_FL_CAP_ANY_BIT,
TRACE_EVENT_FL_NO_SET_FILTER_BIT,
TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
TRACE_EVENT_FL_WAS_ENABLED_BIT,
};
/*
* Event flags:
* FILTERED - The event has a filter attached
* CAP_ANY - Any user can enable for perf
* NO_SET_FILTER - Set when filter has error and is to be ignored
* IGNORE_ENABLE - For ftrace internal events, do not enable with debugfs file
* WAS_ENABLED - Set and stays set when an event was ever enabled
* (used for module unloading, if a module event is enabled,
* it is best to clear the buffers that used it).
*/
enum {
TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT),
TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT),
TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
TRACE_EVENT_FL_NO_SET_FILTER = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
TRACE_EVENT_FL_IGNORE_ENABLE = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
TRACE_EVENT_FL_WAS_ENABLED = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
};
struct ftrace_event_call {
struct list_head list;
struct ftrace_event_class *class;
char *name;
struct dentry *dir;
struct trace_event event;
const char *print_fmt;
struct event_filter *filter;
struct list_head *files;
void *mod;
void *data;
/*
* 32 bit flags:
* bit 1: enabled
* bit 2: filter_active
* bit 3: enabled cmd record
* bit 4: allow trace by non root (cap any)
* bit 5: failed to apply filter
* bit 6: ftrace internal event (do not enable)
*
* Changes to flags must hold the event_mutex.
*
* Note: Reads of flags do not hold the event_mutex since
* they occur in critical sections. But the way flags
* is currently used, these changes do not affect the code
* except that when a change is made, it may have a slight
* delay in propagating the changes to other CPUs due to
* caching and such.
* bit 0: filter_active
* bit 1: allow trace by non root (cap any)
* bit 2: failed to apply filter
* bit 3: ftrace internal event (do not enable)
* bit 4: Event was enabled by module
*/
unsigned int flags;
int flags; /* static flags of different events */
#ifdef CONFIG_PERF_EVENTS
int perf_refcount;
......@@ -236,6 +245,56 @@ struct ftrace_event_call {
#endif
};
struct trace_array;
struct ftrace_subsystem_dir;
enum {
FTRACE_EVENT_FL_ENABLED_BIT,
FTRACE_EVENT_FL_RECORDED_CMD_BIT,
FTRACE_EVENT_FL_SOFT_MODE_BIT,
FTRACE_EVENT_FL_SOFT_DISABLED_BIT,
};
/*
* Ftrace event file flags:
* ENABLED - The event is enabled
* RECORDED_CMD - The comms should be recorded at sched_switch
* SOFT_MODE - The event is enabled/disabled by SOFT_DISABLED
* SOFT_DISABLED - When set, do not trace the event (even though its
* tracepoint may be enabled)
*/
enum {
FTRACE_EVENT_FL_ENABLED = (1 << FTRACE_EVENT_FL_ENABLED_BIT),
FTRACE_EVENT_FL_RECORDED_CMD = (1 << FTRACE_EVENT_FL_RECORDED_CMD_BIT),
FTRACE_EVENT_FL_SOFT_MODE = (1 << FTRACE_EVENT_FL_SOFT_MODE_BIT),
FTRACE_EVENT_FL_SOFT_DISABLED = (1 << FTRACE_EVENT_FL_SOFT_DISABLED_BIT),
};
/*
 * Per-file state for one event: pairs an ftrace_event_call with a
 * trace_array and carries the enable/soft-disable flag bits
 * (FTRACE_EVENT_FL_*) that are tested when the event fires.
 */
struct ftrace_event_file {
struct list_head list; /* list linkage; NOTE(review): owning list is not visible here */
struct ftrace_event_call *event_call; /* the event this file refers to */
struct dentry *dir; /* presumably the event's debugfs directory - TODO confirm */
struct trace_array *tr; /* trace_array this file is associated with */
struct ftrace_subsystem_dir *system; /* subsystem directory; usage not visible here */
/*
* 32 bit flags:
* bit 0: enabled
* bit 1: enabled cmd record
* bit 2: enable/disable with the soft disable bit
* bit 3: soft disabled
*
* Note: The bits must be set atomically to prevent races
* from other writers. Reads of flags do not need to be in
* sync as they occur in critical sections. But the way flags
* is currently used, these changes do not affect the code
* except that when a change is made, it may have a slight
* delay in propagating the changes to other CPUs due to
* caching and such. Which is mostly OK ;-)
*/
unsigned long flags; /* unsigned long so the bitops (e.g. test_bit) can be used on it */
};
#define __TRACE_EVENT_FLAGS(name, value) \
static int __init trace_init_flags_##name(void) \
{ \
......@@ -274,7 +333,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type,
extern int trace_add_event_call(struct ftrace_event_call *call);
extern void trace_remove_event_call(struct ftrace_event_call *call);
#define is_signed_type(type) (((type)(-1)) < (type)0)
#define is_signed_type(type) (((type)(-1)) < (type)1)
int trace_set_clr_event(const char *system, const char *event, int set);
......
......@@ -486,6 +486,8 @@ enum ftrace_dump_mode {
void tracing_on(void);
void tracing_off(void);
int tracing_is_on(void);
void tracing_snapshot(void);
void tracing_snapshot_alloc(void);
extern void tracing_start(void);
extern void tracing_stop(void);
......@@ -515,10 +517,32 @@ do { \
*
* This is intended as a debugging tool for the developer only.
* Please refrain from leaving trace_printks scattered around in
* your code.
* your code. (Extra memory is used for special buffers that are
* allocated when trace_printk() is used)
*
* A little optimization trick is done here. If there's only one
* argument, there's no need to scan the string for printf formats.
* The trace_puts() will suffice. But how can we take advantage of
* using trace_puts() when trace_printk() has only one argument?
* By stringifying the args and checking the size we can tell
* whether or not there are args. __stringify((__VA_ARGS__)) will
* turn into "()\0" with a size of 3 when there are no args, anything
* else will be bigger. All we need to do is define a string to this,
* and then take its size and compare to 3. If it's bigger, use
* do_trace_printk() otherwise, optimize it to trace_puts(). Then just
* let gcc optimize the rest.
*/
#define trace_printk(fmt, args...) \
#define trace_printk(fmt, ...) \
do { \
char _______STR[] = __stringify((__VA_ARGS__)); \
if (sizeof(_______STR) > 3) \
do_trace_printk(fmt, ##__VA_ARGS__); \
else \
trace_puts(fmt); \
} while (0)
#define do_trace_printk(fmt, args...) \
do { \
static const char *trace_printk_fmt \
__attribute__((section("__trace_printk_fmt"))) = \
......@@ -538,7 +562,45 @@ int __trace_bprintk(unsigned long ip, const char *fmt, ...);
extern __printf(2, 3)
int __trace_printk(unsigned long ip, const char *fmt, ...);
extern void trace_dump_stack(void);
/**
* trace_puts - write a string into the ftrace buffer
* @str: the string to record
*
* Note: __trace_bputs is an internal function for trace_puts and
* the @ip is passed in via the trace_puts macro.
*
* This is similar to trace_printk() but is made for those really fast
* paths that a developer wants the least amount of "Heisenbug" effects,
* where the processing of the print format is still too much.
*
* This function allows a kernel developer to debug fast path sections
* that printk is not appropriate for. By scattering in various
* printk like tracing in the code, a developer can quickly see
* where problems are occurring.
*
* This is intended as a debugging tool for the developer only.
* Please refrain from leaving trace_puts scattered around in
* your code. (Extra memory is used for special buffers that are
* allocated when trace_puts() is used)
*
* Returns: 0 if nothing was written, positive # if string was.
* (1 when __trace_bputs is used, strlen(str) when __trace_puts is used)
*/
extern int __trace_bputs(unsigned long ip, const char *str);
extern int __trace_puts(unsigned long ip, const char *str, int size);
#define trace_puts(str) ({ \
static const char *trace_printk_fmt \
__attribute__((section("__trace_printk_fmt"))) = \
__builtin_constant_p(str) ? str : NULL; \
\
if (__builtin_constant_p(str)) \
__trace_bputs(_THIS_IP_, trace_printk_fmt); \
else \
__trace_puts(_THIS_IP_, str, strlen(str)); \
})
extern void trace_dump_stack(int skip);
/*
* The double __builtin_constant_p is because gcc will give us an error
......@@ -573,6 +635,8 @@ static inline void trace_dump_stack(void) { }
static inline void tracing_on(void) { }
static inline void tracing_off(void) { }
static inline int tracing_is_on(void) { return 0; }
static inline void tracing_snapshot(void) { }
static inline void tracing_snapshot_alloc(void) { }
static inline __printf(1, 2)
int trace_printk(const char *fmt, ...)
......
......@@ -4,6 +4,7 @@
#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
struct ring_buffer;
struct ring_buffer_iter;
......@@ -96,6 +97,11 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
__ring_buffer_alloc((size), (flags), &__key); \
})
void ring_buffer_wait(struct ring_buffer *buffer, int cpu);
int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
struct file *filp, poll_table *poll_table);
#define RING_BUFFER_ALL_CPUS -1
void ring_buffer_free(struct ring_buffer *buffer);
......
......@@ -16,6 +16,7 @@
extern u64 notrace trace_clock_local(void);
extern u64 notrace trace_clock(void);
extern u64 notrace trace_clock_jiffies(void);
extern u64 notrace trace_clock_global(void);
extern u64 notrace trace_clock_counter(void);
......
......@@ -227,28 +227,17 @@ static notrace enum print_line_t \
ftrace_raw_output_##call(struct trace_iterator *iter, int flags, \
struct trace_event *trace_event) \
{ \
struct ftrace_event_call *event; \
struct trace_seq *s = &iter->seq; \
struct trace_seq __maybe_unused *p = &iter->tmp_seq; \
struct ftrace_raw_##call *field; \
struct trace_entry *entry; \
struct trace_seq *p = &iter->tmp_seq; \
int ret; \
\
event = container_of(trace_event, struct ftrace_event_call, \
event); \
\
entry = iter->ent; \
\
if (entry->type != event->event.type) { \
WARN_ON_ONCE(1); \
return TRACE_TYPE_UNHANDLED; \
} \
\
field = (typeof(field))entry; \
field = (typeof(field))iter->ent; \
\
trace_seq_init(p); \
ret = trace_seq_printf(s, "%s: ", event->name); \
ret = ftrace_raw_output_prep(iter, trace_event); \
if (ret) \
return ret; \
\
ret = trace_seq_printf(s, print); \
if (!ret) \
return TRACE_TYPE_PARTIAL_LINE; \
......@@ -335,7 +324,7 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = { \
#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \
static int notrace \
static int notrace __init \
ftrace_define_fields_##call(struct ftrace_event_call *event_call) \
{ \
struct ftrace_raw_##call field; \
......@@ -414,7 +403,8 @@ static inline notrace int ftrace_get_offsets_##call( \
*
* static void ftrace_raw_event_<call>(void *__data, proto)
* {
* struct ftrace_event_call *event_call = __data;
* struct ftrace_event_file *ftrace_file = __data;
* struct ftrace_event_call *event_call = ftrace_file->event_call;
* struct ftrace_data_offsets_<call> __maybe_unused __data_offsets;
* struct ring_buffer_event *event;
* struct ftrace_raw_<call> *entry; <-- defined in stage 1
......@@ -423,12 +413,16 @@ static inline notrace int ftrace_get_offsets_##call( \
* int __data_size;
* int pc;
*
* if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT,
* &ftrace_file->flags))
* return;
*
* local_save_flags(irq_flags);
* pc = preempt_count();
*
* __data_size = ftrace_get_offsets_<call>(&__data_offsets, args);
*
* event = trace_current_buffer_lock_reserve(&buffer,
* event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
* event_<call>->event.type,
* sizeof(*entry) + __data_size,
* irq_flags, pc);
......@@ -440,7 +434,7 @@ static inline notrace int ftrace_get_offsets_##call( \
* __array macros.
*
* if (!filter_current_check_discard(buffer, event_call, entry, event))
* trace_current_buffer_unlock_commit(buffer,
* trace_nowake_buffer_unlock_commit(buffer,
* event, irq_flags, pc);
* }
*
......@@ -518,7 +512,8 @@ static inline notrace int ftrace_get_offsets_##call( \
static notrace void \
ftrace_raw_event_##call(void *__data, proto) \
{ \
struct ftrace_event_call *event_call = __data; \
struct ftrace_event_file *ftrace_file = __data; \
struct ftrace_event_call *event_call = ftrace_file->event_call; \
struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
struct ring_buffer_event *event; \
struct ftrace_raw_##call *entry; \
......@@ -527,12 +522,16 @@ ftrace_raw_event_##call(void *__data, proto) \
int __data_size; \
int pc; \
\
if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, \
&ftrace_file->flags)) \
return; \
\
local_save_flags(irq_flags); \
pc = preempt_count(); \
\
__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
\
event = trace_current_buffer_lock_reserve(&buffer, \
event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, \
event_call->event.type, \
sizeof(*entry) + __data_size, \
irq_flags, pc); \
......@@ -581,7 +580,7 @@ static inline void ftrace_test_probe_##call(void) \
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
_TRACE_PERF_PROTO(call, PARAMS(proto)); \
static const char print_fmt_##call[] = print; \
static struct ftrace_event_class __used event_class_##call = { \
static struct ftrace_event_class __used __refdata event_class_##call = { \
.system = __stringify(TRACE_SYSTEM), \
.define_fields = ftrace_define_fields_##call, \
.fields = LIST_HEAD_INIT(event_class_##call.fields),\
......@@ -705,5 +704,3 @@ static inline void perf_test_probe_##call(void) \
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
#endif /* CONFIG_PERF_EVENTS */
#undef _TRACE_PROFILE_INIT
......@@ -176,6 +176,8 @@ config IRQSOFF_TRACER
select GENERIC_TRACER
select TRACER_MAX_TRACE
select RING_BUFFER_ALLOW_SWAP
select TRACER_SNAPSHOT
select TRACER_SNAPSHOT_PER_CPU_SWAP
help
This option measures the time spent in irqs-off critical
sections, with microsecond accuracy.
......@@ -198,6 +200,8 @@ config PREEMPT_TRACER
select GENERIC_TRACER
select TRACER_MAX_TRACE
select RING_BUFFER_ALLOW_SWAP
select TRACER_SNAPSHOT
select TRACER_SNAPSHOT_PER_CPU_SWAP
help
This option measures the time spent in preemption-off critical
sections, with microsecond accuracy.
......@@ -217,6 +221,7 @@ config SCHED_TRACER
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
select TRACER_MAX_TRACE
select TRACER_SNAPSHOT
help
This tracer tracks the latency of the highest priority task
to be scheduled in, starting from the point it has woken up.
......@@ -248,6 +253,27 @@ config TRACER_SNAPSHOT
echo 1 > /sys/kernel/debug/tracing/snapshot
cat snapshot
config TRACER_SNAPSHOT_PER_CPU_SWAP
bool "Allow snapshot to swap per CPU"
depends on TRACER_SNAPSHOT
select RING_BUFFER_ALLOW_SWAP
help
Allow doing a snapshot of a single CPU buffer instead of a
full swap (all buffers). If this is set, then the following is
allowed:
echo 1 > /sys/kernel/debug/tracing/per_cpu/cpu2/snapshot
After which, only the tracing buffer for CPU 2 was swapped with
the main tracing buffer, and the other CPU buffers remain the same.
When this is enabled, this adds a little more overhead to the
trace recording, as it needs to add some checks to synchronize
recording with swaps. But this does not affect the performance
of the overall system. This is enabled by default when the preempt
or irq latency tracers are enabled, as those need to swap as well
and already add the overhead (plus a lot more).
config TRACE_BRANCH_PROFILING
bool
select GENERIC_TRACER
......@@ -524,6 +550,29 @@ config RING_BUFFER_BENCHMARK
If unsure, say N.
config RING_BUFFER_STARTUP_TEST
bool "Ring buffer startup self test"
depends on RING_BUFFER
help
Run a simple self test on the ring buffer on boot up. Late in the
kernel boot sequence, the test will start that kicks off
a thread per cpu. Each thread will write various size events
into the ring buffer. Another thread is created to send IPIs
to each of the threads, where the IPI handler will also write
to the ring buffer, to test/stress the nesting ability.
If any anomalies are discovered, a warning will be displayed
and all ring buffers will be disabled.
The test runs for 10 seconds. This will slow your boot time
by at least 10 more seconds.
At the end of the test, statistics and more checks are done.
It will output the stats of each per cpu buffer. What
was written, the sizes, what was read, what was lost, and
other similar details.
If unsure, say N
endif # FTRACE
endif # TRACING_SUPPORT
......
......@@ -72,7 +72,7 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
bool blk_tracer = blk_tracer_enabled;
if (blk_tracer) {
buffer = blk_tr->buffer;
buffer = blk_tr->trace_buffer.buffer;
pc = preempt_count();
event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
sizeof(*t) + len,
......@@ -218,7 +218,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
if (blk_tracer) {
tracing_record_cmdline(current);
buffer = blk_tr->buffer;
buffer = blk_tr->trace_buffer.buffer;
pc = preempt_count();
event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
sizeof(*t) + pdu_len,
......
......@@ -486,7 +486,6 @@ struct ftrace_profile_stat {
#define PROFILES_PER_PAGE \
(PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))
static int ftrace_profile_bits __read_mostly;
static int ftrace_profile_enabled __read_mostly;
/* ftrace_profile_lock - synchronize the enable and disable of the profiler */
......@@ -494,7 +493,8 @@ static DEFINE_MUTEX(ftrace_profile_lock);
static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats);
#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */
#define FTRACE_PROFILE_HASH_BITS 10
#define FTRACE_PROFILE_HASH_SIZE (1 << FTRACE_PROFILE_HASH_BITS)
static void *
function_stat_next(void *v, int idx)
......@@ -676,7 +676,7 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE);
for (i = 0; i < pages; i++) {
for (i = 1; i < pages; i++) {
pg->next = (void *)get_zeroed_page(GFP_KERNEL);
if (!pg->next)
goto out_free;
......@@ -724,13 +724,6 @@ static int ftrace_profile_init_cpu(int cpu)
if (!stat->hash)
return -ENOMEM;
if (!ftrace_profile_bits) {
size--;
for (; size; size >>= 1)
ftrace_profile_bits++;
}
/* Preallocate the function profiling pages */
if (ftrace_profile_pages_init(stat) < 0) {
kfree(stat->hash);
......@@ -763,7 +756,7 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
struct hlist_head *hhd;
unsigned long key;
key = hash_long(ip, ftrace_profile_bits);
key = hash_long(ip, FTRACE_PROFILE_HASH_BITS);
hhd = &stat->hash[key];
if (hlist_empty(hhd))
......@@ -782,7 +775,7 @@ static void ftrace_add_profile(struct ftrace_profile_stat *stat,
{
unsigned long key;
key = hash_long(rec->ip, ftrace_profile_bits);
key = hash_long(rec->ip, FTRACE_PROFILE_HASH_BITS);
hlist_add_head_rcu(&rec->node, &stat->hash[key]);
}
......@@ -1079,7 +1072,7 @@ struct ftrace_func_probe {
unsigned long flags;
unsigned long ip;
void *data;
struct rcu_head rcu;
struct list_head free_list;
};
struct ftrace_func_entry {
......@@ -1329,7 +1322,6 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
struct hlist_head *hhd;
struct ftrace_hash *old_hash;
struct ftrace_hash *new_hash;
unsigned long key;
int size = src->count;
int bits = 0;
int ret;
......@@ -1372,10 +1364,6 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
for (i = 0; i < size; i++) {
hhd = &src->buckets[i];
hlist_for_each_entry_safe(entry, tn, hhd, hlist) {
if (bits > 0)
key = hash_long(entry->ip, bits);
else
key = 0;
remove_hash_entry(src, entry);
__add_hash_entry(new_hash, entry);
}
......@@ -2973,28 +2961,27 @@ static void __disable_ftrace_function_probe(void)
}
static void ftrace_free_entry_rcu(struct rcu_head *rhp)
static void ftrace_free_entry(struct ftrace_func_probe *entry)
{
struct ftrace_func_probe *entry =
container_of(rhp, struct ftrace_func_probe, rcu);
if (entry->ops->free)
entry->ops->free(&entry->data);
entry->ops->free(entry->ops, entry->ip, &entry->data);
kfree(entry);
}
int
register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
void *data)
{
struct ftrace_func_probe *entry;
struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash;
struct ftrace_hash *hash;
struct ftrace_page *pg;
struct dyn_ftrace *rec;
int type, len, not;
unsigned long key;
int count = 0;
char *search;
int ret;
type = filter_parse_regex(glob, strlen(glob), &search, &not);
len = strlen(search);
......@@ -3005,8 +2992,16 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
mutex_lock(&ftrace_lock);
if (unlikely(ftrace_disabled))
hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
if (!hash) {
count = -ENOMEM;
goto out_unlock;
}
if (unlikely(ftrace_disabled)) {
count = -ENODEV;
goto out_unlock;
}
do_for_each_ftrace_rec(pg, rec) {
......@@ -3030,14 +3025,21 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
* for each function we find. We call the callback
* to give the caller an opportunity to do so.
*/
if (ops->callback) {
if (ops->callback(rec->ip, &entry->data) < 0) {
if (ops->init) {
if (ops->init(ops, rec->ip, &entry->data) < 0) {
/* caller does not like this func */
kfree(entry);
continue;
}
}
ret = enter_record(hash, rec, 0);
if (ret < 0) {
kfree(entry);
count = ret;
goto out_unlock;
}
entry->ops = ops;
entry->ip = rec->ip;
......@@ -3045,10 +3047,16 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
hlist_add_head_rcu(&entry->node, &ftrace_func_hash[key]);
} while_for_each_ftrace_rec();
ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
if (ret < 0)
count = ret;
__enable_ftrace_function_probe();
out_unlock:
mutex_unlock(&ftrace_lock);
free_ftrace_hash(hash);
return count;
}
......@@ -3062,7 +3070,12 @@ static void
__unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
void *data, int flags)
{
struct ftrace_func_entry *rec_entry;
struct ftrace_func_probe *entry;
struct ftrace_func_probe *p;
struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash;
struct list_head free_list;
struct ftrace_hash *hash;
struct hlist_node *tmp;
char str[KSYM_SYMBOL_LEN];
int type = MATCH_FULL;
......@@ -3083,6 +3096,14 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
}
mutex_lock(&ftrace_lock);
hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
if (!hash)
/* Hmm, should report this somehow */
goto out_unlock;
INIT_LIST_HEAD(&free_list);
for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
struct hlist_head *hhd = &ftrace_func_hash[i];
......@@ -3103,12 +3124,30 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
continue;
}
rec_entry = ftrace_lookup_ip(hash, entry->ip);
/* It is possible more than one entry had this ip */
if (rec_entry)
free_hash_entry(hash, rec_entry);
hlist_del_rcu(&entry->node);
call_rcu_sched(&entry->rcu, ftrace_free_entry_rcu);
list_add(&entry->free_list, &free_list);
}
}
__disable_ftrace_function_probe();
/*
* Remove after the disable is called. Otherwise, if the last
* probe is removed, a null hash means *all enabled*.
*/
ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
synchronize_sched();
list_for_each_entry_safe(entry, p, &free_list, free_list) {
list_del(&entry->free_list);
ftrace_free_entry(entry);
}
out_unlock:
mutex_unlock(&ftrace_lock);
free_ftrace_hash(hash);
}
void
......@@ -3736,7 +3775,8 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
if (fail)
return -EINVAL;
ftrace_graph_filter_enabled = 1;
ftrace_graph_filter_enabled = !!(*idx);
return 0;
}
......
......@@ -8,13 +8,16 @@
#include <linux/trace_clock.h>
#include <linux/trace_seq.h>
#include <linux/spinlock.h>
#include <linux/irq_work.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kthread.h> /* for self test */
#include <linux/kmemcheck.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
......@@ -444,6 +447,12 @@ int ring_buffer_print_page_header(struct trace_seq *s)
return ret;
}
/*
 * Couples an irq_work with the wait queue it wakes. One instance is
 * embedded in each ring_buffer_per_cpu and one in the ring_buffer itself.
 */
struct rb_irq_work {
struct irq_work work; /* initialised with rb_wake_up_waiters() as its handler */
wait_queue_head_t waiters; /* tasks queued by ring_buffer_wait() / ring_buffer_poll_wait() */
bool waiters_pending; /* set to true by a waiter or poller once it is queued */
};
/*
* head_page == tail_page && head == tail then buffer is empty.
*/
......@@ -478,6 +487,8 @@ struct ring_buffer_per_cpu {
struct list_head new_pages; /* new pages to add */
struct work_struct update_pages_work;
struct completion update_done;
struct rb_irq_work irq_work;
};
struct ring_buffer {
......@@ -497,6 +508,8 @@ struct ring_buffer {
struct notifier_block cpu_notify;
#endif
u64 (*clock)(void);
struct rb_irq_work irq_work;
};
struct ring_buffer_iter {
......@@ -508,6 +521,118 @@ struct ring_buffer_iter {
u64 read_stamp;
};
/*
 * rb_wake_up_waiters - irq_work handler that wakes ring buffer waiters
 * @work: the irq_work embedded in a struct rb_irq_work
 *
 * Recovers the enclosing rb_irq_work from @work and wakes up every task
 * that is blocked on its waiters queue.
 */
static void rb_wake_up_waiters(struct irq_work *work)
{
	struct rb_irq_work *rb_work;

	rb_work = container_of(work, struct rb_irq_work, work);
	wake_up_all(&rb_work->waiters);
}
/**
* ring_buffer_wait - wait for input to the ring buffer
* @buffer: buffer to wait on
* @cpu: the cpu buffer to wait on
*
* If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
* as data is added to any of the @buffer's cpu buffers. Otherwise
* it will wait for data to be added to a specific cpu buffer.
*
* The task is queued in TASK_INTERRUPTIBLE state and only calls
* schedule() if the selected buffer(s) are empty when checked, so
* the function may return without having slept.
*
* NOTE(review): for a specific @cpu the value is used to index
* buffer->buffers[] without validation here; callers presumably
* pass a cpu that has a buffer - confirm.
*/
void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
DEFINE_WAIT(wait);
struct rb_irq_work *work;
/*
* Depending on what the caller is waiting for, either any
* data in any cpu buffer, or a specific buffer, put the
* caller on the appropriate wait queue.
*/
if (cpu == RING_BUFFER_ALL_CPUS)
work = &buffer->irq_work;
else {
cpu_buffer = buffer->buffers[cpu];
work = &cpu_buffer->irq_work;
}
/* Queue ourselves before setting the flag and checking for data. */
prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
/*
* The events can happen in critical sections where
* checking a work queue can cause deadlocks.
* After adding a task to the queue, this flag is set
* only to notify events to try to wake up the queue
* using irq_work.
*
* We don't clear it even if the buffer is no longer
* empty. The flag only causes the next event to run
* irq_work to do the work queue wake up. The worst
* that can happen if we race with !trace_empty() is that
* an event will cause an irq_work to try to wake up
* an empty queue.
*
* There's no reason to protect this flag either, as
* the work queue and irq_work logic will do the necessary
* synchronization for the wake ups. The only thing
* that is necessary is that the wake up happens after
* a task has been queued. It's OK for spurious wake ups.
*/
work->waiters_pending = true;
/* Only sleep if there is still nothing to read in what we wait on. */
if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) ||
(cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu)))
schedule();
finish_wait(&work->waiters, &wait);
}
/**
* ring_buffer_poll_wait - poll on buffer input
* @buffer: buffer to wait on
* @cpu: the cpu buffer to wait on
* @filp: the file descriptor
* @poll_table: The poll descriptor
*
* If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
* as data is added to any of the @buffer's cpu buffers. Otherwise
* it will wait for data to be added to a specific cpu buffer.
*
* Returns POLLIN | POLLRDNORM if data exists in the buffers,
* zero otherwise.
*
* NOTE(review): for a specific @cpu the value is used to index
* buffer->buffers[] without validation here; callers presumably
* pass a cpu that has a buffer - confirm.
*/
int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
struct file *filp, poll_table *poll_table)
{
struct ring_buffer_per_cpu *cpu_buffer;
struct rb_irq_work *work;
/* Data is already available: report readable without registering. */
if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
(cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
return POLLIN | POLLRDNORM;
/* Pick the wait queue: the buffer-wide one, or the given cpu's. */
if (cpu == RING_BUFFER_ALL_CPUS)
work = &buffer->irq_work;
else {
cpu_buffer = buffer->buffers[cpu];
work = &cpu_buffer->irq_work;
}
/* Flag that a waiter exists, then register on the wait queue. */
work->waiters_pending = true;
poll_wait(filp, &work->waiters, poll_table);
/*
 * Check again after registering; presumably this covers data that
 * arrived between the first check and poll_wait() - TODO confirm.
 */
if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
(cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
return POLLIN | POLLRDNORM;
return 0;
}
/* buffer may be either ring_buffer or ring_buffer_per_cpu */
#define RB_WARN_ON(b, cond) \
({ \
......@@ -1063,6 +1188,8 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
init_completion(&cpu_buffer->update_done);
init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
init_waitqueue_head(&cpu_buffer->irq_work.waiters);
bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
GFP_KERNEL, cpu_to_node(cpu));
......@@ -1158,6 +1285,9 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
buffer->clock = trace_clock_local;
buffer->reader_lock_key = key;
init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters);
init_waitqueue_head(&buffer->irq_work.waiters);
/* need at least two pages */
if (nr_pages < 2)
nr_pages = 2;
......@@ -1553,11 +1683,22 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
if (!cpu_buffer->nr_pages_to_update)
continue;
if (cpu_online(cpu))
/* The update must run on the CPU that is being updated. */
preempt_disable();
if (cpu == smp_processor_id() || !cpu_online(cpu)) {
rb_update_pages(cpu_buffer);
cpu_buffer->nr_pages_to_update = 0;
} else {
/*
* Can not disable preemption for schedule_work_on()
* on PREEMPT_RT.
*/
preempt_enable();
schedule_work_on(cpu,
&cpu_buffer->update_pages_work);
else
rb_update_pages(cpu_buffer);
preempt_disable();
}
preempt_enable();
}
/* wait for all the updates to complete */
......@@ -1595,12 +1736,22 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
get_online_cpus();
if (cpu_online(cpu_id)) {
preempt_disable();
/* The update must run on the CPU that is being updated. */
if (cpu_id == smp_processor_id() || !cpu_online(cpu_id))
rb_update_pages(cpu_buffer);
else {
/*
* Can not disable preemption for schedule_work_on()
* on PREEMPT_RT.
*/
preempt_enable();
schedule_work_on(cpu_id,
&cpu_buffer->update_pages_work);
wait_for_completion(&cpu_buffer->update_done);
} else
rb_update_pages(cpu_buffer);
preempt_disable();
}
preempt_enable();
cpu_buffer->nr_pages_to_update = 0;
put_online_cpus();
......@@ -2612,6 +2763,22 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
rb_end_commit(cpu_buffer);
}
static __always_inline void
rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
{
if (buffer->irq_work.waiters_pending) {
buffer->irq_work.waiters_pending = false;
/* irq_work_queue() supplies it's own memory barriers */
irq_work_queue(&buffer->irq_work.work);
}
if (cpu_buffer->irq_work.waiters_pending) {
cpu_buffer->irq_work.waiters_pending = false;
/* irq_work_queue() supplies it's own memory barriers */
irq_work_queue(&cpu_buffer->irq_work.work);
}
}
/**
* ring_buffer_unlock_commit - commit a reserved
* @buffer: The buffer to commit to
......@@ -2631,6 +2798,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
rb_commit(cpu_buffer, event);
rb_wakeups(buffer, cpu_buffer);
trace_recursive_unlock();
preempt_enable_notrace();
......@@ -2803,6 +2972,8 @@ int ring_buffer_write(struct ring_buffer *buffer,
rb_commit(cpu_buffer, event);
rb_wakeups(buffer, cpu_buffer);
ret = 0;
out:
preempt_enable_notrace();
......@@ -4467,3 +4638,320 @@ static int rb_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
}
#endif
#ifdef CONFIG_RING_BUFFER_STARTUP_TEST
/*
* This is a basic integrity check of the ring buffer.
* Late in the boot cycle this test will run when configured in.
* It will kick off a thread per CPU that will go into a loop
* writing to the per cpu ring buffer various sizes of data.
* Some of the data will be large items, some small.
*
* Another thread is created that goes into a spin, sending out
* IPIs to the other CPUs to also write into the ring buffer.
* This is to test the nesting ability of the buffer.
*
* Basic stats are recorded and reported. If something in the
* ring buffer should happen that's not expected, a big warning
* is displayed and all ring buffers are disabled.
*/
static struct task_struct *rb_threads[NR_CPUS] __initdata;
struct rb_test_data {
struct ring_buffer *buffer;
unsigned long events;
unsigned long bytes_written;
unsigned long bytes_alloc;
unsigned long bytes_dropped;
unsigned long events_nested;
unsigned long bytes_written_nested;
unsigned long bytes_alloc_nested;
unsigned long bytes_dropped_nested;
int min_size_nested;
int max_size_nested;
int max_size;
int min_size;
int cpu;
int cnt;
};
static struct rb_test_data rb_data[NR_CPUS] __initdata;
/* 1 meg per cpu */
#define RB_TEST_BUFFER_SIZE 1048576
static char rb_string[] __initdata =
"abcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()?+\\"
"?+|:';\",.<>/?abcdefghijklmnopqrstuvwxyz1234567890"
"!@#$%^&*()?+\\?+|:';\",.<>/?abcdefghijklmnopqrstuv";
static bool rb_test_started __initdata;
struct rb_item {
int size;
char str[];
};
/*
 * rb_write_something - write one test item into the test ring buffer
 * @data: per cpu test bookkeeping; supplies the buffer and collects stats
 * @nested: true when called from the IPI handler (rb_ipi()), false when
 *	called from the per cpu writer thread (rb_test())
 *
 * Reserves an event large enough for a struct rb_item header plus a
 * slice of rb_string, fills it in, commits it and accounts for it in
 * the nested or non-nested counters of @data.
 *
 * Returns 0 if an event was reserved and committed, or the length that
 * was requested if the reservation failed.
 */
static __init int rb_write_something(struct rb_test_data *data, bool nested)
{
	struct ring_buffer_event *event;
	struct rb_item *item;
	bool started;
	int event_len;
	int size;
	int len;
	int cnt;

	/* Have nested writes different than what is written */
	cnt = data->cnt + (nested ? 27 : 0);

	/*
	 * Multiply cnt by ~e, to make some unique increment. Use the
	 * adjusted cnt (not data->cnt) so that nested writes really do
	 * get a different size.
	 */
	size = (cnt * 68 / 25) % (sizeof(rb_string) - 1);

	len = size + sizeof(struct rb_item);

	started = rb_test_started;
	/* read rb_test_started before checking buffer enabled */
	smp_rmb();

	event = ring_buffer_lock_reserve(data->buffer, len);
	if (!event) {
		/* Ignore dropped events before test starts. */
		if (started) {
			/* Charge the drop to the counter of the right context */
			if (nested)
				data->bytes_dropped_nested += len;
			else
				data->bytes_dropped += len;
		}
		return len;
	}

	event_len = ring_buffer_event_length(event);

	/* The reserved event must be able to hold what was asked for */
	if (RB_WARN_ON(data->buffer, event_len < len))
		goto out;

	item = ring_buffer_event_data(event);
	item->size = size;
	memcpy(item->str, rb_string, size);

	if (nested) {
		data->bytes_alloc_nested += event_len;
		data->bytes_written_nested += len;
		data->events_nested++;
		/* A min of zero means no size has been recorded yet */
		if (!data->min_size_nested || len < data->min_size_nested)
			data->min_size_nested = len;
		if (len > data->max_size_nested)
			data->max_size_nested = len;
	} else {
		data->bytes_alloc += event_len;
		data->bytes_written += len;
		data->events++;
		/* A min of zero means no size has been recorded yet */
		if (!data->min_size || len < data->min_size)
			data->min_size = len;
		if (len > data->max_size)
			data->max_size = len;
	}

 out:
	ring_buffer_unlock_commit(data->buffer, event);

	return 0;
}
/*
 * Per cpu writer thread: keep writing non-nested items, pausing
 * between writes, until asked to stop.
 */
static __init int rb_test(void *arg)
{
	struct rb_test_data *data = arg;
	unsigned long min_us;

	while (!kthread_should_stop()) {
		rb_write_something(data, false);
		data->cnt++;

		set_current_state(TASK_INTERRUPTIBLE);
		/* Sleep for at least 100, 200 or 300us and at most 1ms */
		min_us = ((data->cnt % 3) + 1) * 100;
		usleep_range(min_us, 1000);
	}

	return 0;
}
static __init void rb_ipi(void *ignore)
{
struct rb_test_data *data;
int cpu = smp_processor_id();
data = &rb_data[cpu];
rb_write_something(data, true);
}
/*
 * The "hammer" thread: repeatedly ask cpus to run rb_ipi() so that
 * nested writes hit the ring buffer, until asked to stop.
 */
static __init int rb_hammer_test(void *arg)
{
	for (;;) {
		if (kthread_should_stop())
			break;

		/* Send an IPI to all cpus to write data! */
		smp_call_function(rb_ipi, NULL, 1);

		/* No sleep, but for non preempt, let others run */
		schedule();
	}

	return 0;
}
/*
 * test_ringbuffer - boot time sanity test of the ring buffer
 *
 * Allocates a dedicated buffer, starts one writer thread per online
 * cpu (rb_test()) plus one "hammer" thread (rb_hammer_test()), lets
 * them run for ten seconds, then reads everything back and cross
 * checks the data and the counters kept in rb_data[].
 *
 * Returns -1 if a test thread could not be created. Otherwise returns
 * 0 (also when the buffer could not be allocated); the outcome of the
 * checks is only reported through the log and RB_WARN_ON().
 */
static __init int test_ringbuffer(void)
{
	struct task_struct *rb_hammer;
	struct ring_buffer *buffer;
	int cpu;
	int ret = 0;

	pr_info("Running ring buffer tests...\n");

	buffer = ring_buffer_alloc(RB_TEST_BUFFER_SIZE, RB_FL_OVERWRITE);
	if (WARN_ON(!buffer))
		return 0;

	/* Disable buffer so that threads can't write to it yet */
	ring_buffer_record_off(buffer);

	for_each_online_cpu(cpu) {
		rb_data[cpu].buffer = buffer;
		rb_data[cpu].cpu = cpu;
		rb_data[cpu].cnt = cpu;
		rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu],
						 "rbtester/%d", cpu);
		/* kthread_create() fails with an ERR_PTR(), never NULL */
		if (WARN_ON(IS_ERR(rb_threads[cpu]))) {
			pr_cont("FAILED\n");
			/* Keep the cleanup loop away from the error pointer */
			rb_threads[cpu] = NULL;
			ret = -1;
			goto out_free;
		}

		kthread_bind(rb_threads[cpu], cpu);
		wake_up_process(rb_threads[cpu]);
	}

	/* Now create the rb hammer! */
	rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer");
	/* kthread_run() fails with an ERR_PTR() as well */
	if (WARN_ON(IS_ERR(rb_hammer))) {
		pr_cont("FAILED\n");
		ret = -1;
		goto out_free;
	}

	ring_buffer_record_on(buffer);
	/*
	 * Show buffer is enabled before setting rb_test_started.
	 * Yes there's a small race window where events could be
	 * dropped and the thread wont catch it. But when a ring
	 * buffer gets enabled, there will always be some kind of
	 * delay before other CPUs see it. Thus, we don't care about
	 * those dropped events. We care about events dropped after
	 * the threads see that the buffer is active.
	 */
	smp_wmb();
	rb_test_started = true;

	set_current_state(TASK_INTERRUPTIBLE);
	/* Just run for 10 seconds */
	schedule_timeout(10 * HZ);

	kthread_stop(rb_hammer);

 out_free:
	/* Stop every writer thread that was successfully created */
	for_each_online_cpu(cpu) {
		if (!rb_threads[cpu])
			break;
		kthread_stop(rb_threads[cpu]);
	}
	if (ret) {
		ring_buffer_free(buffer);
		return ret;
	}

	/* Report! */
	pr_info("finished\n");
	for_each_online_cpu(cpu) {
		struct ring_buffer_event *event;
		struct rb_test_data *data = &rb_data[cpu];
		struct rb_item *item;
		unsigned long total_events;
		unsigned long total_dropped;
		unsigned long total_written;
		unsigned long total_alloc;
		unsigned long total_read = 0;
		unsigned long total_size = 0;
		unsigned long total_len = 0;
		unsigned long total_lost = 0;
		unsigned long lost;
		int big_event_size;
		int small_event_size;

		/* Assume failure until this cpu passed its checks */
		ret = -1;

		total_events = data->events + data->events_nested;
		total_written = data->bytes_written + data->bytes_written_nested;
		total_alloc = data->bytes_alloc + data->bytes_alloc_nested;
		total_dropped = data->bytes_dropped + data->bytes_dropped_nested;

		big_event_size = data->max_size + data->max_size_nested;
		small_event_size = data->min_size + data->min_size_nested;

		pr_info("CPU %d:\n", cpu);
		pr_info(" events: %ld\n", total_events);
		pr_info(" dropped bytes: %ld\n", total_dropped);
		pr_info(" alloced bytes: %ld\n", total_alloc);
		pr_info(" written bytes: %ld\n", total_written);
		pr_info(" biggest event: %d\n", big_event_size);
		pr_info(" smallest event: %d\n", small_event_size);

		/* No reservation may have failed once the test had started */
		if (RB_WARN_ON(buffer, total_dropped))
			break;

		ret = 0;

		/* Drain this cpu's buffer and verify every item's payload */
		while ((event = ring_buffer_consume(buffer, cpu, NULL, &lost))) {
			total_lost += lost;
			item = ring_buffer_event_data(event);
			total_len += ring_buffer_event_length(event);
			total_size += item->size + sizeof(struct rb_item);
			if (memcmp(&item->str[0], rb_string, item->size) != 0) {
				pr_info("FAILED!\n");
				pr_info("buffer had: %.*s\n", item->size, item->str);
				pr_info("expected: %.*s\n", item->size, rb_string);
				RB_WARN_ON(buffer, 1);
				ret = -1;
				break;
			}
			total_read++;
		}
		if (ret)
			break;

		ret = -1;

		pr_info(" read events: %ld\n", total_read);
		pr_info(" lost events: %ld\n", total_lost);
		pr_info(" total events: %ld\n", total_lost + total_read);
		pr_info(" recorded len bytes: %ld\n", total_len);
		pr_info(" recorded size bytes: %ld\n", total_size);
		if (total_lost)
			pr_info(" With dropped events, record len and size may not match\n"
				" alloced and written from above\n");
		if (!total_lost) {
			/* Nothing was overwritten, so the byte totals must agree */
			if (RB_WARN_ON(buffer, total_len != total_alloc ||
				       total_size != total_written))
				break;
		}
		if (RB_WARN_ON(buffer, total_lost + total_read != total_events))
			break;

		ret = 0;
	}
	if (!ret)
		pr_info("Ring buffer PASSED!\n");

	ring_buffer_free(buffer);
	return 0;
}
late_initcall(test_ringbuffer);
#endif /* CONFIG_RING_BUFFER_STARTUP_TEST */
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -13,6 +13,11 @@
#include <linux/trace_seq.h>
#include <linux/ftrace_event.h>
#ifdef CONFIG_FTRACE_SYSCALLS
#include <asm/unistd.h> /* For NR_SYSCALLS */
#include <asm/syscall.h> /* some archs define it here */
#endif
enum trace_type {
__TRACE_FIRST_TYPE = 0,
......@@ -29,6 +34,7 @@ enum trace_type {
TRACE_GRAPH_ENT,
TRACE_USER_STACK,
TRACE_BLK,
TRACE_BPUTS,
__TRACE_LAST_TYPE,
};
......@@ -127,12 +133,21 @@ enum trace_flag_type {
#define TRACE_BUF_SIZE 1024
struct trace_array;
struct trace_cpu {
struct trace_array *tr;
struct dentry *dir;
int cpu;
};
/*
* The CPU trace array - it consists of thousands of trace entries
* plus some other descriptor data: (for example which task started
* the trace, etc.)
*/
struct trace_array_cpu {
struct trace_cpu trace_cpu;
atomic_t disabled;
void *buffer_page; /* ring buffer spare */
......@@ -151,20 +166,83 @@ struct trace_array_cpu {
char comm[TASK_COMM_LEN];
};
struct tracer;
struct trace_buffer {
struct trace_array *tr;
struct ring_buffer *buffer;
struct trace_array_cpu __percpu *data;
cycle_t time_start;
int cpu;
};
/*
* The trace array - an array of per-CPU trace arrays. This is the
* highest level data structure that individual tracers deal with.
* They have on/off state as well:
*/
struct trace_array {
struct ring_buffer *buffer;
int cpu;
struct list_head list;
char *name;
struct trace_buffer trace_buffer;
#ifdef CONFIG_TRACER_MAX_TRACE
/*
* The max_buffer is used to snapshot the trace when a maximum
* latency is reached, or when the user initiates a snapshot.
* Some tracers will use this to store a maximum trace while
* it continues examining live traces.
*
* The buffers for the max_buffer are set up the same as the trace_buffer
* When a snapshot is taken, the buffer of the max_buffer is swapped
* with the buffer of the trace_buffer and the buffers are reset for
* the trace_buffer so the tracing can continue.
*/
struct trace_buffer max_buffer;
bool allocated_snapshot;
#endif
int buffer_disabled;
cycle_t time_start;
struct trace_cpu trace_cpu; /* place holder */
#ifdef CONFIG_FTRACE_SYSCALLS
int sys_refcount_enter;
int sys_refcount_exit;
DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
#endif
int stop_count;
int clock_id;
struct tracer *current_trace;
unsigned int flags;
raw_spinlock_t start_lock;
struct dentry *dir;
struct dentry *options;
struct dentry *percpu_dir;
struct dentry *event_dir;
struct list_head systems;
struct list_head events;
struct task_struct *waiter;
struct trace_array_cpu *data[NR_CPUS];
int ref;
};
enum {
TRACE_ARRAY_FL_GLOBAL = (1 << 0)
};
extern struct list_head ftrace_trace_arrays;
/*
* The global tracer (top) should be the first trace array added,
* but we check the flag anyway.
*/
static inline struct trace_array *top_trace_array(void)
{
struct trace_array *tr;
tr = list_entry(ftrace_trace_arrays.prev,
typeof(*tr), list);
WARN_ON(!(tr->flags & TRACE_ARRAY_FL_GLOBAL));
return tr;
}
#define FTRACE_CMP_TYPE(var, type) \
__builtin_types_compatible_p(typeof(var), type *)
......@@ -200,6 +278,7 @@ extern void __ftrace_bad_type(void);
IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \
IF_ASSIGN(var, ent, struct bputs_entry, TRACE_BPUTS); \
IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
TRACE_MMIO_RW); \
IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
......@@ -289,9 +368,10 @@ struct tracer {
struct tracer *next;
struct tracer_flags *flags;
bool print_max;
bool use_max_tr;
bool allocated_snapshot;
bool enabled;
#ifdef CONFIG_TRACER_MAX_TRACE
bool use_max_tr;
#endif
};
......@@ -427,8 +507,6 @@ static __always_inline void trace_clear_recursion(int bit)
current->trace_recursion = val;
}
#define TRACE_PIPE_ALL_CPU -1
static inline struct ring_buffer_iter *
trace_buffer_iter(struct trace_iterator *iter, int cpu)
{
......@@ -439,10 +517,10 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu)
int tracer_init(struct tracer *t, struct trace_array *tr);
int tracing_is_enabled(void);
void tracing_reset(struct trace_array *tr, int cpu);
void tracing_reset_online_cpus(struct trace_array *tr);
void tracing_reset(struct trace_buffer *buf, int cpu);
void tracing_reset_online_cpus(struct trace_buffer *buf);
void tracing_reset_current(int cpu);
void tracing_reset_current_online_cpus(void);
void tracing_reset_all_online_cpus(void);
int tracing_open_generic(struct inode *inode, struct file *filp);
struct dentry *trace_create_file(const char *name,
umode_t mode,
......@@ -450,6 +528,7 @@ struct dentry *trace_create_file(const char *name,
void *data,
const struct file_operations *fops);
struct dentry *tracing_init_dentry_tr(struct trace_array *tr);
struct dentry *tracing_init_dentry(void);
struct ring_buffer_event;
......@@ -583,7 +662,7 @@ extern int DYN_FTRACE_TEST_NAME(void);
#define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2
extern int DYN_FTRACE_TEST_NAME2(void);
extern int ring_buffer_expanded;
extern bool ring_buffer_expanded;
extern bool tracing_selftest_disabled;
DECLARE_PER_CPU(int, ftrace_cpu_disabled);
......@@ -619,6 +698,8 @@ trace_array_vprintk(struct trace_array *tr,
unsigned long ip, const char *fmt, va_list args);
int trace_array_printk(struct trace_array *tr,
unsigned long ip, const char *fmt, ...);
int trace_array_printk_buf(struct ring_buffer *buffer,
unsigned long ip, const char *fmt, ...);
void trace_printk_seq(struct trace_seq *s);
enum print_line_t print_trace_line(struct trace_iterator *iter);
......@@ -786,6 +867,7 @@ enum trace_iterator_flags {
TRACE_ITER_STOP_ON_FREE = 0x400000,
TRACE_ITER_IRQ_INFO = 0x800000,
TRACE_ITER_MARKERS = 0x1000000,
TRACE_ITER_FUNCTION = 0x2000000,
};
/*
......@@ -832,8 +914,8 @@ enum {
struct ftrace_event_field {
struct list_head link;
char *name;
char *type;
const char *name;
const char *type;
int filter_type;
int offset;
int size;
......@@ -851,12 +933,19 @@ struct event_filter {
struct event_subsystem {
struct list_head list;
const char *name;
struct dentry *entry;
struct event_filter *filter;
int nr_events;
int ref_count;
};
/*
 * Ties an event_subsystem to one trace_array.
 * NOTE(review): presumably one of these exists per subsystem directory
 * of each trace instance -- confirm against the code that allocates it.
 */
struct ftrace_subsystem_dir {
struct list_head list;
/* the subsystem this directory refers to; see __get_system_dir() */
struct event_subsystem *subsystem;
/* the trace array this directory belongs to */
struct trace_array *tr;
struct dentry *entry;
/* reference count; the structure is freed by __put_system_dir() at zero */
int ref_count;
int nr_events;
};
#define FILTER_PRED_INVALID ((unsigned short)-1)
#define FILTER_PRED_IS_RIGHT (1 << 15)
#define FILTER_PRED_FOLD (1 << 15)
......@@ -906,22 +995,20 @@ struct filter_pred {
unsigned short right;
};
extern struct list_head ftrace_common_fields;
extern enum regex_type
filter_parse_regex(char *buff, int len, char **search, int *not);
extern void print_event_filter(struct ftrace_event_call *call,
struct trace_seq *s);
extern int apply_event_filter(struct ftrace_event_call *call,
char *filter_string);
extern int apply_subsystem_event_filter(struct event_subsystem *system,
extern int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir,
char *filter_string);
extern void print_subsystem_event_filter(struct event_subsystem *system,
struct trace_seq *s);
extern int filter_assign_type(const char *type);
struct list_head *
trace_get_fields(struct ftrace_event_call *event_call);
struct ftrace_event_field *
trace_find_event_field(struct ftrace_event_call *call, char *name);
static inline int
filter_check_discard(struct ftrace_event_call *call, void *rec,
......@@ -938,6 +1025,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
}
extern void trace_event_enable_cmd_record(bool enable);
extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
extern int event_trace_del_tracer(struct trace_array *tr);
extern struct mutex event_mutex;
extern struct list_head ftrace_events;
......@@ -948,7 +1037,18 @@ extern const char *__stop___trace_bprintk_fmt[];
void trace_printk_init_buffers(void);
void trace_printk_start_comm(void);
int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set);
int set_tracer_flag(unsigned int mask, int enabled);
int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled);
/*
* Normal trace_printk() and friends allocates special buffers
* to do the manipulation, as well as saves the print formats
* into sections to display. But the trace infrastructure wants
* to use these without the added overhead at the price of being
* a bit slower (used mainly for warnings, where we don't care
* about performance). The internal_trace_puts() is for such
* a purpose.
*/
#define internal_trace_puts(str) __trace_puts(_THIS_IP_, str, strlen(str))
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \
......
......@@ -32,6 +32,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
{
struct ftrace_event_call *call = &event_branch;
struct trace_array *tr = branch_tracer;
struct trace_array_cpu *data;
struct ring_buffer_event *event;
struct trace_branch *entry;
struct ring_buffer *buffer;
......@@ -51,11 +52,12 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
local_irq_save(flags);
cpu = raw_smp_processor_id();
if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
if (atomic_inc_return(&data->disabled) != 1)
goto out;
pc = preempt_count();
buffer = tr->buffer;
buffer = tr->trace_buffer.buffer;
event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH,
sizeof(*entry), flags, pc);
if (!event)
......@@ -80,7 +82,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
__buffer_unlock_commit(buffer, event);
out:
atomic_dec(&tr->data[cpu]->disabled);
atomic_dec(&data->disabled);
local_irq_restore(flags);
}
......
......@@ -57,6 +57,16 @@ u64 notrace trace_clock(void)
return local_clock();
}
/*
 * trace_clock_jiffies(): Simply use jiffies as a clock counter.
 *
 * The jiffies elapsed since INITIAL_JIFFIES are converted to
 * microseconds and then scaled up to nanoseconds.
 *
 * NOTE(review): jiffies_to_usecs() presumably returns a 32-bit
 * unsigned int, in which case this clock would wrap after roughly
 * 71 minutes -- confirm against linux/jiffies.h.
 */
u64 notrace trace_clock_jiffies(void)
{
	u64 elapsed = jiffies - INITIAL_JIFFIES;
	u64 usecs = jiffies_to_usecs(elapsed);

	/* Return nsecs */
	return usecs * 1000ULL;
}
/*
* trace_clock_global(): special globally coherent trace clock
......
......@@ -223,8 +223,8 @@ FTRACE_ENTRY(bprint, bprint_entry,
__dynamic_array( u32, buf )
),
F_printk("%08lx fmt:%p",
__entry->ip, __entry->fmt),
F_printk("%pf: %s",
(void *)__entry->ip, __entry->fmt),
FILTER_OTHER
);
......@@ -238,8 +238,23 @@ FTRACE_ENTRY(print, print_entry,
__dynamic_array( char, buf )
),
F_printk("%08lx %s",
__entry->ip, __entry->buf),
F_printk("%pf: %s",
(void *)__entry->ip, __entry->buf),
FILTER_OTHER
);
FTRACE_ENTRY(bputs, bputs_entry,
TRACE_BPUTS,
F_STRUCT(
__field( unsigned long, ip )
__field( const char *, str )
),
F_printk("%pf: %s",
(void *)__entry->ip, __entry->str),
FILTER_OTHER
);
......
......@@ -34,9 +34,27 @@ char event_storage[EVENT_STORAGE_SIZE];
EXPORT_SYMBOL_GPL(event_storage);
LIST_HEAD(ftrace_events);
LIST_HEAD(ftrace_common_fields);
static LIST_HEAD(ftrace_common_fields);
struct list_head *
#define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
static struct kmem_cache *field_cachep;
static struct kmem_cache *file_cachep;
/* Double loops, do not use break, only goto's work */
#define do_for_each_event_file(tr, file) \
list_for_each_entry(tr, &ftrace_trace_arrays, list) { \
list_for_each_entry(file, &tr->events, list)
#define do_for_each_event_file_safe(tr, file) \
list_for_each_entry(tr, &ftrace_trace_arrays, list) { \
struct ftrace_event_file *___n; \
list_for_each_entry_safe(file, ___n, &tr->events, list)
#define while_for_each_event_file() \
}
static struct list_head *
trace_get_fields(struct ftrace_event_call *event_call)
{
if (!event_call->class->get_fields)
......@@ -44,23 +62,45 @@ trace_get_fields(struct ftrace_event_call *event_call)
return event_call->class->get_fields(event_call);
}
/*
 * Walk the field list @head and return the first field whose name
 * equals @name, or NULL if there is none.
 */
static struct ftrace_event_field *
__find_event_field(struct list_head *head, char *name)
{
	struct ftrace_event_field *cur;

	list_for_each_entry(cur, head, link) {
		if (strcmp(cur->name, name) != 0)
			continue;
		return cur;
	}

	return NULL;
}
/*
 * Look up the field called @name for event @call. The common fields
 * are searched first, then the fields specific to @call. Returns NULL
 * if neither list has a field of that name.
 */
struct ftrace_event_field *
trace_find_event_field(struct ftrace_event_call *call, char *name)
{
	struct ftrace_event_field *found;

	found = __find_event_field(&ftrace_common_fields, name);
	if (!found)
		found = __find_event_field(trace_get_fields(call), name);

	return found;
}
static int __trace_define_field(struct list_head *head, const char *type,
const char *name, int offset, int size,
int is_signed, int filter_type)
{
struct ftrace_event_field *field;
field = kzalloc(sizeof(*field), GFP_KERNEL);
field = kmem_cache_alloc(field_cachep, GFP_TRACE);
if (!field)
goto err;
field->name = kstrdup(name, GFP_KERNEL);
if (!field->name)
goto err;
field->type = kstrdup(type, GFP_KERNEL);
if (!field->type)
goto err;
field->name = name;
field->type = type;
if (filter_type == FILTER_OTHER)
field->filter_type = filter_assign_type(type);
......@@ -76,9 +116,7 @@ static int __trace_define_field(struct list_head *head, const char *type,
return 0;
err:
if (field)
kfree(field->name);
kfree(field);
kmem_cache_free(field_cachep, field);
return -ENOMEM;
}
......@@ -120,7 +158,7 @@ static int trace_define_common_fields(void)
return ret;
}
void trace_destroy_fields(struct ftrace_event_call *call)
static void trace_destroy_fields(struct ftrace_event_call *call)
{
struct ftrace_event_field *field, *next;
struct list_head *head;
......@@ -128,9 +166,7 @@ void trace_destroy_fields(struct ftrace_event_call *call)
head = trace_get_fields(call);
list_for_each_entry_safe(field, next, head, link) {
list_del(&field->link);
kfree(field->type);
kfree(field->name);
kfree(field);
kmem_cache_free(field_cachep, field);
}
}
......@@ -149,15 +185,17 @@ EXPORT_SYMBOL_GPL(trace_event_raw_init);
int ftrace_event_reg(struct ftrace_event_call *call,
enum trace_reg type, void *data)
{
struct ftrace_event_file *file = data;
switch (type) {
case TRACE_REG_REGISTER:
return tracepoint_probe_register(call->name,
call->class->probe,
call);
file);
case TRACE_REG_UNREGISTER:
tracepoint_probe_unregister(call->name,
call->class->probe,
call);
file);
return 0;
#ifdef CONFIG_PERF_EVENTS
......@@ -183,54 +221,100 @@ EXPORT_SYMBOL_GPL(ftrace_event_reg);
void trace_event_enable_cmd_record(bool enable)
{
struct ftrace_event_call *call;
struct ftrace_event_file *file;
struct trace_array *tr;
mutex_lock(&event_mutex);
list_for_each_entry(call, &ftrace_events, list) {
if (!(call->flags & TRACE_EVENT_FL_ENABLED))
do_for_each_event_file(tr, file) {
if (!(file->flags & FTRACE_EVENT_FL_ENABLED))
continue;
if (enable) {
tracing_start_cmdline_record();
call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
set_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags);
} else {
tracing_stop_cmdline_record();
call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
}
clear_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags);
}
} while_for_each_event_file();
mutex_unlock(&event_mutex);
}
static int ftrace_event_enable_disable(struct ftrace_event_call *call,
int enable)
static int __ftrace_event_enable_disable(struct ftrace_event_file *file,
int enable, int soft_disable)
{
struct ftrace_event_call *call = file->event_call;
int ret = 0;
int disable;
switch (enable) {
case 0:
if (call->flags & TRACE_EVENT_FL_ENABLED) {
call->flags &= ~TRACE_EVENT_FL_ENABLED;
if (call->flags & TRACE_EVENT_FL_RECORDED_CMD) {
/*
* When soft_disable is set and enable is cleared, we want
* to clear the SOFT_DISABLED flag but leave the event in the
* state that it was. That is, if the event was enabled and
* SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED
* is set we do not want the event to be enabled before we
* clear the bit.
*
* When soft_disable is not set but the SOFT_MODE flag is,
* we do nothing. Do not disable the tracepoint, otherwise
* "soft enable"s (clearing the SOFT_DISABLED bit) wont work.
*/
if (soft_disable) {
disable = file->flags & FTRACE_EVENT_FL_SOFT_DISABLED;
clear_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags);
} else
disable = !(file->flags & FTRACE_EVENT_FL_SOFT_MODE);
if (disable && (file->flags & FTRACE_EVENT_FL_ENABLED)) {
clear_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags);
if (file->flags & FTRACE_EVENT_FL_RECORDED_CMD) {
tracing_stop_cmdline_record();
call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
clear_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags);
}
call->class->reg(call, TRACE_REG_UNREGISTER, NULL);
call->class->reg(call, TRACE_REG_UNREGISTER, file);
}
/* If in SOFT_MODE, just set the SOFT_DISABLE_BIT */
if (file->flags & FTRACE_EVENT_FL_SOFT_MODE)
set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);
break;
case 1:
if (!(call->flags & TRACE_EVENT_FL_ENABLED)) {
/*
* When soft_disable is set and enable is set, we want to
* register the tracepoint for the event, but leave the event
* as is. That means, if the event was already enabled, we do
* nothing (but set SOFT_MODE). If the event is disabled, we
* set SOFT_DISABLED before enabling the event tracepoint, so
* it still seems to be disabled.
*/
if (!soft_disable)
clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);
else
set_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags);
if (!(file->flags & FTRACE_EVENT_FL_ENABLED)) {
/* Keep the event disabled, when going to SOFT_MODE. */
if (soft_disable)
set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);
if (trace_flags & TRACE_ITER_RECORD_CMD) {
tracing_start_cmdline_record();
call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
set_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags);
}
ret = call->class->reg(call, TRACE_REG_REGISTER, NULL);
ret = call->class->reg(call, TRACE_REG_REGISTER, file);
if (ret) {
tracing_stop_cmdline_record();
pr_info("event trace: Could not enable event "
"%s\n", call->name);
break;
}
call->flags |= TRACE_EVENT_FL_ENABLED;
set_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags);
/* WAS_ENABLED gets set but never cleared. */
call->flags |= TRACE_EVENT_FL_WAS_ENABLED;
}
break;
}
......@@ -238,13 +322,19 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
return ret;
}
static void ftrace_clear_events(void)
/*
 * Enable (@enable == 1) or disable (@enable == 0) the event @file by
 * calling __ftrace_event_enable_disable() with soft_disable set to 0.
 * Returns whatever that function returns.
 */
static int ftrace_event_enable_disable(struct ftrace_event_file *file,
int enable)
{
return __ftrace_event_enable_disable(file, enable, 0);
}
static void ftrace_clear_events(struct trace_array *tr)
{
struct ftrace_event_call *call;
struct ftrace_event_file *file;
mutex_lock(&event_mutex);
list_for_each_entry(call, &ftrace_events, list) {
ftrace_event_enable_disable(call, 0);
list_for_each_entry(file, &tr->events, list) {
ftrace_event_enable_disable(file, 0);
}
mutex_unlock(&event_mutex);
}
......@@ -257,11 +347,12 @@ static void __put_system(struct event_subsystem *system)
if (--system->ref_count)
return;
list_del(&system->list);
if (filter) {
kfree(filter->filter_string);
kfree(filter);
}
kfree(system->name);
kfree(system);
}
......@@ -271,24 +362,45 @@ static void __get_system(struct event_subsystem *system)
system->ref_count++;
}
static void put_system(struct event_subsystem *system)
/*
 * Take a reference on @dir and on the subsystem it points to.
 * Warns once if @dir had no reference left to begin with.
 */
static void __get_system_dir(struct ftrace_subsystem_dir *dir)
{
	WARN_ON_ONCE(!dir->ref_count);

	++dir->ref_count;
	__get_system(dir->subsystem);
}
static void __put_system_dir(struct ftrace_subsystem_dir *dir)
{
WARN_ON_ONCE(dir->ref_count == 0);
/* If the subsystem is about to be freed, the dir must be too */
WARN_ON_ONCE(dir->subsystem->ref_count == 1 && dir->ref_count != 1);
__put_system(dir->subsystem);
if (!--dir->ref_count)
kfree(dir);
}
static void put_system(struct ftrace_subsystem_dir *dir)
{
mutex_lock(&event_mutex);
__put_system(system);
__put_system_dir(dir);
mutex_unlock(&event_mutex);
}
/*
* __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
*/
static int __ftrace_set_clr_event(const char *match, const char *sub,
const char *event, int set)
static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
const char *sub, const char *event, int set)
{
struct ftrace_event_file *file;
struct ftrace_event_call *call;
int ret = -EINVAL;
mutex_lock(&event_mutex);
list_for_each_entry(call, &ftrace_events, list) {
list_for_each_entry(file, &tr->events, list) {
call = file->event_call;
if (!call->name || !call->class || !call->class->reg)
continue;
......@@ -307,7 +419,7 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,
if (event && strcmp(event, call->name) != 0)
continue;
ftrace_event_enable_disable(call, set);
ftrace_event_enable_disable(file, set);
ret = 0;
}
......@@ -316,7 +428,7 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,
return ret;
}
static int ftrace_set_clr_event(char *buf, int set)
static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
{
char *event = NULL, *sub = NULL, *match;
......@@ -344,7 +456,7 @@ static int ftrace_set_clr_event(char *buf, int set)
event = NULL;
}
return __ftrace_set_clr_event(match, sub, event, set);
return __ftrace_set_clr_event(tr, match, sub, event, set);
}
/**
......@@ -361,7 +473,9 @@ static int ftrace_set_clr_event(char *buf, int set)
*/
int trace_set_clr_event(const char *system, const char *event, int set)
{
return __ftrace_set_clr_event(NULL, system, event, set);
struct trace_array *tr = top_trace_array();
return __ftrace_set_clr_event(tr, NULL, system, event, set);
}
EXPORT_SYMBOL_GPL(trace_set_clr_event);
......@@ -373,6 +487,8 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
struct trace_parser parser;
struct seq_file *m = file->private_data;
struct trace_array *tr = m->private;
ssize_t read, ret;
if (!cnt)
......@@ -395,7 +511,7 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
parser.buffer[parser.idx] = 0;
ret = ftrace_set_clr_event(parser.buffer + !set, set);
ret = ftrace_set_clr_event(tr, parser.buffer + !set, set);
if (ret)
goto out_put;
}
......@@ -411,17 +527,20 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
struct ftrace_event_call *call = v;
struct ftrace_event_file *file = v;
struct ftrace_event_call *call;
struct trace_array *tr = m->private;
(*pos)++;
list_for_each_entry_continue(call, &ftrace_events, list) {
list_for_each_entry_continue(file, &tr->events, list) {
call = file->event_call;
/*
* The ftrace subsystem is for showing formats only.
* They can not be enabled or disabled via the event files.
*/
if (call->class && call->class->reg)
return call;
return file;
}
return NULL;
......@@ -429,30 +548,32 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
static void *t_start(struct seq_file *m, loff_t *pos)
{
struct ftrace_event_call *call;
struct ftrace_event_file *file;
struct trace_array *tr = m->private;
loff_t l;
mutex_lock(&event_mutex);
call = list_entry(&ftrace_events, struct ftrace_event_call, list);
file = list_entry(&tr->events, struct ftrace_event_file, list);
for (l = 0; l <= *pos; ) {
call = t_next(m, call, &l);
if (!call)
file = t_next(m, file, &l);
if (!file)
break;
}
return call;
return file;
}
static void *
s_next(struct seq_file *m, void *v, loff_t *pos)
{
struct ftrace_event_call *call = v;
struct ftrace_event_file *file = v;
struct trace_array *tr = m->private;
(*pos)++;
list_for_each_entry_continue(call, &ftrace_events, list) {
if (call->flags & TRACE_EVENT_FL_ENABLED)
return call;
list_for_each_entry_continue(file, &tr->events, list) {
if (file->flags & FTRACE_EVENT_FL_ENABLED)
return file;
}
return NULL;
......@@ -460,23 +581,25 @@ s_next(struct seq_file *m, void *v, loff_t *pos)
static void *s_start(struct seq_file *m, loff_t *pos)
{
struct ftrace_event_call *call;
struct ftrace_event_file *file;
struct trace_array *tr = m->private;
loff_t l;
mutex_lock(&event_mutex);
call = list_entry(&ftrace_events, struct ftrace_event_call, list);
file = list_entry(&tr->events, struct ftrace_event_file, list);
for (l = 0; l <= *pos; ) {
call = s_next(m, call, &l);
if (!call)
file = s_next(m, file, &l);
if (!file)
break;
}
return call;
return file;
}
static int t_show(struct seq_file *m, void *v)
{
struct ftrace_event_call *call = v;
struct ftrace_event_file *file = v;
struct ftrace_event_call *call = file->event_call;
if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
seq_printf(m, "%s:", call->class->system);
......@@ -494,25 +617,31 @@ static ssize_t
event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
{
struct ftrace_event_call *call = filp->private_data;
struct ftrace_event_file *file = filp->private_data;
char *buf;
if (call->flags & TRACE_EVENT_FL_ENABLED)
buf = "1\n";
if (file->flags & FTRACE_EVENT_FL_ENABLED) {
if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)
buf = "0*\n";
else
buf = "1\n";
} else
buf = "0\n";
return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf));
}
static ssize_t
event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
loff_t *ppos)
{
struct ftrace_event_call *call = filp->private_data;
struct ftrace_event_file *file = filp->private_data;
unsigned long val;
int ret;
if (!file)
return -EINVAL;
ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
if (ret)
return ret;
......@@ -525,7 +654,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
case 0:
case 1:
mutex_lock(&event_mutex);
ret = ftrace_event_enable_disable(call, val);
ret = ftrace_event_enable_disable(file, val);
mutex_unlock(&event_mutex);
break;
......@@ -543,14 +672,18 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
{
const char set_to_char[4] = { '?', '0', '1', 'X' };
struct event_subsystem *system = filp->private_data;
struct ftrace_subsystem_dir *dir = filp->private_data;
struct event_subsystem *system = dir->subsystem;
struct ftrace_event_call *call;
struct ftrace_event_file *file;
struct trace_array *tr = dir->tr;
char buf[2];
int set = 0;
int ret;
mutex_lock(&event_mutex);
list_for_each_entry(call, &ftrace_events, list) {
list_for_each_entry(file, &tr->events, list) {
call = file->event_call;
if (!call->name || !call->class || !call->class->reg)
continue;
......@@ -562,7 +695,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
* or if all events are cleared, or if we have
* a mixture.
*/
set |= (1 << !!(call->flags & TRACE_EVENT_FL_ENABLED));
set |= (1 << !!(file->flags & FTRACE_EVENT_FL_ENABLED));
/*
* If we have a mixture, no need to look further.
......@@ -584,7 +717,8 @@ static ssize_t
system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
loff_t *ppos)
{
struct event_subsystem *system = filp->private_data;
struct ftrace_subsystem_dir *dir = filp->private_data;
struct event_subsystem *system = dir->subsystem;
const char *name = NULL;
unsigned long val;
ssize_t ret;
......@@ -607,7 +741,7 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
if (system)
name = system->name;
ret = __ftrace_set_clr_event(NULL, name, NULL, val);
ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val);
if (ret)
goto out;
......@@ -845,43 +979,75 @@ static LIST_HEAD(event_subsystems);
static int subsystem_open(struct inode *inode, struct file *filp)
{
struct event_subsystem *system = NULL;
struct ftrace_subsystem_dir *dir = NULL; /* Initialize for gcc */
struct trace_array *tr;
int ret;
if (!inode->i_private)
goto skip_search;
/* Make sure the system still exists */
mutex_lock(&event_mutex);
list_for_each_entry(system, &event_subsystems, list) {
if (system == inode->i_private) {
list_for_each_entry(tr, &ftrace_trace_arrays, list) {
list_for_each_entry(dir, &tr->systems, list) {
if (dir == inode->i_private) {
/* Don't open systems with no events */
if (!system->nr_events) {
system = NULL;
break;
if (dir->nr_events) {
__get_system_dir(dir);
system = dir->subsystem;
}
__get_system(system);
break;
goto exit_loop;
}
}
}
exit_loop:
mutex_unlock(&event_mutex);
if (system != inode->i_private)
if (!system)
return -ENODEV;
skip_search:
/* Some versions of gcc think dir can be uninitialized here */
WARN_ON(!dir);
ret = tracing_open_generic(inode, filp);
if (ret < 0)
put_system(dir);
return ret;
}
static int system_tr_open(struct inode *inode, struct file *filp)
{
struct ftrace_subsystem_dir *dir;
struct trace_array *tr = inode->i_private;
int ret;
/* Make a temporary dir that has no system but points to tr */
dir = kzalloc(sizeof(*dir), GFP_KERNEL);
if (!dir)
return -ENOMEM;
dir->tr = tr;
ret = tracing_open_generic(inode, filp);
if (ret < 0 && system)
put_system(system);
if (ret < 0)
kfree(dir);
filp->private_data = dir;
return ret;
}
static int subsystem_release(struct inode *inode, struct file *file)
{
struct event_subsystem *system = inode->i_private;
struct ftrace_subsystem_dir *dir = file->private_data;
if (system)
put_system(system);
/*
* If dir->subsystem is NULL, then this is a temporary
* descriptor that was made for a trace_array to enable
* all subsystems.
*/
if (dir->subsystem)
put_system(dir);
else
kfree(dir);
return 0;
}
......@@ -890,7 +1056,8 @@ static ssize_t
subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
{
struct event_subsystem *system = filp->private_data;
struct ftrace_subsystem_dir *dir = filp->private_data;
struct event_subsystem *system = dir->subsystem;
struct trace_seq *s;
int r;
......@@ -915,7 +1082,7 @@ static ssize_t
subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
loff_t *ppos)
{
struct event_subsystem *system = filp->private_data;
struct ftrace_subsystem_dir *dir = filp->private_data;
char *buf;
int err;
......@@ -932,7 +1099,7 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
}
buf[cnt] = '\0';
err = apply_subsystem_event_filter(system, buf);
err = apply_subsystem_event_filter(dir, buf);
free_page((unsigned long) buf);
if (err < 0)
return err;
......@@ -1041,30 +1208,35 @@ static const struct file_operations ftrace_system_enable_fops = {
.release = subsystem_release,
};
static const struct file_operations ftrace_tr_enable_fops = {
.open = system_tr_open,
.read = system_enable_read,
.write = system_enable_write,
.llseek = default_llseek,
.release = subsystem_release,
};
static const struct file_operations ftrace_show_header_fops = {
.open = tracing_open_generic,
.read = show_header,
.llseek = default_llseek,
};
static struct dentry *event_trace_events_dir(void)
static int
ftrace_event_open(struct inode *inode, struct file *file,
const struct seq_operations *seq_ops)
{
static struct dentry *d_tracer;
static struct dentry *d_events;
if (d_events)
return d_events;
d_tracer = tracing_init_dentry();
if (!d_tracer)
return NULL;
struct seq_file *m;
int ret;
d_events = debugfs_create_dir("events", d_tracer);
if (!d_events)
pr_warning("Could not create debugfs "
"'events' directory\n");
ret = seq_open(file, seq_ops);
if (ret < 0)
return ret;
m = file->private_data;
/* copy tr over to seq ops */
m->private = inode->i_private;
return d_events;
return ret;
}
static int
......@@ -1072,117 +1244,165 @@ ftrace_event_avail_open(struct inode *inode, struct file *file)
{
const struct seq_operations *seq_ops = &show_event_seq_ops;
return seq_open(file, seq_ops);
return ftrace_event_open(inode, file, seq_ops);
}
static int
ftrace_event_set_open(struct inode *inode, struct file *file)
{
const struct seq_operations *seq_ops = &show_set_event_seq_ops;
struct trace_array *tr = inode->i_private;
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC))
ftrace_clear_events();
ftrace_clear_events(tr);
return ftrace_event_open(inode, file, seq_ops);
}
static struct event_subsystem *
create_new_subsystem(const char *name)
{
struct event_subsystem *system;
/* need to create new entry */
system = kmalloc(sizeof(*system), GFP_KERNEL);
if (!system)
return NULL;
system->ref_count = 1;
system->name = name;
system->filter = NULL;
system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
if (!system->filter)
goto out_free;
list_add(&system->list, &event_subsystems);
return system;
return seq_open(file, seq_ops);
out_free:
kfree(system);
return NULL;
}
static struct dentry *
event_subsystem_dir(const char *name, struct dentry *d_events)
event_subsystem_dir(struct trace_array *tr, const char *name,
struct ftrace_event_file *file, struct dentry *parent)
{
struct ftrace_subsystem_dir *dir;
struct event_subsystem *system;
struct dentry *entry;
/* First see if we did not already create this dir */
list_for_each_entry(system, &event_subsystems, list) {
list_for_each_entry(dir, &tr->systems, list) {
system = dir->subsystem;
if (strcmp(system->name, name) == 0) {
system->nr_events++;
return system->entry;
}
dir->nr_events++;
file->system = dir;
return dir->entry;
}
/* need to create new entry */
system = kmalloc(sizeof(*system), GFP_KERNEL);
if (!system) {
pr_warning("No memory to create event subsystem %s\n",
name);
return d_events;
}
system->entry = debugfs_create_dir(name, d_events);
if (!system->entry) {
pr_warning("Could not create event subsystem %s\n",
name);
kfree(system);
return d_events;
}
system->nr_events = 1;
system->ref_count = 1;
system->name = kstrdup(name, GFP_KERNEL);
if (!system->name) {
debugfs_remove(system->entry);
kfree(system);
return d_events;
/* Now see if the system itself exists. */
list_for_each_entry(system, &event_subsystems, list) {
if (strcmp(system->name, name) == 0)
break;
}
/* Reset system variable when not found */
if (&system->list == &event_subsystems)
system = NULL;
list_add(&system->list, &event_subsystems);
dir = kmalloc(sizeof(*dir), GFP_KERNEL);
if (!dir)
goto out_fail;
system->filter = NULL;
if (!system) {
system = create_new_subsystem(name);
if (!system)
goto out_free;
} else
__get_system(system);
system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
if (!system->filter) {
pr_warning("Could not allocate filter for subsystem "
"'%s'\n", name);
return system->entry;
dir->entry = debugfs_create_dir(name, parent);
if (!dir->entry) {
pr_warning("Failed to create system directory %s\n", name);
__put_system(system);
goto out_free;
}
entry = debugfs_create_file("filter", 0644, system->entry, system,
dir->tr = tr;
dir->ref_count = 1;
dir->nr_events = 1;
dir->subsystem = system;
file->system = dir;
entry = debugfs_create_file("filter", 0644, dir->entry, dir,
&ftrace_subsystem_filter_fops);
if (!entry) {
kfree(system->filter);
system->filter = NULL;
pr_warning("Could not create debugfs "
"'%s/filter' entry\n", name);
pr_warning("Could not create debugfs '%s/filter' entry\n", name);
}
trace_create_file("enable", 0644, system->entry, system,
trace_create_file("enable", 0644, dir->entry, dir,
&ftrace_system_enable_fops);
return system->entry;
list_add(&dir->list, &tr->systems);
return dir->entry;
out_free:
kfree(dir);
out_fail:
/* Only print this message if failed on memory allocation */
if (!dir || !system)
pr_warning("No memory to create event subsystem %s\n",
name);
return NULL;
}
static int
event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
event_create_dir(struct dentry *parent,
struct ftrace_event_file *file,
const struct file_operations *id,
const struct file_operations *enable,
const struct file_operations *filter,
const struct file_operations *format)
{
struct ftrace_event_call *call = file->event_call;
struct trace_array *tr = file->tr;
struct list_head *head;
struct dentry *d_events;
int ret;
/*
* If the trace point header did not define TRACE_SYSTEM
* then the system would be called "TRACE_SYSTEM".
*/
if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
d_events = event_subsystem_dir(call->class->system, d_events);
if (strcmp(call->class->system, TRACE_SYSTEM) != 0) {
d_events = event_subsystem_dir(tr, call->class->system, file, parent);
if (!d_events)
return -ENOMEM;
} else
d_events = parent;
call->dir = debugfs_create_dir(call->name, d_events);
if (!call->dir) {
pr_warning("Could not create debugfs "
"'%s' directory\n", call->name);
file->dir = debugfs_create_dir(call->name, d_events);
if (!file->dir) {
pr_warning("Could not create debugfs '%s' directory\n",
call->name);
return -1;
}
if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
trace_create_file("enable", 0644, call->dir, call,
trace_create_file("enable", 0644, file->dir, file,
enable);
#ifdef CONFIG_PERF_EVENTS
if (call->event.type && call->class->reg)
trace_create_file("id", 0444, call->dir, call,
trace_create_file("id", 0444, file->dir, call,
id);
#endif
......@@ -1196,23 +1416,76 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
if (ret < 0) {
pr_warning("Could not initialize trace point"
" events/%s\n", call->name);
return ret;
return -1;
}
}
trace_create_file("filter", 0644, call->dir, call,
trace_create_file("filter", 0644, file->dir, call,
filter);
trace_create_file("format", 0444, call->dir, call,
trace_create_file("format", 0444, file->dir, call,
format);
return 0;
}
/*
 * Account for one event leaving a subsystem directory.
 *
 * A NULL @dir is accepted and ignored.  Otherwise the directory's event
 * count is decremented; once it reaches zero the debugfs tree under the
 * directory is removed, the directory is unlinked from its list and its
 * reference is dropped.
 */
static void remove_subsystem(struct ftrace_subsystem_dir *dir)
{
	/* Nothing to tear down while other events still live in this dir */
	if (!dir || --dir->nr_events)
		return;

	debugfs_remove_recursive(dir->entry);
	list_del(&dir->list);
	__put_system_dir(dir);
}
/*
 * Remove the per-trace_array descriptors that refer to @call.
 *
 * For each trace_array visited by the iterator, the ftrace_event_file whose
 * event_call is @call is unlinked from its list, its debugfs directory is
 * removed, its subsystem directory is released through remove_subsystem(),
 * and the descriptor is returned to file_cachep.
 *
 * NOTE(review): no lock is taken here; presumably the caller holds
 * event_mutex -- confirm against the call sites.
 */
static void remove_event_from_tracers(struct ftrace_event_call *call)
{
struct ftrace_event_file *file;
struct trace_array *tr;
do_for_each_event_file_safe(tr, file) {
/* Only the descriptor belonging to @call is of interest */
if (file->event_call != call)
continue;
/* Unlink first, then tear down the debugfs entries and free */
list_del(&file->list);
debugfs_remove_recursive(file->dir);
remove_subsystem(file->system);
kmem_cache_free(file_cachep, file);
/*
* The do_for_each_event_file_safe() is
* a double loop. After finding the call for this
* trace_array, we use break to jump to the next
* trace_array.
*/
break;
} while_for_each_event_file();
}
static void event_remove(struct ftrace_event_call *call)
{
ftrace_event_enable_disable(call, 0);
struct trace_array *tr;
struct ftrace_event_file *file;
do_for_each_event_file(tr, file) {
if (file->event_call != call)
continue;
ftrace_event_enable_disable(file, 0);
/*
* The do_for_each_event_file() is
* a double loop. After finding the call for this
* trace_array, we use break to jump to the next
* trace_array.
*/
break;
} while_for_each_event_file();
if (call->event.funcs)
__unregister_ftrace_event(&call->event);
remove_event_from_tracers(call);
list_del(&call->list);
}
......@@ -1234,82 +1507,99 @@ static int event_init(struct ftrace_event_call *call)
}
static int
__trace_add_event_call(struct ftrace_event_call *call, struct module *mod,
const struct file_operations *id,
const struct file_operations *enable,
const struct file_operations *filter,
const struct file_operations *format)
__register_event(struct ftrace_event_call *call, struct module *mod)
{
struct dentry *d_events;
int ret;
ret = event_init(call);
if (ret < 0)
return ret;
d_events = event_trace_events_dir();
if (!d_events)
return -ENOENT;
ret = event_create_dir(call, d_events, id, enable, filter, format);
if (!ret)
list_add(&call->list, &ftrace_events);
call->mod = mod;
return ret;
return 0;
}
/* Add an event to a trace directory */
static int
__trace_add_new_event(struct ftrace_event_call *call,
struct trace_array *tr,
const struct file_operations *id,
const struct file_operations *enable,
const struct file_operations *filter,
const struct file_operations *format)
{
struct ftrace_event_file *file;
file = kmem_cache_alloc(file_cachep, GFP_TRACE);
if (!file)
return -ENOMEM;
file->event_call = call;
file->tr = tr;
list_add(&file->list, &tr->events);
return event_create_dir(tr->event_dir, file, id, enable, filter, format);
}
/*
* Just create a decriptor for early init. A descriptor is required
* for enabling events at boot. We want to enable events before
* the filesystem is initialized.
*/
static __init int
__trace_early_add_new_event(struct ftrace_event_call *call,
struct trace_array *tr)
{
struct ftrace_event_file *file;
file = kmem_cache_alloc(file_cachep, GFP_TRACE);
if (!file)
return -ENOMEM;
file->event_call = call;
file->tr = tr;
list_add(&file->list, &tr->events);
return 0;
}
struct ftrace_module_file_ops;
static void __add_event_to_tracers(struct ftrace_event_call *call,
struct ftrace_module_file_ops *file_ops);
/* Add an additional event_call dynamically */
int trace_add_event_call(struct ftrace_event_call *call)
{
int ret;
mutex_lock(&event_mutex);
ret = __trace_add_event_call(call, NULL, &ftrace_event_id_fops,
&ftrace_enable_fops,
&ftrace_event_filter_fops,
&ftrace_event_format_fops);
mutex_unlock(&event_mutex);
return ret;
}
static void remove_subsystem_dir(const char *name)
{
struct event_subsystem *system;
if (strcmp(name, TRACE_SYSTEM) == 0)
return;
ret = __register_event(call, NULL);
if (ret >= 0)
__add_event_to_tracers(call, NULL);
list_for_each_entry(system, &event_subsystems, list) {
if (strcmp(system->name, name) == 0) {
if (!--system->nr_events) {
debugfs_remove_recursive(system->entry);
list_del(&system->list);
__put_system(system);
}
break;
}
}
mutex_unlock(&event_mutex);
return ret;
}
/*
* Must be called under locking both of event_mutex and trace_event_mutex.
* Must be called under locking both of event_mutex and trace_event_sem.
*/
static void __trace_remove_event_call(struct ftrace_event_call *call)
{
event_remove(call);
trace_destroy_fields(call);
destroy_preds(call);
debugfs_remove_recursive(call->dir);
remove_subsystem_dir(call->class->system);
}
/* Remove an event_call */
void trace_remove_event_call(struct ftrace_event_call *call)
{
mutex_lock(&event_mutex);
down_write(&trace_event_mutex);
down_write(&trace_event_sem);
__trace_remove_event_call(call);
up_write(&trace_event_mutex);
up_write(&trace_event_sem);
mutex_unlock(&event_mutex);
}
......@@ -1335,6 +1625,26 @@ struct ftrace_module_file_ops {
struct file_operations filter;
};
/*
 * Look up the file_ops registered for @mod.
 *
 * @file_ops is the result of a previous lookup (or NULL).  Event calls are
 * added in per-module groups, so consecutive lookups usually want the same
 * entry; when the hint already belongs to @mod it is returned without
 * walking ftrace_module_file_list.
 *
 * Returns the matching entry, or NULL when @mod has none on the list.
 */
static struct ftrace_module_file_ops *
find_ftrace_file_ops(struct ftrace_module_file_ops *file_ops, struct module *mod)
{
	struct ftrace_module_file_ops *iter;

	/* Fast path: the caller's previous hit is still the right one */
	if (file_ops && file_ops->mod == mod)
		return file_ops;

	list_for_each_entry(iter, &ftrace_module_file_list, list) {
		if (iter->mod == mod)
			return iter;
	}

	return NULL;
}
static struct ftrace_module_file_ops *
trace_create_file_ops(struct module *mod)
{
......@@ -1386,9 +1696,8 @@ static void trace_module_add_events(struct module *mod)
return;
for_each_event(call, start, end) {
__trace_add_event_call(*call, mod,
&file_ops->id, &file_ops->enable,
&file_ops->filter, &file_ops->format);
__register_event(*call, mod);
__add_event_to_tracers(*call, file_ops);
}
}
......@@ -1396,12 +1705,13 @@ static void trace_module_remove_events(struct module *mod)
{
struct ftrace_module_file_ops *file_ops;
struct ftrace_event_call *call, *p;
bool found = false;
bool clear_trace = false;
down_write(&trace_event_mutex);
down_write(&trace_event_sem);
list_for_each_entry_safe(call, p, &ftrace_events, list) {
if (call->mod == mod) {
found = true;
if (call->flags & TRACE_EVENT_FL_WAS_ENABLED)
clear_trace = true;
__trace_remove_event_call(call);
}
}
......@@ -1415,14 +1725,18 @@ static void trace_module_remove_events(struct module *mod)
list_del(&file_ops->list);
kfree(file_ops);
}
up_write(&trace_event_sem);
/*
* It is safest to reset the ring buffer if the module being unloaded
* registered any events.
* registered any events that were used. The only worry is if
* a new module gets loaded, and takes on the same id as the events
* of this module. When printing out the buffer, traced events left
* over from this module may be passed to the new module events and
* unexpected results may occur.
*/
if (found)
tracing_reset_current_online_cpus();
up_write(&trace_event_mutex);
if (clear_trace)
tracing_reset_all_online_cpus();
}
static int trace_module_notify(struct notifier_block *self,
......@@ -1443,14 +1757,433 @@ static int trace_module_notify(struct notifier_block *self,
return 0;
}
/*
 * Add a module-provided event to @tr, using the id/enable/filter/format
 * file operations carried by the module's @file_ops.
 *
 * Returns whatever __trace_add_new_event() returns.
 */
static int
__trace_add_new_mod_event(struct ftrace_event_call *call,
			  struct trace_array *tr,
			  struct ftrace_module_file_ops *file_ops)
{
	const struct file_operations *id = &file_ops->id;
	const struct file_operations *enable = &file_ops->enable;
	const struct file_operations *filter = &file_ops->filter;
	const struct file_operations *format = &file_ops->format;

	return __trace_add_new_event(call, tr, id, enable, filter, format);
}
#else
static int trace_module_notify(struct notifier_block *self,
/*
 * Stub for builds without CONFIG_MODULES: no module file_ops exist, so the
 * lookup always reports "not found".
 */
static inline struct ftrace_module_file_ops *
find_ftrace_file_ops(struct ftrace_module_file_ops *file_ops, struct module *mod)
{
return NULL;
}
/*
 * Stub for builds without CONFIG_MODULES: the notifier callback does
 * nothing and returns 0.
 */
static inline int trace_module_notify(struct notifier_block *self,
unsigned long val, void *data)
{
return 0;
}
/*
 * Stub for builds without CONFIG_MODULES: module events cannot be added,
 * so always fail with -ENODEV.
 */
static inline int
__trace_add_new_mod_event(struct ftrace_event_call *call,
struct trace_array *tr,
struct ftrace_module_file_ops *file_ops)
{
return -ENODEV;
}
#endif /* CONFIG_MODULES */
/*
 * Create a new event directory structure for a trace directory.
 *
 * Walks every registered event call and adds it to @tr.  Failures are only
 * reported with a warning; the walk always continues with the next event.
 */
static void
__trace_add_event_dirs(struct trace_array *tr)
{
	struct ftrace_module_file_ops *mod_ops = NULL;
	struct ftrace_event_call *call;
	int err;

	list_for_each_entry(call, &ftrace_events, list) {
		if (call->mod) {
			/*
			 * Directories for events by modules need to
			 * keep module ref counts when opened (as we don't
			 * want the module to disappear when reading one
			 * of these files). The file_ops keep account of
			 * the module ref count.
			 */
			mod_ops = find_ftrace_file_ops(mod_ops, call->mod);
			if (!mod_ops)
				continue; /* Warn? */

			err = __trace_add_new_mod_event(call, tr, mod_ops);
		} else {
			/* Built-in event: use the core file operations */
			err = __trace_add_new_event(call, tr,
						    &ftrace_event_id_fops,
						    &ftrace_enable_fops,
						    &ftrace_event_filter_fops,
						    &ftrace_event_format_fops);
		}

		if (err < 0)
			pr_warning("Could not create directory for event %s\n",
				   call->name);
	}
}
#ifdef CONFIG_DYNAMIC_FTRACE
/* Avoid typos */
#define ENABLE_EVENT_STR "enable_event"
#define DISABLE_EVENT_STR "disable_event"
/*
 * State shared by the function probes registered by one
 * enable_event/disable_event command.
 */
struct event_probe_data {
/* Event file whose SOFT_DISABLED bit the probe sets or clears */
struct ftrace_event_file *file;
/* Remaining number of state switches; -1 is printed as "unlimited" */
unsigned long count;
/* Incremented by the probe init callback, decremented by the free callback */
int ref;
/* true: the probe clears SOFT_DISABLED; false: the probe sets it */
bool enable;
};
/*
 * Find the event file in @tr whose event is named @event and whose class
 * belongs to @system.
 *
 * Events lacking a name, a class or a class reg callback are skipped, and
 * so are events flagged TRACE_EVENT_FL_IGNORE_ENABLE.
 *
 * Returns the matching file, or NULL when nothing matches.
 */
static struct ftrace_event_file *
find_event_file(struct trace_array *tr, const char *system, const char *event)
{
	struct ftrace_event_file *file;
	struct ftrace_event_call *call;

	list_for_each_entry(file, &tr->events, list) {
		call = file->event_call;

		/* Not a candidate for enabling or disabling */
		if (!call->name || !call->class || !call->class->reg ||
		    (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
			continue;

		if (strcmp(event, call->name) != 0)
			continue;
		if (strcmp(system, call->class->system) != 0)
			continue;

		return file;
	}

	return NULL;
}
/*
 * Function probe callback: switch the soft-disabled state of the event
 * file recorded in the probe data.  An "enable" probe clears the
 * SOFT_DISABLED bit, a "disable" probe sets it.  A NULL data slot is
 * ignored.
 */
static void
event_enable_probe(unsigned long ip, unsigned long parent_ip, void **_data)
{
	struct event_probe_data *data = *(struct event_probe_data **)_data;

	if (!data)
		return;

	if (!data->enable) {
		set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &data->file->flags);
		return;
	}

	clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &data->file->flags);
}
static void
event_enable_count_probe(unsigned long ip, unsigned long parent_ip, void **_data)
{
struct event_probe_data **pdata = (struct event_probe_data **)_data;
struct event_probe_data *data = *pdata;
if (!data)
return;
if (!data->count)
return;
/* Skip if the event is in a state we want to switch to */
if (data->enable == !(data->file->flags & FTRACE_EVENT_FL_SOFT_DISABLED))
return;
if (data->count != -1)
(data->count)--;
event_enable_probe(ip, parent_ip, _data);
}
static int
event_enable_print(struct seq_file *m, unsigned long ip,
struct ftrace_probe_ops *ops, void *_data)
{
struct event_probe_data *data = _data;
seq_printf(m, "%ps:", (void *)ip);
seq_printf(m, "%s:%s:%s",
data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
data->file->event_call->class->system,
data->file->event_call->name);
if (data->count == -1)
seq_printf(m, ":unlimited\n");
else
seq_printf(m, ":count=%ld\n", data->count);
return 0;
}
/*
 * Probe init callback: count one more user of the shared probe data.
 * Always returns 0.
 */
static int
event_enable_init(struct ftrace_probe_ops *ops, unsigned long ip,
		  void **_data)
{
	struct event_probe_data *data = *(struct event_probe_data **)_data;

	data->ref += 1;
	return 0;
}
/*
 * Probe free callback: drop one user of the shared probe data and clear
 * the caller's slot.  When the last user goes away the event leaves soft
 * mode, the module reference is released and the data is freed.
 *
 * A non-positive reference count triggers a one-time warning and nothing
 * else is touched.
 */
static void
event_enable_free(struct ftrace_probe_ops *ops, unsigned long ip,
		  void **_data)
{
	struct event_probe_data **slot = (struct event_probe_data **)_data;
	struct event_probe_data *data = *slot;

	if (WARN_ON_ONCE(data->ref <= 0))
		return;

	if (--data->ref == 0) {
		/* Remove the SOFT_MODE flag */
		__ftrace_event_enable_disable(data->file, 0, 1);
		module_put(data->file->event_call->mod);
		kfree(data);
	}

	*slot = NULL;
}
/*
 * Probe operation tables used by event_enable_func().  The enable and
 * disable tables carry the same callbacks; the direction of the switch
 * comes from the probe data, and event_enable_func() picks the table from
 * the command name and from whether a count was given.
 */

/* "enable_event" without a count */
static struct ftrace_probe_ops event_enable_probe_ops = {
.func = event_enable_probe,
.print = event_enable_print,
.init = event_enable_init,
.free = event_enable_free,
};
/* "enable_event" with a count */
static struct ftrace_probe_ops event_enable_count_probe_ops = {
.func = event_enable_count_probe,
.print = event_enable_print,
.init = event_enable_init,
.free = event_enable_free,
};
/* "disable_event" without a count */
static struct ftrace_probe_ops event_disable_probe_ops = {
.func = event_enable_probe,
.print = event_enable_print,
.init = event_enable_init,
.free = event_enable_free,
};
/* "disable_event" with a count */
static struct ftrace_probe_ops event_disable_count_probe_ops = {
.func = event_enable_count_probe,
.print = event_enable_print,
.init = event_enable_init,
.free = event_enable_free,
};
static int
event_enable_func(struct ftrace_hash *hash,
char *glob, char *cmd, char *param, int enabled)
{
struct trace_array *tr = top_trace_array();
struct ftrace_event_file *file;
struct ftrace_probe_ops *ops;
struct event_probe_data *data;
const char *system;
const char *event;
char *number;
bool enable;
int ret;
/* hash funcs only work with set_ftrace_filter */
if (!enabled)
return -EINVAL;
if (!param)
return -EINVAL;
system = strsep(&param, ":");
if (!param)
return -EINVAL;
event = strsep(&param, ":");
mutex_lock(&event_mutex);
ret = -EINVAL;
file = find_event_file(tr, system, event);
if (!file)
goto out;
enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
if (enable)
ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops;
else
ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops;
if (glob[0] == '!') {
unregister_ftrace_function_probe_func(glob+1, ops);
ret = 0;
goto out;
}
ret = -ENOMEM;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
goto out;
data->enable = enable;
data->count = -1;
data->file = file;
if (!param)
goto out_reg;
number = strsep(&param, ":");
ret = -EINVAL;
if (!strlen(number))
goto out_free;
/*
* We use the callback data field (which is a pointer)
* as our counter.
*/
ret = kstrtoul(number, 0, &data->count);
if (ret)
goto out_free;
out_reg:
/* Don't let event modules unload while probe registered */
ret = try_module_get(file->event_call->mod);
if (!ret)
goto out_free;
ret = __ftrace_event_enable_disable(file, 1, 1);
if (ret < 0)
goto out_put;
ret = register_ftrace_function_probe(glob, ops, data);
if (!ret)
goto out_disable;
out:
mutex_unlock(&event_mutex);
return ret;
out_disable:
__ftrace_event_enable_disable(file, 0, 1);
out_put:
module_put(file->event_call->mod);
out_free:
kfree(data);
goto out;
}
/*
 * Command descriptors for "enable_event" and "disable_event".  Both use
 * event_enable_func(), which tells them apart by the command name it is
 * passed.
 */
static struct ftrace_func_command event_enable_cmd = {
.name = ENABLE_EVENT_STR,
.func = event_enable_func,
};
static struct ftrace_func_command event_disable_cmd = {
.name = DISABLE_EVENT_STR,
.func = event_enable_func,
};
/*
 * Register the enable_event and disable_event function commands.
 *
 * If the second registration fails, the first one is rolled back so that
 * either both commands are available or neither is.  Each failure also
 * triggers a warning.
 *
 * Returns the result of the last registration attempted.
 */
static __init int register_event_cmds(void)
{
	int ret = register_ftrace_command(&event_enable_cmd);

	if (WARN_ON(ret < 0))
		return ret;

	ret = register_ftrace_command(&event_disable_cmd);
	if (!WARN_ON(ret < 0))
		return ret;

	unregister_ftrace_command(&event_enable_cmd);
	return ret;
}
#else
/* Without CONFIG_DYNAMIC_FTRACE there are no commands to register. */
static inline int register_event_cmds(void) { return 0; }
#endif /* CONFIG_DYNAMIC_FTRACE */
/*
 * The top level array already has its ftrace_event_file descriptors,
 * created early so that events can be recorded during boot.  This runs
 * once debugfs is initialized and creates the files that belong to those
 * descriptors.
 *
 * A failure for one event is reported with a warning and does not stop
 * the remaining events from being processed.
 */
static __init void
__trace_early_add_event_dirs(struct trace_array *tr)
{
	struct ftrace_event_file *file;

	list_for_each_entry(file, &tr->events, list) {
		if (event_create_dir(tr->event_dir, file,
				     &ftrace_event_id_fops,
				     &ftrace_enable_fops,
				     &ftrace_event_filter_fops,
				     &ftrace_event_format_fops) >= 0)
			continue;

		pr_warning("Could not create directory for event %s\n",
			   file->event_call->name);
	}
}
/*
* For early boot up, the top trace array requires to have
* a list of events that can be enabled. This must be done before
* the filesystem is set up in order to allow events to be traced
* early.
*/
static __init void
__trace_early_add_events(struct trace_array *tr)
{
	struct ftrace_event_call *ev;

	list_for_each_entry(ev, &ftrace_events, list) {
		/* Early boot up should not have any modules loaded */
		if (WARN_ON_ONCE(ev->mod))
			continue;

		/* A failed event is reported and skipped; the walk goes on. */
		if (__trace_early_add_new_event(ev, tr) < 0)
			pr_warning("Could not create early event %s\n",
				   ev->name);
	}
}
/* Remove the event directory structure for a trace directory. */
static void
__trace_remove_event_dirs(struct trace_array *tr)
{
	struct ftrace_event_file *cur, *tmp;

	/*
	 * Use the _safe iterator: every entry is unlinked from tr->events
	 * and handed back to file_cachep inside the loop body.
	 */
	list_for_each_entry_safe(cur, tmp, &tr->events, list) {
		list_del(&cur->list);
		debugfs_remove_recursive(cur->dir);
		remove_subsystem(cur->system);
		kmem_cache_free(file_cachep, cur);
	}
}
/*
 * Add @call to every trace array on ftrace_trace_arrays.
 *
 * When @file_ops is non-NULL the event is added through
 * __trace_add_new_mod_event() using those ops; otherwise it is added
 * through __trace_add_new_event() with the default file operations.
 */
static void
__add_event_to_tracers(struct ftrace_event_call *call,
		       struct ftrace_module_file_ops *file_ops)
{
	struct trace_array *array;

	list_for_each_entry(array, &ftrace_trace_arrays, list) {
		if (!file_ops) {
			__trace_add_new_event(call, array,
					      &ftrace_event_id_fops,
					      &ftrace_enable_fops,
					      &ftrace_event_filter_fops,
					      &ftrace_event_format_fops);
			continue;
		}

		__trace_add_new_mod_event(call, array, file_ops);
	}
}
static struct notifier_block trace_module_nb = {
.notifier_call = trace_module_notify,
.priority = 0,
......@@ -1464,15 +2197,135 @@ static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
static __init int setup_trace_event(char *str)
{
strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
ring_buffer_expanded = 1;
tracing_selftest_disabled = 1;
ring_buffer_expanded = true;
tracing_selftest_disabled = true;
return 1;
}
__setup("trace_event=", setup_trace_event);
/* Expects to have event_mutex held when called */
/*
 * Create the top level event files for @tr underneath @parent:
 * the "set_event" file, the "events" directory, and inside that
 * directory the "header_page", "header_event" and "enable" files.
 *
 * On success the "events" dentry is stored in tr->event_dir and 0 is
 * returned. -ENOMEM is returned if either "set_event" or "events" can
 * not be created; in that case nothing created here is left behind.
 */
static int
create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
{
	struct dentry *d_events;
	struct dentry *entry;

	entry = debugfs_create_file("set_event", 0644, parent,
				    tr, &ftrace_set_event_fops);
	if (!entry) {
		pr_warning("Could not create debugfs 'set_event' entry\n");
		return -ENOMEM;
	}

	d_events = debugfs_create_dir("events", parent);
	if (!d_events) {
		pr_warning("Could not create debugfs 'events' directory\n");
		/*
		 * We are reporting failure, so do not leave the
		 * "set_event" file (which carries @tr as its private
		 * data) behind in @parent.
		 */
		debugfs_remove_recursive(entry);
		return -ENOMEM;
	}

	/* ring buffer internal formats */
	trace_create_file("header_page", 0444, d_events,
			  ring_buffer_print_page_header,
			  &ftrace_show_header_fops);

	trace_create_file("header_event", 0444, d_events,
			  ring_buffer_print_entry_header,
			  &ftrace_show_header_fops);

	trace_create_file("enable", 0644, d_events,
			  tr, &ftrace_tr_enable_fops);

	/* Publish the directory only once it has been populated. */
	tr->event_dir = d_events;

	return 0;
}
/**
* event_trace_add_tracer - add an instance of a trace_array to events
* @parent: The parent dentry to place the files/directories for events in
* @tr: The trace array associated with these events
*
* When a new instance is created, it needs to set up its events
* directory, as well as other files associated with events. It also
* creates the event hierarchy in the @parent/events directory.
*
* Returns 0 on success.
*/
int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr)
{
	int err;

	mutex_lock(&event_mutex);

	err = create_event_toplevel_files(parent, tr);
	if (!err) {
		/* The per-event directories are built under the write lock. */
		down_write(&trace_event_sem);
		__trace_add_event_dirs(tr);
		up_write(&trace_event_sem);
	}

	mutex_unlock(&event_mutex);

	return err;
}
/*
* The top trace array already had its file descriptors created.
* Now the files themselves need to be created.
*/
static __init int
early_event_add_tracer(struct dentry *parent, struct trace_array *tr)
{
	int err;

	mutex_lock(&event_mutex);

	err = create_event_toplevel_files(parent, tr);
	if (!err) {
		/* Attach directories to the file descriptors made earlier. */
		down_write(&trace_event_sem);
		__trace_early_add_event_dirs(tr);
		up_write(&trace_event_sem);
	}

	mutex_unlock(&event_mutex);

	return err;
}
/*
 * Tear down the event side of @tr: disable its events, remove the
 * per-event directories and then the "events" directory itself, and
 * clear tr->event_dir. Always returns 0.
 */
int event_trace_del_tracer(struct trace_array *tr)
{
/* Disable any running events */
/* NOTE(review): called before event_mutex is taken below; presumably it acquires that mutex itself - confirm. */
__ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
mutex_lock(&event_mutex);
/* Directory removal runs with trace_event_sem held for write. */
down_write(&trace_event_sem);
__trace_remove_event_dirs(tr);
debugfs_remove_recursive(tr->event_dir);
up_write(&trace_event_sem);
/* The dentry is gone; drop the stale pointer while still under event_mutex. */
tr->event_dir = NULL;
mutex_unlock(&event_mutex);
return 0;
}
/*
 * Create the slab caches for struct ftrace_event_field (field_cachep)
 * and struct ftrace_event_file (file_cachep). Both are created with
 * SLAB_PANIC and no result is checked here. Always returns 0.
 */
static __init int event_trace_memsetup(void)
{
field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC);
file_cachep = KMEM_CACHE(ftrace_event_file, SLAB_PANIC);
return 0;
}
static __init int event_trace_enable(void)
{
struct trace_array *tr = top_trace_array();
struct ftrace_event_call **iter, *call;
char *buf = bootup_event_buf;
char *token;
......@@ -1486,6 +2339,14 @@ static __init int event_trace_enable(void)
list_add(&call->list, &ftrace_events);
}
/*
* We need the top trace array to have a working set of trace
* points at early init, before the debug files and directories
* are created. Create the file entries now, and attach them
* to the actual file dentries later.
*/
__trace_early_add_events(tr);
while (true) {
token = strsep(&buf, ",");
......@@ -1494,73 +2355,43 @@ static __init int event_trace_enable(void)
if (!*token)
continue;
ret = ftrace_set_clr_event(token, 1);
ret = ftrace_set_clr_event(tr, token, 1);
if (ret)
pr_warn("Failed to enable trace event: %s\n", token);
}
trace_printk_start_comm();
register_event_cmds();
return 0;
}
static __init int event_trace_init(void)
{
struct ftrace_event_call *call;
struct trace_array *tr;
struct dentry *d_tracer;
struct dentry *entry;
struct dentry *d_events;
int ret;
tr = top_trace_array();
d_tracer = tracing_init_dentry();
if (!d_tracer)
return 0;
entry = debugfs_create_file("available_events", 0444, d_tracer,
NULL, &ftrace_avail_fops);
tr, &ftrace_avail_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'available_events' entry\n");
entry = debugfs_create_file("set_event", 0644, d_tracer,
NULL, &ftrace_set_event_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'set_event' entry\n");
d_events = event_trace_events_dir();
if (!d_events)
return 0;
/* ring buffer internal formats */
trace_create_file("header_page", 0444, d_events,
ring_buffer_print_page_header,
&ftrace_show_header_fops);
trace_create_file("header_event", 0444, d_events,
ring_buffer_print_entry_header,
&ftrace_show_header_fops);
trace_create_file("enable", 0644, d_events,
NULL, &ftrace_system_enable_fops);
if (trace_define_common_fields())
pr_warning("tracing: Failed to allocate common fields");
/*
* Early initialization already enabled ftrace event.
* Now it's only necessary to create the event directory.
*/
list_for_each_entry(call, &ftrace_events, list) {
ret = event_create_dir(call, d_events,
&ftrace_event_id_fops,
&ftrace_enable_fops,
&ftrace_event_filter_fops,
&ftrace_event_format_fops);
if (ret < 0)
event_remove(call);
}
ret = early_event_add_tracer(d_tracer, tr);
if (ret)
return ret;
ret = register_module_notifier(&trace_module_nb);
if (ret)
......@@ -1568,6 +2399,7 @@ static __init int event_trace_init(void)
return 0;
}
early_initcall(event_trace_memsetup);
core_initcall(event_trace_enable);
fs_initcall(event_trace_init);
......@@ -1627,13 +2459,20 @@ static __init void event_test_stuff(void)
*/
static __init void event_trace_self_tests(void)
{
struct ftrace_subsystem_dir *dir;
struct ftrace_event_file *file;
struct ftrace_event_call *call;
struct event_subsystem *system;
struct trace_array *tr;
int ret;
tr = top_trace_array();
pr_info("Running tests on trace events:\n");
list_for_each_entry(call, &ftrace_events, list) {
list_for_each_entry(file, &tr->events, list) {
call = file->event_call;
/* Only test those that have a probe */
if (!call->class || !call->class->probe)
......@@ -1657,15 +2496,15 @@ static __init void event_trace_self_tests(void)
* If an event is already enabled, someone is using
* it and the self test should not be on.
*/
if (call->flags & TRACE_EVENT_FL_ENABLED) {
if (file->flags & FTRACE_EVENT_FL_ENABLED) {
pr_warning("Enabled event during self test!\n");
WARN_ON_ONCE(1);
continue;
}
ftrace_event_enable_disable(call, 1);
ftrace_event_enable_disable(file, 1);
event_test_stuff();
ftrace_event_enable_disable(call, 0);
ftrace_event_enable_disable(file, 0);
pr_cont("OK\n");
}
......@@ -1674,7 +2513,9 @@ static __init void event_trace_self_tests(void)
pr_info("Running tests on trace event systems:\n");
list_for_each_entry(system, &event_subsystems, list) {
list_for_each_entry(dir, &tr->systems, list) {
system = dir->subsystem;
/* the ftrace system is special, skip it */
if (strcmp(system->name, "ftrace") == 0)
......@@ -1682,7 +2523,7 @@ static __init void event_trace_self_tests(void)
pr_info("Testing event system %s: ", system->name);
ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1);
if (WARN_ON_ONCE(ret)) {
pr_warning("error enabling system %s\n",
system->name);
......@@ -1691,7 +2532,7 @@ static __init void event_trace_self_tests(void)
event_test_stuff();
ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0);
if (WARN_ON_ONCE(ret)) {
pr_warning("error disabling system %s\n",
system->name);
......@@ -1706,7 +2547,7 @@ static __init void event_trace_self_tests(void)
pr_info("Running tests on all trace events:\n");
pr_info("Testing all events: ");
ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1);
if (WARN_ON_ONCE(ret)) {
pr_warning("error enabling all events\n");
return;
......@@ -1715,7 +2556,7 @@ static __init void event_trace_self_tests(void)
event_test_stuff();
/* reset sysname */
ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
if (WARN_ON_ONCE(ret)) {
pr_warning("error disabling all events\n");
return;
......
......@@ -658,33 +658,6 @@ void print_subsystem_event_filter(struct event_subsystem *system,
mutex_unlock(&event_mutex);
}
/*
 * Return the first field on @head whose name equals @name,
 * or NULL when the list holds no such field.
 */
static struct ftrace_event_field *
__find_event_field(struct list_head *head, char *name)
{
	struct ftrace_event_field *cur;

	list_for_each_entry(cur, head, link) {
		if (strcmp(cur->name, name) == 0)
			return cur;
	}

	return NULL;
}
/*
 * Look up the field called @name for @call. The common fields are
 * searched first; only when they hold no match is the event's own
 * field list (from trace_get_fields()) consulted.
 */
static struct ftrace_event_field *
find_event_field(struct ftrace_event_call *call, char *name)
{
	struct ftrace_event_field *match;

	match = __find_event_field(&ftrace_common_fields, name);
	if (!match)
		match = __find_event_field(trace_get_fields(call), name);

	return match;
}
static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
{
stack->preds = kcalloc(n_preds + 1, sizeof(*stack->preds), GFP_KERNEL);
......@@ -1337,7 +1310,7 @@ static struct filter_pred *create_pred(struct filter_parse_state *ps,
return NULL;
}
field = find_event_field(call, operand1);
field = trace_find_event_field(call, operand1);
if (!field) {
parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0);
return NULL;
......@@ -1907,16 +1880,17 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
return err;
}
int apply_subsystem_event_filter(struct event_subsystem *system,
int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir,
char *filter_string)
{
struct event_subsystem *system = dir->subsystem;
struct event_filter *filter;
int err = 0;
mutex_lock(&event_mutex);
/* Make sure the system still has events */
if (!system->nr_events) {
if (!dir->nr_events) {
err = -ENODEV;
goto out_unlock;
}
......
......@@ -129,7 +129,7 @@ static void __always_unused ____ftrace_check_##name(void) \
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \
int \
static int __init \
ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
{ \
struct struct_name field; \
......@@ -168,7 +168,7 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\
regfn) \
\
struct ftrace_event_class event_class_ftrace_##call = { \
struct ftrace_event_class __refdata event_class_ftrace_##call = { \
.system = __stringify(TRACE_SYSTEM), \
.define_fields = ftrace_define_fields_##call, \
.fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
......
......@@ -28,7 +28,7 @@ static void tracing_stop_function_trace(void);
static int function_trace_init(struct trace_array *tr)
{
func_trace = tr;
tr->cpu = get_cpu();
tr->trace_buffer.cpu = get_cpu();
put_cpu();
tracing_start_cmdline_record();
......@@ -44,7 +44,7 @@ static void function_trace_reset(struct trace_array *tr)
static void function_trace_start(struct trace_array *tr)
{
tracing_reset_online_cpus(tr);
tracing_reset_online_cpus(&tr->trace_buffer);
}
/* Our option */
......@@ -76,7 +76,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
goto out;
cpu = smp_processor_id();
data = tr->data[cpu];
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
if (!atomic_read(&data->disabled)) {
local_save_flags(flags);
trace_function(tr, ip, parent_ip, flags, pc);
......@@ -107,7 +107,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
*/
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1)) {
......@@ -214,66 +214,89 @@ static struct tracer function_trace __read_mostly =
};
#ifdef CONFIG_DYNAMIC_FTRACE
static void
ftrace_traceon(unsigned long ip, unsigned long parent_ip, void **data)
static int update_count(void **data)
{
long *count = (long *)data;
if (tracing_is_on())
return;
unsigned long *count = (long *)data;
if (!*count)
return;
return 0;
if (*count != -1)
(*count)--;
tracing_on();
return 1;
}
static void
ftrace_traceoff(unsigned long ip, unsigned long parent_ip, void **data)
ftrace_traceon_count(unsigned long ip, unsigned long parent_ip, void **data)
{
long *count = (long *)data;
if (tracing_is_on())
return;
if (update_count(data))
tracing_on();
}
static void
ftrace_traceoff_count(unsigned long ip, unsigned long parent_ip, void **data)
{
if (!tracing_is_on())
return;
if (!*count)
if (update_count(data))
tracing_off();
}
static void
ftrace_traceon(unsigned long ip, unsigned long parent_ip, void **data)
{
if (tracing_is_on())
return;
if (*count != -1)
(*count)--;
tracing_on();
}
/* Probe callback: switch tracing off, but only if it is currently on. */
static void
ftrace_traceoff(unsigned long ip, unsigned long parent_ip, void **data)
{
	if (tracing_is_on())
		tracing_off();
}
static int
ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
struct ftrace_probe_ops *ops, void *data);
/*
* Skip 4:
* ftrace_stacktrace()
* function_trace_probe_call()
* ftrace_ops_list_func()
* ftrace_call()
*/
#define STACK_SKIP 4
static struct ftrace_probe_ops traceon_probe_ops = {
.func = ftrace_traceon,
.print = ftrace_trace_onoff_print,
};
/*
 * Probe callback: record a stack trace unconditionally, passing
 * STACK_SKIP as the number of frames to skip. @ip, @parent_ip and
 * @data are unused.
 */
static void
ftrace_stacktrace(unsigned long ip, unsigned long parent_ip, void **data)
{
trace_dump_stack(STACK_SKIP);
}
static struct ftrace_probe_ops traceoff_probe_ops = {
.func = ftrace_traceoff,
.print = ftrace_trace_onoff_print,
};
/*
 * Counted variant of the stacktrace probe: dump a stack trace only
 * while tracing is on and update_count() reports a non-zero result.
 * update_count() is not called at all when tracing is off.
 */
static void
ftrace_stacktrace_count(unsigned long ip, unsigned long parent_ip, void **data)
{
	if (tracing_is_on() && update_count(data))
		trace_dump_stack(STACK_SKIP);
}
static int
ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
struct ftrace_probe_ops *ops, void *data)
ftrace_probe_print(const char *name, struct seq_file *m,
unsigned long ip, void *data)
{
long count = (long)data;
seq_printf(m, "%ps:", (void *)ip);
if (ops == &traceon_probe_ops)
seq_printf(m, "traceon");
else
seq_printf(m, "traceoff");
seq_printf(m, "%ps:%s", (void *)ip, name);
if (count == -1)
seq_printf(m, ":unlimited\n");
......@@ -284,26 +307,61 @@ ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
}
static int
ftrace_trace_onoff_unreg(char *glob, char *cmd, char *param)
ftrace_traceon_print(struct seq_file *m, unsigned long ip,
struct ftrace_probe_ops *ops, void *data)
{
struct ftrace_probe_ops *ops;
/* we register both traceon and traceoff to this callback */
if (strcmp(cmd, "traceon") == 0)
ops = &traceon_probe_ops;
else
ops = &traceoff_probe_ops;
return ftrace_probe_print("traceon", m, ip, data);
}
unregister_ftrace_function_probe_func(glob, ops);
/*
 * .print callback for the traceoff probes: delegates to
 * ftrace_probe_print() with the label "traceoff". @ops is unused.
 */
static int
ftrace_traceoff_print(struct seq_file *m, unsigned long ip,
struct ftrace_probe_ops *ops, void *data)
{
return ftrace_probe_print("traceoff", m, ip, data);
}
return 0;
/*
 * .print callback for the stacktrace probes: delegates to
 * ftrace_probe_print() with the label "stacktrace". @ops is unused.
 */
static int
ftrace_stacktrace_print(struct seq_file *m, unsigned long ip,
struct ftrace_probe_ops *ops, void *data)
{
return ftrace_probe_print("stacktrace", m, ip, data);
}
/*
 * Probe operation tables. Each command has two tables: a "_count"
 * table whose .func is the *_count callback, and a plain table whose
 * .func is the uncounted callback. Both tables of a command share the
 * same .print callback.
 */
/* traceon, counted */
static struct ftrace_probe_ops traceon_count_probe_ops = {
.func = ftrace_traceon_count,
.print = ftrace_traceon_print,
};
/* traceoff, counted */
static struct ftrace_probe_ops traceoff_count_probe_ops = {
.func = ftrace_traceoff_count,
.print = ftrace_traceoff_print,
};
/* stacktrace, counted */
static struct ftrace_probe_ops stacktrace_count_probe_ops = {
.func = ftrace_stacktrace_count,
.print = ftrace_stacktrace_print,
};
/* traceon, uncounted */
static struct ftrace_probe_ops traceon_probe_ops = {
.func = ftrace_traceon,
.print = ftrace_traceon_print,
};
/* traceoff, uncounted */
static struct ftrace_probe_ops traceoff_probe_ops = {
.func = ftrace_traceoff,
.print = ftrace_traceoff_print,
};
/* stacktrace, uncounted */
static struct ftrace_probe_ops stacktrace_probe_ops = {
.func = ftrace_stacktrace,
.print = ftrace_stacktrace_print,
};
static int
ftrace_trace_onoff_callback(struct ftrace_hash *hash,
char *glob, char *cmd, char *param, int enable)
ftrace_trace_probe_callback(struct ftrace_probe_ops *ops,
struct ftrace_hash *hash, char *glob,
char *cmd, char *param, int enable)
{
struct ftrace_probe_ops *ops;
void *count = (void *)-1;
char *number;
int ret;
......@@ -312,14 +370,10 @@ ftrace_trace_onoff_callback(struct ftrace_hash *hash,
if (!enable)
return -EINVAL;
if (glob[0] == '!')
return ftrace_trace_onoff_unreg(glob+1, cmd, param);
/* we register both traceon and traceoff to this callback */
if (strcmp(cmd, "traceon") == 0)
ops = &traceon_probe_ops;
else
ops = &traceoff_probe_ops;
if (glob[0] == '!') {
unregister_ftrace_function_probe_func(glob+1, ops);
return 0;
}
if (!param)
goto out_reg;
......@@ -343,6 +397,34 @@ ftrace_trace_onoff_callback(struct ftrace_hash *hash,
return ret < 0 ? ret : 0;
}
/*
 * Command handler shared by "traceon" and "traceoff".
 *
 * Picks the probe ops from @cmd ("traceon" versus anything else) and
 * from whether @param was supplied (counted versus plain ops), then
 * hands everything to ftrace_trace_probe_callback() and returns its
 * result.
 */
static int
ftrace_trace_onoff_callback(struct ftrace_hash *hash,
			    char *glob, char *cmd, char *param, int enable)
{
	struct ftrace_probe_ops *ops;
	int is_traceon = strcmp(cmd, "traceon") == 0;

	/* we register both traceon and traceoff to this callback */
	if (is_traceon && param)
		ops = &traceon_count_probe_ops;
	else if (is_traceon)
		ops = &traceon_probe_ops;
	else if (param)
		ops = &traceoff_count_probe_ops;
	else
		ops = &traceoff_probe_ops;

	return ftrace_trace_probe_callback(ops, hash, glob, cmd,
					   param, enable);
}
/*
 * Command handler for "stacktrace": use the counted probe ops when
 * @param was supplied and the plain ones otherwise, then defer to
 * ftrace_trace_probe_callback() and return its result.
 */
static int
ftrace_stacktrace_callback(struct ftrace_hash *hash,
			   char *glob, char *cmd, char *param, int enable)
{
	struct ftrace_probe_ops *ops;

	if (param)
		ops = &stacktrace_count_probe_ops;
	else
		ops = &stacktrace_probe_ops;

	return ftrace_trace_probe_callback(ops, hash, glob, cmd,
					   param, enable);
}
static struct ftrace_func_command ftrace_traceon_cmd = {
.name = "traceon",
.func = ftrace_trace_onoff_callback,
......@@ -353,6 +435,11 @@ static struct ftrace_func_command ftrace_traceoff_cmd = {
.func = ftrace_trace_onoff_callback,
};
/* Function command "stacktrace", handled by ftrace_stacktrace_callback. */
static struct ftrace_func_command ftrace_stacktrace_cmd = {
.name = "stacktrace",
.func = ftrace_stacktrace_callback,
};
static int __init init_func_cmd_traceon(void)
{
int ret;
......@@ -364,6 +451,12 @@ static int __init init_func_cmd_traceon(void)
ret = register_ftrace_command(&ftrace_traceon_cmd);
if (ret)
unregister_ftrace_command(&ftrace_traceoff_cmd);
ret = register_ftrace_command(&ftrace_stacktrace_cmd);
if (ret) {
unregister_ftrace_command(&ftrace_traceoff_cmd);
unregister_ftrace_command(&ftrace_traceon_cmd);
}
return ret;
}
#else
......
......@@ -218,7 +218,7 @@ int __trace_graph_entry(struct trace_array *tr,
{
struct ftrace_event_call *call = &event_funcgraph_entry;
struct ring_buffer_event *event;
struct ring_buffer *buffer = tr->buffer;
struct ring_buffer *buffer = tr->trace_buffer.buffer;
struct ftrace_graph_ent_entry *entry;
if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
......@@ -265,7 +265,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1)) {
pc = preempt_count();
......@@ -323,7 +323,7 @@ void __trace_graph_return(struct trace_array *tr,
{
struct ftrace_event_call *call = &event_funcgraph_exit;
struct ring_buffer_event *event;
struct ring_buffer *buffer = tr->buffer;
struct ring_buffer *buffer = tr->trace_buffer.buffer;
struct ftrace_graph_ret_entry *entry;
if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
......@@ -350,7 +350,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1)) {
pc = preempt_count();
......@@ -560,9 +560,9 @@ get_return_for_leaf(struct trace_iterator *iter,
* We need to consume the current entry to see
* the next one.
*/
ring_buffer_consume(iter->tr->buffer, iter->cpu,
ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu,
NULL, NULL);
event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
event = ring_buffer_peek(iter->trace_buffer->buffer, iter->cpu,
NULL, NULL);
}
......
......@@ -33,6 +33,7 @@ enum {
static int trace_type __read_mostly;
static int save_flags;
static bool function_enabled;
static void stop_irqsoff_tracer(struct trace_array *tr, int graph);
static int start_irqsoff_tracer(struct trace_array *tr, int graph);
......@@ -121,7 +122,7 @@ static int func_prolog_dec(struct trace_array *tr,
if (!irqs_disabled_flags(*flags))
return 0;
*data = tr->data[cpu];
*data = per_cpu_ptr(tr->trace_buffer.data, cpu);
disabled = atomic_inc_return(&(*data)->disabled);
if (likely(disabled == 1))
......@@ -175,7 +176,7 @@ static int irqsoff_set_flag(u32 old_flags, u32 bit, int set)
per_cpu(tracing_cpu, cpu) = 0;
tracing_max_latency = 0;
tracing_reset_online_cpus(irqsoff_trace);
tracing_reset_online_cpus(&irqsoff_trace->trace_buffer);
return start_irqsoff_tracer(irqsoff_trace, set);
}
......@@ -380,7 +381,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
if (per_cpu(tracing_cpu, cpu))
return;
data = tr->data[cpu];
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
if (unlikely(!data) || atomic_read(&data->disabled))
return;
......@@ -418,7 +419,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
if (!tracer_enabled)
return;
data = tr->data[cpu];
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
if (unlikely(!data) ||
!data->critical_start || atomic_read(&data->disabled))
......@@ -528,15 +529,60 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
}
#endif /* CONFIG_PREEMPT_TRACER */
static int start_irqsoff_tracer(struct trace_array *tr, int graph)
static int register_irqsoff_function(int graph, int set)
{
int ret = 0;
int ret;
if (!graph)
ret = register_ftrace_function(&trace_ops);
else
/* 'set' is set if TRACE_ITER_FUNCTION is about to be set */
if (function_enabled || (!set && !(trace_flags & TRACE_ITER_FUNCTION)))
return 0;
if (graph)
ret = register_ftrace_graph(&irqsoff_graph_return,
&irqsoff_graph_entry);
else
ret = register_ftrace_function(&trace_ops);
if (!ret)
function_enabled = true;
return ret;
}
/*
 * Undo register_irqsoff_function(): unregister either the graph
 * tracer (@graph non-zero) or trace_ops, then clear function_enabled.
 * Does nothing when function_enabled is already false.
 */
static void unregister_irqsoff_function(int graph)
{
	if (function_enabled) {
		if (!graph)
			unregister_ftrace_function(&trace_ops);
		else
			unregister_ftrace_graph();

		function_enabled = false;
	}
}
/*
 * Register (@set non-zero) or unregister (@set zero) the function
 * callbacks for the current graph mode. The result of the registration
 * is not propagated.
 */
static void irqsoff_function_set(int set)
{
	if (!set) {
		unregister_irqsoff_function(is_graph());
		return;
	}

	register_irqsoff_function(is_graph(), 1);
}
/*
 * .flag_changed hook: when @mask includes TRACE_ITER_FUNCTION, bring
 * the function callbacks in line with @set first. The return value is
 * always that of trace_keep_overwrite().
 */
static int irqsoff_flag_changed(struct tracer *tracer, u32 mask, int set)
{
	const bool function_bit = (mask & TRACE_ITER_FUNCTION) != 0;

	if (function_bit)
		irqsoff_function_set(set);

	return trace_keep_overwrite(tracer, mask, set);
}
static int start_irqsoff_tracer(struct trace_array *tr, int graph)
{
int ret;
ret = register_irqsoff_function(graph, 0);
if (!ret && tracing_is_enabled())
tracer_enabled = 1;
......@@ -550,10 +596,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr, int graph)
{
tracer_enabled = 0;
if (!graph)
unregister_ftrace_function(&trace_ops);
else
unregister_ftrace_graph();
unregister_irqsoff_function(graph);
}
static void __irqsoff_tracer_init(struct trace_array *tr)
......@@ -561,14 +604,14 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
save_flags = trace_flags;
/* non overwrite screws up the latency tracers */
set_tracer_flag(TRACE_ITER_OVERWRITE, 1);
set_tracer_flag(TRACE_ITER_LATENCY_FMT, 1);
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1);
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1);
tracing_max_latency = 0;
irqsoff_trace = tr;
/* make sure that the tracer is visible */
smp_wmb();
tracing_reset_online_cpus(tr);
tracing_reset_online_cpus(&tr->trace_buffer);
if (start_irqsoff_tracer(tr, is_graph()))
printk(KERN_ERR "failed to start irqsoff tracer\n");
......@@ -581,8 +624,8 @@ static void irqsoff_tracer_reset(struct trace_array *tr)
stop_irqsoff_tracer(tr, is_graph());
set_tracer_flag(TRACE_ITER_LATENCY_FMT, lat_flag);
set_tracer_flag(TRACE_ITER_OVERWRITE, overwrite_flag);
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag);
}
static void irqsoff_tracer_start(struct trace_array *tr)
......@@ -615,7 +658,7 @@ static struct tracer irqsoff_tracer __read_mostly =
.print_line = irqsoff_print_line,
.flags = &tracer_flags,
.set_flag = irqsoff_set_flag,
.flag_changed = trace_keep_overwrite,
.flag_changed = irqsoff_flag_changed,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_irqsoff,
#endif
......@@ -649,7 +692,7 @@ static struct tracer preemptoff_tracer __read_mostly =
.print_line = irqsoff_print_line,
.flags = &tracer_flags,
.set_flag = irqsoff_set_flag,
.flag_changed = trace_keep_overwrite,
.flag_changed = irqsoff_flag_changed,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_preemptoff,
#endif
......@@ -685,7 +728,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
.print_line = irqsoff_print_line,
.flags = &tracer_flags,
.set_flag = irqsoff_set_flag,
.flag_changed = trace_keep_overwrite,
.flag_changed = irqsoff_flag_changed,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_preemptirqsoff,
#endif
......
......@@ -26,7 +26,7 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file)
trace_init_global_iter(&iter);
for_each_tracing_cpu(cpu) {
atomic_inc(&iter.tr->data[cpu]->disabled);
atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
}
old_userobj = trace_flags;
......@@ -43,17 +43,17 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file)
iter.iter_flags |= TRACE_FILE_LAT_FMT;
iter.pos = -1;
if (cpu_file == TRACE_PIPE_ALL_CPU) {
if (cpu_file == RING_BUFFER_ALL_CPUS) {
for_each_tracing_cpu(cpu) {
iter.buffer_iter[cpu] =
ring_buffer_read_prepare(iter.tr->buffer, cpu);
ring_buffer_read_prepare(iter.trace_buffer->buffer, cpu);
ring_buffer_read_start(iter.buffer_iter[cpu]);
tracing_iter_reset(&iter, cpu);
}
} else {
iter.cpu_file = cpu_file;
iter.buffer_iter[cpu_file] =
ring_buffer_read_prepare(iter.tr->buffer, cpu_file);
ring_buffer_read_prepare(iter.trace_buffer->buffer, cpu_file);
ring_buffer_read_start(iter.buffer_iter[cpu_file]);
tracing_iter_reset(&iter, cpu_file);
}
......@@ -83,7 +83,7 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file)
trace_flags = old_userobj;
for_each_tracing_cpu(cpu) {
atomic_dec(&iter.tr->data[cpu]->disabled);
atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
}
for_each_tracing_cpu(cpu)
......@@ -115,7 +115,7 @@ static int kdb_ftdump(int argc, const char **argv)
!cpu_online(cpu_file))
return KDB_BADINT;
} else {
cpu_file = TRACE_PIPE_ALL_CPU;
cpu_file = RING_BUFFER_ALL_CPUS;
}
kdb_trap_printk++;
......
......@@ -31,7 +31,7 @@ static void mmio_reset_data(struct trace_array *tr)
overrun_detected = false;
prev_overruns = 0;
tracing_reset_online_cpus(tr);
tracing_reset_online_cpus(&tr->trace_buffer);
}
static int mmio_trace_init(struct trace_array *tr)
......@@ -128,7 +128,7 @@ static void mmio_close(struct trace_iterator *iter)
static unsigned long count_overruns(struct trace_iterator *iter)
{
unsigned long cnt = atomic_xchg(&dropped_count, 0);
unsigned long over = ring_buffer_overruns(iter->tr->buffer);
unsigned long over = ring_buffer_overruns(iter->trace_buffer->buffer);
if (over > prev_overruns)
cnt += over - prev_overruns;
......@@ -309,7 +309,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
struct mmiotrace_rw *rw)
{
struct ftrace_event_call *call = &event_mmiotrace_rw;
struct ring_buffer *buffer = tr->buffer;
struct ring_buffer *buffer = tr->trace_buffer.buffer;
struct ring_buffer_event *event;
struct trace_mmiotrace_rw *entry;
int pc = preempt_count();
......@@ -330,7 +330,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
void mmio_trace_rw(struct mmiotrace_rw *rw)
{
struct trace_array *tr = mmio_trace_array;
struct trace_array_cpu *data = tr->data[smp_processor_id()];
struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, smp_processor_id());
__trace_mmiotrace_rw(tr, data, rw);
}
......@@ -339,7 +339,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
struct mmiotrace_map *map)
{
struct ftrace_event_call *call = &event_mmiotrace_map;
struct ring_buffer *buffer = tr->buffer;
struct ring_buffer *buffer = tr->trace_buffer.buffer;
struct ring_buffer_event *event;
struct trace_mmiotrace_map *entry;
int pc = preempt_count();
......@@ -363,7 +363,7 @@ void mmio_trace_mapping(struct mmiotrace_map *map)
struct trace_array_cpu *data;
preempt_disable();
data = tr->data[smp_processor_id()];
data = per_cpu_ptr(tr->trace_buffer.data, smp_processor_id());
__trace_mmiotrace_map(tr, data, map);
preempt_enable();
}
......
......@@ -14,7 +14,7 @@
/* must be a power of 2 */
#define EVENT_HASHSIZE 128
DECLARE_RWSEM(trace_event_mutex);
DECLARE_RWSEM(trace_event_sem);
static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
......@@ -37,6 +37,22 @@ int trace_print_seq(struct seq_file *m, struct trace_seq *s)
return ret;
}
/*
 * Write only the string of the current bputs entry into iter->seq.
 *
 * Returns TRACE_TYPE_PARTIAL_LINE when trace_seq_puts() returns 0,
 * TRACE_TYPE_HANDLED otherwise.
 */
enum print_line_t trace_print_bputs_msg_only(struct trace_iterator *iter)
{
	struct bputs_entry *bputs;

	trace_assign_type(bputs, iter->ent);

	if (!trace_seq_puts(&iter->seq, bputs->str))
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}
enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
......@@ -397,6 +413,32 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
}
EXPORT_SYMBOL(ftrace_print_hex_seq);
/*
 * Common first step for printing an event: check that the current
 * entry really belongs to @trace_event, reset iter->tmp_seq and write
 * the "<event name>: " prefix into iter->seq.
 *
 * Returns TRACE_TYPE_UNHANDLED (after a one-time warning) when the
 * entry type does not match the event, TRACE_TYPE_PARTIAL_LINE when
 * the prefix could not be written, and 0 otherwise.
 *
 * NOTE(review): success is reported as a literal 0 while failure is an
 * enum print_line_t constant. If TRACE_TYPE_PARTIAL_LINE happens to
 * evaluate to 0, callers cannot tell the partial-line case apart from
 * success - confirm against the enum definition and the callers.
 */
int ftrace_raw_output_prep(struct trace_iterator *iter,
			   struct trace_event *trace_event)
{
	struct ftrace_event_call *call =
		container_of(trace_event, struct ftrace_event_call, event);

	if (WARN_ON_ONCE(iter->ent->type != call->event.type))
		return TRACE_TYPE_UNHANDLED;

	trace_seq_init(&iter->tmp_seq);

	if (!trace_seq_printf(&iter->seq, "%s: ", call->name))
		return TRACE_TYPE_PARTIAL_LINE;

	return 0;
}
EXPORT_SYMBOL(ftrace_raw_output_prep);
#ifdef CONFIG_KRETPROBES
static inline const char *kretprobed(const char *name)
{
......@@ -617,7 +659,7 @@ lat_print_timestamp(struct trace_iterator *iter, u64 next_ts)
{
unsigned long verbose = trace_flags & TRACE_ITER_VERBOSE;
unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS;
unsigned long long abs_ts = iter->ts - iter->tr->time_start;
unsigned long long abs_ts = iter->ts - iter->trace_buffer->time_start;
unsigned long long rel_ts = next_ts - iter->ts;
struct trace_seq *s = &iter->seq;
......@@ -783,12 +825,12 @@ static int trace_search_list(struct list_head **list)
void trace_event_read_lock(void)
{
down_read(&trace_event_mutex);
down_read(&trace_event_sem);
}
void trace_event_read_unlock(void)
{
up_read(&trace_event_mutex);
up_read(&trace_event_sem);
}
/**
......@@ -811,7 +853,7 @@ int register_ftrace_event(struct trace_event *event)
unsigned key;
int ret = 0;
down_write(&trace_event_mutex);
down_write(&trace_event_sem);
if (WARN_ON(!event))
goto out;
......@@ -866,14 +908,14 @@ int register_ftrace_event(struct trace_event *event)
ret = event->type;
out:
up_write(&trace_event_mutex);
up_write(&trace_event_sem);
return ret;
}
EXPORT_SYMBOL_GPL(register_ftrace_event);
/*
* Used by module code with the trace_event_mutex held for write.
* Used by module code with the trace_event_sem held for write.
*/
int __unregister_ftrace_event(struct trace_event *event)
{
......@@ -888,9 +930,9 @@ int __unregister_ftrace_event(struct trace_event *event)
*/
int unregister_ftrace_event(struct trace_event *event)
{
down_write(&trace_event_mutex);
down_write(&trace_event_sem);
__unregister_ftrace_event(event);
up_write(&trace_event_mutex);
up_write(&trace_event_sem);
return 0;
}
......@@ -1217,6 +1259,64 @@ static struct trace_event trace_user_stack_event = {
.funcs = &trace_user_stack_funcs,
};
/* TRACE_BPUTS */
/*
 * Default output handler for bputs entries: prints the symbol for the
 * recorded ip, then ": ", then the recorded string.
 *
 * The three writes are attempted in order; as soon as one of them returns
 * zero the remaining ones are skipped and TRACE_TYPE_PARTIAL_LINE is
 * returned. TRACE_TYPE_HANDLED is returned when all three succeed.
 */
static enum print_line_t
trace_bputs_print(struct trace_iterator *iter, int flags,
		  struct trace_event *event)
{
	struct trace_seq *seq = &iter->seq;
	struct bputs_entry *bputs;

	trace_assign_type(bputs, iter->ent);

	if (seq_print_ip_sym(seq, bputs->ip, flags) &&
	    trace_seq_puts(seq, ": ") &&
	    trace_seq_puts(seq, bputs->str))
		return TRACE_TYPE_HANDLED;

	return TRACE_TYPE_PARTIAL_LINE;
}
/*
 * Raw output handler for bputs entries: prints the recorded ip as a hex
 * value in the form ": <ip> : " followed by the recorded string.
 *
 * Returns TRACE_TYPE_PARTIAL_LINE as soon as one of the writes returns
 * zero, TRACE_TYPE_HANDLED when both succeed.
 */
static enum print_line_t
trace_bputs_raw(struct trace_iterator *iter, int flags,
		struct trace_event *event)
{
	struct trace_seq *seq = &iter->seq;
	struct bputs_entry *bputs;

	trace_assign_type(bputs, iter->ent);

	if (trace_seq_printf(seq, ": %lx : ", bputs->ip) &&
	    trace_seq_puts(seq, bputs->str))
		return TRACE_TYPE_HANDLED;

	return TRACE_TYPE_PARTIAL_LINE;
}
/* Output callbacks for TRACE_BPUTS entries: default and raw formats. */
static struct trace_event_functions trace_bputs_funcs = {
.trace = trace_bputs_print,
.raw = trace_bputs_raw,
};
/* Event descriptor tying the TRACE_BPUTS entry type to its callbacks. */
static struct trace_event trace_bputs_event = {
.type = TRACE_BPUTS,
.funcs = &trace_bputs_funcs,
};
/* TRACE_BPRINT */
static enum print_line_t
trace_bprint_print(struct trace_iterator *iter, int flags,
......@@ -1329,6 +1429,7 @@ static struct trace_event *events[] __initdata = {
&trace_wake_event,
&trace_stack_event,
&trace_user_stack_event,
&trace_bputs_event,
&trace_bprint_event,
&trace_print_event,
NULL
......
......@@ -4,6 +4,8 @@
#include <linux/trace_seq.h>
#include "trace.h"
extern enum print_line_t
trace_print_bputs_msg_only(struct trace_iterator *iter);
extern enum print_line_t
trace_print_bprintk_msg_only(struct trace_iterator *iter);
extern enum print_line_t
......@@ -31,7 +33,7 @@ trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry);
/* used by module unregistering */
extern int __unregister_ftrace_event(struct trace_event *event);
extern struct rw_semaphore trace_event_mutex;
extern struct rw_semaphore trace_event_sem;
#define MAX_MEMHEX_BYTES 8
#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
......
......@@ -28,7 +28,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
unsigned long flags, int pc)
{
struct ftrace_event_call *call = &event_context_switch;
struct ring_buffer *buffer = tr->buffer;
struct ring_buffer *buffer = tr->trace_buffer.buffer;
struct ring_buffer_event *event;
struct ctx_switch_entry *entry;
......@@ -69,7 +69,7 @@ probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *n
pc = preempt_count();
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = ctx_trace->data[cpu];
data = per_cpu_ptr(ctx_trace->trace_buffer.data, cpu);
if (likely(!atomic_read(&data->disabled)))
tracing_sched_switch_trace(ctx_trace, prev, next, flags, pc);
......@@ -86,7 +86,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
struct ftrace_event_call *call = &event_wakeup;
struct ring_buffer_event *event;
struct ctx_switch_entry *entry;
struct ring_buffer *buffer = tr->buffer;
struct ring_buffer *buffer = tr->trace_buffer.buffer;
event = trace_buffer_lock_reserve(buffer, TRACE_WAKE,
sizeof(*entry), flags, pc);
......@@ -123,7 +123,7 @@ probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success)
pc = preempt_count();
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = ctx_trace->data[cpu];
data = per_cpu_ptr(ctx_trace->trace_buffer.data, cpu);
if (likely(!atomic_read(&data->disabled)))
tracing_sched_wakeup_trace(ctx_trace, wakee, current,
......
......@@ -37,6 +37,7 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
static void wakeup_graph_return(struct ftrace_graph_ret *trace);
static int save_flags;
static bool function_enabled;
#define TRACE_DISPLAY_GRAPH 1
......@@ -89,7 +90,7 @@ func_prolog_preempt_disable(struct trace_array *tr,
if (cpu != wakeup_current_cpu)
goto out_enable;
*data = tr->data[cpu];
*data = per_cpu_ptr(tr->trace_buffer.data, cpu);
disabled = atomic_inc_return(&(*data)->disabled);
if (unlikely(disabled != 1))
goto out;
......@@ -134,15 +135,60 @@ static struct ftrace_ops trace_ops __read_mostly =
};
#endif /* CONFIG_FUNCTION_TRACER */
static int start_func_tracer(int graph)
static int register_wakeup_function(int graph, int set)
{
int ret;
if (!graph)
ret = register_ftrace_function(&trace_ops);
else
/* 'set' is set if TRACE_ITER_FUNCTION is about to be set */
if (function_enabled || (!set && !(trace_flags & TRACE_ITER_FUNCTION)))
return 0;
if (graph)
ret = register_ftrace_graph(&wakeup_graph_return,
&wakeup_graph_entry);
else
ret = register_ftrace_function(&trace_ops);
if (!ret)
function_enabled = true;
return ret;
}
/*
 * Undo register_wakeup_function().
 *
 * Does nothing unless function_enabled is set. Otherwise unregisters either
 * the graph callbacks (@graph non-zero) or trace_ops (@graph zero) and
 * clears function_enabled.
 */
static void unregister_wakeup_function(int graph)
{
	if (function_enabled) {
		if (!graph)
			unregister_ftrace_function(&trace_ops);
		else
			unregister_ftrace_graph();

		function_enabled = false;
	}
}
/*
 * Register (@set non-zero) or unregister (@set zero) the wakeup tracer's
 * function callbacks, using is_graph() to choose between the graph and the
 * plain function variant. The result of the registration is not reported.
 */
static void wakeup_function_set(int set)
{
	if (!set) {
		unregister_wakeup_function(is_graph());
		return;
	}

	register_wakeup_function(is_graph(), 1);
}
/*
 * flag_changed callback for the wakeup tracers.
 *
 * When @mask includes TRACE_ITER_FUNCTION, the function callbacks are
 * registered or unregistered according to @set. The return value always
 * comes from trace_keep_overwrite().
 */
static int wakeup_flag_changed(struct tracer *tracer, u32 mask, int set)
{
	const bool function_flag = (mask & TRACE_ITER_FUNCTION) != 0;

	if (function_flag)
		wakeup_function_set(set);

	return trace_keep_overwrite(tracer, mask, set);
}
static int start_func_tracer(int graph)
{
int ret;
ret = register_wakeup_function(graph, 0);
if (!ret && tracing_is_enabled())
tracer_enabled = 1;
......@@ -156,10 +202,7 @@ static void stop_func_tracer(int graph)
{
tracer_enabled = 0;
if (!graph)
unregister_ftrace_function(&trace_ops);
else
unregister_ftrace_graph();
unregister_wakeup_function(graph);
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
......@@ -353,7 +396,7 @@ probe_wakeup_sched_switch(void *ignore,
/* disable local data, not wakeup_cpu data */
cpu = raw_smp_processor_id();
disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled);
if (likely(disabled != 1))
goto out;
......@@ -365,7 +408,7 @@ probe_wakeup_sched_switch(void *ignore,
goto out_unlock;
/* The task we are waiting for is waking up */
data = wakeup_trace->data[wakeup_cpu];
data = per_cpu_ptr(wakeup_trace->trace_buffer.data, wakeup_cpu);
__trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
......@@ -387,7 +430,7 @@ probe_wakeup_sched_switch(void *ignore,
arch_spin_unlock(&wakeup_lock);
local_irq_restore(flags);
out:
atomic_dec(&wakeup_trace->data[cpu]->disabled);
atomic_dec(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled);
}
static void __wakeup_reset(struct trace_array *tr)
......@@ -405,7 +448,7 @@ static void wakeup_reset(struct trace_array *tr)
{
unsigned long flags;
tracing_reset_online_cpus(tr);
tracing_reset_online_cpus(&tr->trace_buffer);
local_irq_save(flags);
arch_spin_lock(&wakeup_lock);
......@@ -435,7 +478,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success)
return;
pc = preempt_count();
disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled);
if (unlikely(disabled != 1))
goto out;
......@@ -458,7 +501,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success)
local_save_flags(flags);
data = wakeup_trace->data[wakeup_cpu];
data = per_cpu_ptr(wakeup_trace->trace_buffer.data, wakeup_cpu);
data->preempt_timestamp = ftrace_now(cpu);
tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
......@@ -472,7 +515,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success)
out_locked:
arch_spin_unlock(&wakeup_lock);
out:
atomic_dec(&wakeup_trace->data[cpu]->disabled);
atomic_dec(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled);
}
static void start_wakeup_tracer(struct trace_array *tr)
......@@ -543,8 +586,8 @@ static int __wakeup_tracer_init(struct trace_array *tr)
save_flags = trace_flags;
/* non overwrite screws up the latency tracers */
set_tracer_flag(TRACE_ITER_OVERWRITE, 1);
set_tracer_flag(TRACE_ITER_LATENCY_FMT, 1);
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1);
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1);
tracing_max_latency = 0;
wakeup_trace = tr;
......@@ -573,8 +616,8 @@ static void wakeup_tracer_reset(struct trace_array *tr)
/* make sure we put back any tasks we are tracing */
wakeup_reset(tr);
set_tracer_flag(TRACE_ITER_LATENCY_FMT, lat_flag);
set_tracer_flag(TRACE_ITER_OVERWRITE, overwrite_flag);
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag);
}
static void wakeup_tracer_start(struct trace_array *tr)
......@@ -600,7 +643,7 @@ static struct tracer wakeup_tracer __read_mostly =
.print_line = wakeup_print_line,
.flags = &tracer_flags,
.set_flag = wakeup_set_flag,
.flag_changed = trace_keep_overwrite,
.flag_changed = wakeup_flag_changed,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_wakeup,
#endif
......@@ -622,7 +665,7 @@ static struct tracer wakeup_rt_tracer __read_mostly =
.print_line = wakeup_print_line,
.flags = &tracer_flags,
.set_flag = wakeup_set_flag,
.flag_changed = trace_keep_overwrite,
.flag_changed = wakeup_flag_changed,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_wakeup,
#endif
......
......@@ -21,13 +21,13 @@ static inline int trace_valid_entry(struct trace_entry *entry)
return 0;
}
static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
static int trace_test_buffer_cpu(struct trace_buffer *buf, int cpu)
{
struct ring_buffer_event *event;
struct trace_entry *entry;
unsigned int loops = 0;
while ((event = ring_buffer_consume(tr->buffer, cpu, NULL, NULL))) {
while ((event = ring_buffer_consume(buf->buffer, cpu, NULL, NULL))) {
entry = ring_buffer_event_data(event);
/*
......@@ -58,7 +58,7 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
* Test the trace buffer to see if all the elements
* are still sane.
*/
static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
static int trace_test_buffer(struct trace_buffer *buf, unsigned long *count)
{
unsigned long flags, cnt = 0;
int cpu, ret = 0;
......@@ -67,7 +67,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
local_irq_save(flags);
arch_spin_lock(&ftrace_max_lock);
cnt = ring_buffer_entries(tr->buffer);
cnt = ring_buffer_entries(buf->buffer);
/*
* The trace_test_buffer_cpu runs a while loop to consume all data.
......@@ -78,7 +78,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
*/
tracing_off();
for_each_possible_cpu(cpu) {
ret = trace_test_buffer_cpu(tr, cpu);
ret = trace_test_buffer_cpu(buf, cpu);
if (ret)
break;
}
......@@ -355,7 +355,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
msleep(100);
/* we should have nothing in the buffer */
ret = trace_test_buffer(tr, &count);
ret = trace_test_buffer(&tr->trace_buffer, &count);
if (ret)
goto out;
......@@ -376,7 +376,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
ftrace_enabled = 0;
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
ret = trace_test_buffer(&tr->trace_buffer, &count);
tracing_start();
/* we should only have one item */
......@@ -666,7 +666,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
ftrace_enabled = 0;
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
ret = trace_test_buffer(&tr->trace_buffer, &count);
trace->reset(tr);
tracing_start();
......@@ -703,8 +703,6 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
/* Maximum number of functions to trace before diagnosing a hang */
#define GRAPH_MAX_FUNC_TEST 100000000
static void
__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode);
static unsigned int graph_hang_thresh;
/* Wrap the real function entry probe to avoid possible hanging */
......@@ -714,8 +712,11 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace)
if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) {
ftrace_graph_stop();
printk(KERN_WARNING "BUG: Function graph tracer hang!\n");
if (ftrace_dump_on_oops)
__ftrace_dump(false, DUMP_ALL);
if (ftrace_dump_on_oops) {
ftrace_dump(DUMP_ALL);
/* ftrace_dump() disables tracing */
tracing_on();
}
return 0;
}
......@@ -737,7 +738,7 @@ trace_selftest_startup_function_graph(struct tracer *trace,
* Simulate the init() callback but we attach a watchdog callback
* to detect and recover from possible hangs
*/
tracing_reset_online_cpus(tr);
tracing_reset_online_cpus(&tr->trace_buffer);
set_graph_array(tr);
ret = register_ftrace_graph(&trace_graph_return,
&trace_graph_entry_watchdog);
......@@ -760,7 +761,7 @@ trace_selftest_startup_function_graph(struct tracer *trace,
tracing_stop();
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
ret = trace_test_buffer(&tr->trace_buffer, &count);
trace->reset(tr);
tracing_start();
......@@ -815,9 +816,9 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
/* stop the tracing. */
tracing_stop();
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
ret = trace_test_buffer(&tr->trace_buffer, NULL);
if (!ret)
ret = trace_test_buffer(&max_tr, &count);
ret = trace_test_buffer(&tr->max_buffer, &count);
trace->reset(tr);
tracing_start();
......@@ -877,9 +878,9 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
/* stop the tracing. */
tracing_stop();
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
ret = trace_test_buffer(&tr->trace_buffer, NULL);
if (!ret)
ret = trace_test_buffer(&max_tr, &count);
ret = trace_test_buffer(&tr->max_buffer, &count);
trace->reset(tr);
tracing_start();
......@@ -943,11 +944,11 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
/* stop the tracing. */
tracing_stop();
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
ret = trace_test_buffer(&tr->trace_buffer, NULL);
if (ret)
goto out;
ret = trace_test_buffer(&max_tr, &count);
ret = trace_test_buffer(&tr->max_buffer, &count);
if (ret)
goto out;
......@@ -973,11 +974,11 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
/* stop the tracing. */
tracing_stop();
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
ret = trace_test_buffer(&tr->trace_buffer, NULL);
if (ret)
goto out;
ret = trace_test_buffer(&max_tr, &count);
ret = trace_test_buffer(&tr->max_buffer, &count);
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
......@@ -1084,10 +1085,10 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
/* stop the tracing. */
tracing_stop();
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
ret = trace_test_buffer(&tr->trace_buffer, NULL);
printk("ret = %d\n", ret);
if (!ret)
ret = trace_test_buffer(&max_tr, &count);
ret = trace_test_buffer(&tr->max_buffer, &count);
trace->reset(tr);
......@@ -1126,7 +1127,7 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
/* stop the tracing. */
tracing_stop();
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
ret = trace_test_buffer(&tr->trace_buffer, &count);
trace->reset(tr);
tracing_start();
......
......@@ -20,13 +20,24 @@
#define STACK_TRACE_ENTRIES 500
/*
 * Expose CC_USING_FENTRY as a 0/1 constant so it can be tested with a
 * plain C conditional ("if (fentry)") instead of preprocessor blocks.
 */
#ifdef CC_USING_FENTRY
# define fentry 1
#else
# define fentry 0
#endif
static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
{ [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
/*
* Reserve one entry for the passed in ip. This will allow
* us to remove most or all of the stack size overhead
* added by the stack tracer itself.
*/
static struct stack_trace max_stack_trace = {
.max_entries = STACK_TRACE_ENTRIES,
.entries = stack_dump_trace,
.max_entries = STACK_TRACE_ENTRIES - 1,
.entries = &stack_dump_trace[1],
};
static unsigned long max_stack_size;
......@@ -39,25 +50,34 @@ static DEFINE_MUTEX(stack_sysctl_mutex);
int stack_tracer_enabled;
static int last_stack_tracer_enabled;
static inline void check_stack(void)
static inline void
check_stack(unsigned long ip, unsigned long *stack)
{
unsigned long this_size, flags;
unsigned long *p, *top, *start;
static int tracer_frame;
int frame_size = ACCESS_ONCE(tracer_frame);
int i;
this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1);
this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
this_size = THREAD_SIZE - this_size;
/* Remove the frame of the tracer */
this_size -= frame_size;
if (this_size <= max_stack_size)
return;
/* we do not handle interrupt stacks yet */
if (!object_is_on_stack(&this_size))
if (!object_is_on_stack(stack))
return;
local_irq_save(flags);
arch_spin_lock(&max_stack_lock);
/* In case another CPU set the tracer_frame on us */
if (unlikely(!frame_size))
this_size -= tracer_frame;
/* a race could have already updated it */
if (this_size <= max_stack_size)
goto out;
......@@ -69,11 +89,19 @@ static inline void check_stack(void)
save_stack_trace(&max_stack_trace);
/*
* Add the passed in ip from the function tracer.
* Searching for this on the stack will skip over
* most of the overhead from the stack tracer itself.
*/
stack_dump_trace[0] = ip;
max_stack_trace.nr_entries++;
/*
* Now find where in the stack these are.
*/
i = 0;
start = &this_size;
start = stack;
top = (unsigned long *)
(((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
......@@ -97,6 +125,18 @@ static inline void check_stack(void)
found = 1;
/* Start the search from here */
start = p + 1;
/*
* We do not want to show the overhead
* of the stack tracer stack in the
* max stack. If we haven't figured
* out what that is, then figure it out
* now.
*/
if (unlikely(!tracer_frame) && i == 1) {
tracer_frame = (p - stack) *
sizeof(unsigned long);
max_stack_size -= tracer_frame;
}
}
}
......@@ -113,6 +153,7 @@ static void
stack_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *pt_regs)
{
unsigned long stack;
int cpu;
preempt_disable_notrace();
......@@ -122,7 +163,26 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip,
if (per_cpu(trace_active, cpu)++ != 0)
goto out;
check_stack();
/*
* When fentry is used, the traced function does not get
* its stack frame set up, and we lose the parent.
* The ip is pretty useless because the function tracer
* was called before that function set up its stack frame.
* In this case, we use the parent ip.
*
* By adding the return address of either the parent ip
* or the current ip we can disregard most of the stack usage
* caused by the stack tracer itself.
*
* The function tracer always reports the address of where the
* mcount call was, but the stack will hold the return address.
*/
if (fentry)
ip = parent_ip;
else
ip += MCOUNT_INSN_SIZE;
check_stack(ip, &stack);
out:
per_cpu(trace_active, cpu)--;
......@@ -371,6 +431,8 @@ static __init int stack_trace_init(void)
struct dentry *d_tracer;
d_tracer = tracing_init_dentry();
if (!d_tracer)
return 0;
trace_create_file("stack_max_size", 0644, d_tracer,
&max_stack_size, &stack_max_size_fops);
......
......@@ -307,6 +307,8 @@ static int tracing_stat_init(void)
struct dentry *d_tracing;
d_tracing = tracing_init_dentry();
if (!d_tracing)
return 0;
stat_dir = debugfs_create_dir("trace_stat", d_tracing);
if (!stat_dir)
......
......@@ -12,10 +12,6 @@
#include "trace.h"
static DEFINE_MUTEX(syscall_trace_lock);
static int sys_refcount_enter;
static int sys_refcount_exit;
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
static int syscall_enter_register(struct ftrace_event_call *event,
enum trace_reg type, void *data);
......@@ -41,7 +37,7 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name
/*
* Only compare after the "sys" prefix. Archs that use
* syscall wrappers may have syscalls symbols aliases prefixed
* with "SyS" instead of "sys", leading to an unwanted
* with ".SyS" or ".sys" instead of "sys", leading to an unwanted
* mismatch.
*/
return !strcmp(sym + 3, name + 3);
......@@ -265,7 +261,7 @@ static void free_syscall_print_fmt(struct ftrace_event_call *call)
kfree(call->print_fmt);
}
static int syscall_enter_define_fields(struct ftrace_event_call *call)
static int __init syscall_enter_define_fields(struct ftrace_event_call *call)
{
struct syscall_trace_enter trace;
struct syscall_metadata *meta = call->data;
......@@ -288,7 +284,7 @@ static int syscall_enter_define_fields(struct ftrace_event_call *call)
return ret;
}
static int syscall_exit_define_fields(struct ftrace_event_call *call)
static int __init syscall_exit_define_fields(struct ftrace_event_call *call)
{
struct syscall_trace_exit trace;
int ret;
......@@ -303,8 +299,9 @@ static int syscall_exit_define_fields(struct ftrace_event_call *call)
return ret;
}
static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
{
struct trace_array *tr = data;
struct syscall_trace_enter *entry;
struct syscall_metadata *sys_data;
struct ring_buffer_event *event;
......@@ -315,7 +312,7 @@ static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
syscall_nr = trace_get_syscall_nr(current, regs);
if (syscall_nr < 0)
return;
if (!test_bit(syscall_nr, enabled_enter_syscalls))
if (!test_bit(syscall_nr, tr->enabled_enter_syscalls))
return;
sys_data = syscall_nr_to_meta(syscall_nr);
......@@ -324,7 +321,8 @@ static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
event = trace_current_buffer_lock_reserve(&buffer,
buffer = tr->trace_buffer.buffer;
event = trace_buffer_lock_reserve(buffer,
sys_data->enter_event->event.type, size, 0, 0);
if (!event)
return;
......@@ -338,8 +336,9 @@ static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
{
struct trace_array *tr = data;
struct syscall_trace_exit *entry;
struct syscall_metadata *sys_data;
struct ring_buffer_event *event;
......@@ -349,14 +348,15 @@ static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
syscall_nr = trace_get_syscall_nr(current, regs);
if (syscall_nr < 0)
return;
if (!test_bit(syscall_nr, enabled_exit_syscalls))
if (!test_bit(syscall_nr, tr->enabled_exit_syscalls))
return;
sys_data = syscall_nr_to_meta(syscall_nr);
if (!sys_data)
return;
event = trace_current_buffer_lock_reserve(&buffer,
buffer = tr->trace_buffer.buffer;
event = trace_buffer_lock_reserve(buffer,
sys_data->exit_event->event.type, sizeof(*entry), 0, 0);
if (!event)
return;
......@@ -370,8 +370,10 @@ static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
static int reg_event_syscall_enter(struct ftrace_event_call *call)
static int reg_event_syscall_enter(struct ftrace_event_file *file,
struct ftrace_event_call *call)
{
struct trace_array *tr = file->tr;
int ret = 0;
int num;
......@@ -379,33 +381,37 @@ static int reg_event_syscall_enter(struct ftrace_event_call *call)
if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
return -ENOSYS;
mutex_lock(&syscall_trace_lock);
if (!sys_refcount_enter)
ret = register_trace_sys_enter(ftrace_syscall_enter, NULL);
if (!tr->sys_refcount_enter)
ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
if (!ret) {
set_bit(num, enabled_enter_syscalls);
sys_refcount_enter++;
set_bit(num, tr->enabled_enter_syscalls);
tr->sys_refcount_enter++;
}
mutex_unlock(&syscall_trace_lock);
return ret;
}
static void unreg_event_syscall_enter(struct ftrace_event_call *call)
static void unreg_event_syscall_enter(struct ftrace_event_file *file,
struct ftrace_event_call *call)
{
struct trace_array *tr = file->tr;
int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
return;
mutex_lock(&syscall_trace_lock);
sys_refcount_enter--;
clear_bit(num, enabled_enter_syscalls);
if (!sys_refcount_enter)
unregister_trace_sys_enter(ftrace_syscall_enter, NULL);
tr->sys_refcount_enter--;
clear_bit(num, tr->enabled_enter_syscalls);
if (!tr->sys_refcount_enter)
unregister_trace_sys_enter(ftrace_syscall_enter, tr);
mutex_unlock(&syscall_trace_lock);
}
static int reg_event_syscall_exit(struct ftrace_event_call *call)
static int reg_event_syscall_exit(struct ftrace_event_file *file,
struct ftrace_event_call *call)
{
struct trace_array *tr = file->tr;
int ret = 0;
int num;
......@@ -413,28 +419,30 @@ static int reg_event_syscall_exit(struct ftrace_event_call *call)
if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
return -ENOSYS;
mutex_lock(&syscall_trace_lock);
if (!sys_refcount_exit)
ret = register_trace_sys_exit(ftrace_syscall_exit, NULL);
if (!tr->sys_refcount_exit)
ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
if (!ret) {
set_bit(num, enabled_exit_syscalls);
sys_refcount_exit++;
set_bit(num, tr->enabled_exit_syscalls);
tr->sys_refcount_exit++;
}
mutex_unlock(&syscall_trace_lock);
return ret;
}
static void unreg_event_syscall_exit(struct ftrace_event_call *call)
static void unreg_event_syscall_exit(struct ftrace_event_file *file,
struct ftrace_event_call *call)
{
struct trace_array *tr = file->tr;
int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
return;
mutex_lock(&syscall_trace_lock);
sys_refcount_exit--;
clear_bit(num, enabled_exit_syscalls);
if (!sys_refcount_exit)
unregister_trace_sys_exit(ftrace_syscall_exit, NULL);
tr->sys_refcount_exit--;
clear_bit(num, tr->enabled_exit_syscalls);
if (!tr->sys_refcount_exit)
unregister_trace_sys_exit(ftrace_syscall_exit, tr);
mutex_unlock(&syscall_trace_lock);
}
......@@ -471,7 +479,7 @@ struct trace_event_functions exit_syscall_print_funcs = {
.trace = print_syscall_exit,
};
struct ftrace_event_class event_class_syscall_enter = {
struct ftrace_event_class __refdata event_class_syscall_enter = {
.system = "syscalls",
.reg = syscall_enter_register,
.define_fields = syscall_enter_define_fields,
......@@ -479,7 +487,7 @@ struct ftrace_event_class event_class_syscall_enter = {
.raw_init = init_syscall_trace,
};
struct ftrace_event_class event_class_syscall_exit = {
struct ftrace_event_class __refdata event_class_syscall_exit = {
.system = "syscalls",
.reg = syscall_exit_register,
.define_fields = syscall_exit_define_fields,
......@@ -685,11 +693,13 @@ static void perf_sysexit_disable(struct ftrace_event_call *call)
static int syscall_enter_register(struct ftrace_event_call *event,
enum trace_reg type, void *data)
{
struct ftrace_event_file *file = data;
switch (type) {
case TRACE_REG_REGISTER:
return reg_event_syscall_enter(event);
return reg_event_syscall_enter(file, event);
case TRACE_REG_UNREGISTER:
unreg_event_syscall_enter(event);
unreg_event_syscall_enter(file, event);
return 0;
#ifdef CONFIG_PERF_EVENTS
......@@ -711,11 +721,13 @@ static int syscall_enter_register(struct ftrace_event_call *event,
static int syscall_exit_register(struct ftrace_event_call *event,
enum trace_reg type, void *data)
{
struct ftrace_event_file *file = data;
switch (type) {
case TRACE_REG_REGISTER:
return reg_event_syscall_exit(event);
return reg_event_syscall_exit(file, event);
case TRACE_REG_UNREGISTER:
unreg_event_syscall_exit(event);
unreg_event_syscall_exit(file, event);
return 0;
#ifdef CONFIG_PERF_EVENTS
......
......@@ -112,7 +112,8 @@ tracepoint_entry_add_probe(struct tracepoint_entry *entry,
int nr_probes = 0;
struct tracepoint_func *old, *new;
WARN_ON(!probe);
if (WARN_ON(!probe))
return ERR_PTR(-EINVAL);
debug_print_probes(entry);
old = entry->funcs;
......@@ -152,13 +153,18 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry,
debug_print_probes(entry);
/* (N -> M), (N > 1, M >= 0) probes */
if (probe) {
for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
if (!probe ||
(old[nr_probes].func == probe &&
old[nr_probes].data == data))
if (old[nr_probes].func == probe &&
old[nr_probes].data == data)
nr_del++;
}
}
/*
* If probe is NULL, then nr_probes = nr_del = 0, and then the
* entire entry will be removed.
*/
if (nr_probes - nr_del == 0) {
/* N -> 0, (N > 1) */
entry->funcs = NULL;
......@@ -173,8 +179,7 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry,
if (new == NULL)
return ERR_PTR(-ENOMEM);
for (i = 0; old[i].func; i++)
if (probe &&
(old[i].func != probe || old[i].data != data))
if (old[i].func != probe || old[i].data != data)
new[j++] = old[i];
new[nr_probes - nr_del].func = NULL;
entry->refcount = nr_probes - nr_del;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment