Commit cdf072ac authored by Linus Torvalds

Merge tag 'trace-v6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull tracing updates from Steven Rostedt:
 "Major changes:

   - Changed location of tracing repo from personal git repo to:
     git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git

   - Added Masami Hiramatsu as co-maintainer

   - Updated MAINTAINERS file to separate out FTRACE as it is more than
     just TRACING.

  Minor changes:

   - Added Mark Rutland as FTRACE reviewer

   - Updated user_events to put it on its way to having the BROKEN tag
     removed. The changes should now be acceptable, but they will run
     through a release cycle, and hopefully the BROKEN tag can be
     removed next release.

   - Added filtering to eprobes

   - Added a delta time to the benchmark trace event

   - Have the histogram and filter callbacks called via a switch
     statement instead of indirect function calls. This speeds them up
     by avoiding retpolines.

   - Add a way to wake up ring buffer waiters waiting for the ring
     buffer to fill up to its watermark.

   - New ioctl() on the trace_pipe_raw file to wake up ring buffer
     waiters.

   - Wake up waiters when the ring buffer is disabled. A reader may
     block waiting on the ring buffer, but if the ring buffer becomes
     disabled while it is blocked, it should wake up.

  Fixes:

   - Allow splice to read partially read ring buffer pages. This fixes
     splice never moving forward.

   - Fix inverted compare that made the "shortest" ring buffer wait
     queue actually the longest.

   - Fix a race in the ring buffer between resetting a page when a
     writer goes to another page, and the reader.

   - Fix ftrace accounting bug when function hooks are added at boot up
     before the weak functions are set to "disabled".

   - Fix bug that freed a user allocated snapshot buffer when enabling a
     tracer.

   - Fix possible recursive locks in osnoise tracer

   - Fix recursive locking of direct functions

   - Other minor clean ups and fixes"

* tag 'trace-v6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace: (44 commits)
  ftrace: Create separate entry in MAINTAINERS for function hooks
  tracing: Update MAINTAINERS to reflect new tracing git repo
  tracing: Do not free snapshot if tracer is on cmdline
  ftrace: Still disable enabled records marked as disabled
  tracing/user_events: Move pages/locks into groups to prepare for namespaces
  tracing: Add Masami Hiramatsu as co-maintainer
  tracing: Remove unused variable 'dups'
  MAINTAINERS: add myself as a tracing reviewer
  ring-buffer: Fix race between reset page and reading page
  tracing/user_events: Update ABI documentation to align to bits vs bytes
  tracing/user_events: Use bits vs bytes for enabled status page data
  tracing/user_events: Use refcount instead of atomic for ref tracking
  tracing/user_events: Ensure user provided strings are safely formatted
  tracing/user_events: Use WRITE instead of READ for io vector import
  tracing/user_events: Use NULL for strstr checks
  tracing: Fix spelling mistake "preapre" -> "prepare"
  tracing: Wake up waiters when tracing is disabled
  tracing: Add ioctl() to force ring buffer waiters to wake up
  tracing: Wake up ring buffer waiters on closing of the file
  ring-buffer: Add ring_buffer_wake_waiters()
  ...
parents dc553428 4f881a69
@@ -20,14 +20,14 @@ dynamic_events is the same as the ioctl with the u: prefix applied.
 Typically programs will register a set of events that they wish to expose to
 tools that can read trace_events (such as ftrace and perf). The registration
-process gives back two ints to the program for each event. The first int is the
-status index. This index describes which byte in the
+process gives back two ints to the program for each event. The first int is
+the status bit. This describes which bit in little-endian format in the
 /sys/kernel/debug/tracing/user_events_status file represents this event. The
-second int is the write index. This index describes the data when a write() or
+second int is the write index which describes the data when a write() or
 writev() is called on the /sys/kernel/debug/tracing/user_events_data file.
 
-The structures referenced in this document are contained with the
-/include/uap/linux/user_events.h file in the source tree.
+The structures referenced in this document are contained within the
+/include/uapi/linux/user_events.h file in the source tree.
 
 **NOTE:** *Both user_events_status and user_events_data are under the tracefs
 filesystem and may be mounted at different paths than above.*
@@ -38,18 +38,18 @@ Registering within a user process is done via ioctl() out to the
 /sys/kernel/debug/tracing/user_events_data file. The command to issue is
 DIAG_IOCSREG.
 
-This command takes a struct user_reg as an argument::
+This command takes a packed struct user_reg as an argument::
 
   struct user_reg {
         u32 size;
         u64 name_args;
-        u32 status_index;
+        u32 status_bit;
         u32 write_index;
   };
 
 The struct user_reg requires two inputs, the first is the size of the structure
 to ensure forward and backward compatibility. The second is the command string
-to issue for registering. Upon success two outputs are set, the status index
+to issue for registering. Upon success two outputs are set, the status bit
 and the write index.
 
 User based events show up under tracefs like any other event under the
@@ -111,15 +111,56 @@ in realtime. This allows user programs to only incur the cost of the write() or
 writev() calls when something is actively attached to the event.
 
 User programs call mmap() on /sys/kernel/debug/tracing/user_events_status to
-check the status for each event that is registered. The byte to check in the
-file is given back after the register ioctl() via user_reg.status_index.
+check the status for each event that is registered. The bit to check in the
+file is given back after the register ioctl() via user_reg.status_bit. The bit
+is always in little-endian format. Programs can check if the bit is set either
+using a byte-wise index with a mask or a long-wise index with a little-endian
+mask.
 
 Currently the size of user_events_status is a single page, however, custom
 kernel configurations can change this size to allow more user based events. In
 all cases the size of the file is a multiple of a page size.
 
-For example, if the register ioctl() gives back a status_index of 3 you would
-check byte 3 of the returned mmap data to see if anything is attached to that
-event.
+For example, if the register ioctl() gives back a status_bit of 3 you would
+check byte 0 (3 / 8) of the returned mmap data and then AND the result with 8
+(1 << (3 % 8)) to see if anything is attached to that event.
+
+A byte-wise index check is performed as follows::
+
+  int index, mask;
+  char *status_page;
+
+  index = status_bit / 8;
+  mask = 1 << (status_bit % 8);
+
+  ...
+
+  if (status_page[index] & mask) {
+        /* Enabled */
+  }
+
+A long-wise index check is performed as follows::
+
+  #include <asm/bitsperlong.h>
+  #include <endian.h>
+
+  #if __BITS_PER_LONG == 64
+  #define endian_swap(x) htole64(x)
+  #else
+  #define endian_swap(x) htole32(x)
+  #endif
+
+  long index, mask, *status_page;
+
+  index = status_bit / __BITS_PER_LONG;
+  mask = 1L << (status_bit % __BITS_PER_LONG);
+  mask = endian_swap(mask);
+
+  ...
+
+  if (status_page[index] & mask) {
+        /* Enabled */
+  }
 
 Administrators can easily check the status of all registered events by reading
 the user_events_status file directly via a terminal. The output is as follows::
@@ -137,7 +178,7 @@ For example, on a system that has a single event the output looks like this::
 
   Active: 1
   Busy: 0
-  Max: 4096
+  Max: 32768
 
 If a user enables the user event via ftrace, the output would change to this::
 
@@ -145,21 +186,10 @@ If a user enables the user event via ftrace, the output would change to this::
 
   Active: 1
   Busy: 1
-  Max: 4096
+  Max: 32768
 
-**NOTE:** *A status index of 0 will never be returned. This allows user
-programs to have an index that can be used on error cases.*
-
-Status Bits
-^^^^^^^^^^^
-
-The byte being checked will be non-zero if anything is attached. Programs can
-check specific bits in the byte to see what mechanism has been attached.
-
-The following values are defined to aid in checking what has been attached:
-
-**EVENT_STATUS_FTRACE** - Bit set if ftrace has been attached (Bit 0).
-
-**EVENT_STATUS_PERF** - Bit set if perf has been attached (Bit 1).
+**NOTE:** *A status bit of 0 will never be returned. This allows user programs
+to have a bit that can be used on error cases.*
 
 Writing Data
 ------------
......
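For reference, the registration flow described in the documentation above can be exercised from user space roughly as follows. This is a minimal sketch against the post-patch ABI, not part of the patch itself: the tracefs path, the hypothetical "test u32 count" command string, and the simplified error handling are assumptions::

  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <linux/user_events.h>

  int main(void)
  {
        struct user_reg reg;
        int fd = open("/sys/kernel/debug/tracing/user_events_data", O_RDWR);

        if (fd < 0)
                return 1;

        memset(&reg, 0, sizeof(reg));
        reg.size = sizeof(reg);
        /* Hypothetical event: name "test" with a single u32 field */
        reg.name_args = (__u64)(unsigned long)"test u32 count";

        if (ioctl(fd, DIAG_IOCSREG, &reg) < 0)
                return 1;

        /* status_bit: which bit to test in the mmap()ed status page;
         * write_index: prepended to payloads written to this fd */
        printf("status_bit=%u write_index=%u\n",
               reg.status_bit, reg.write_index);
        return 0;
  }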
@@ -8433,6 +8433,19 @@ L: platform-driver-x86@vger.kernel.org
 S: Maintained
 F: drivers/platform/x86/fujitsu-tablet.c
 
+FUNCTION HOOKS (FTRACE)
+M: Steven Rostedt <rostedt@goodmis.org>
+M: Masami Hiramatsu <mhiramat@kernel.org>
+R: Mark Rutland <mark.rutland@arm.com>
+S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git
+F: Documentation/trace/ftrace*
+F: kernel/trace/ftrace*
+F: kernel/trace/fgraph.c
+F: arch/*/*/*/*ftrace*
+F: arch/*/*/*ftrace*
+F: include/*/ftrace.h
+
 FUNGIBLE ETHERNET DRIVERS
 M: Dimitris Michailidis <dmichail@fungible.com>
 L: netdev@vger.kernel.org
@@ -11422,7 +11435,7 @@ M: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 M: "David S. Miller" <davem@davemloft.net>
 M: Masami Hiramatsu <mhiramat@kernel.org>
 S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git
 F: Documentation/trace/kprobes.rst
 F: include/asm-generic/kprobes.h
 F: include/linux/kprobes.h
@@ -20771,14 +20784,11 @@ F: drivers/hwmon/pmbus/tps546d24.c
 TRACING
 M: Steven Rostedt <rostedt@goodmis.org>
-M: Ingo Molnar <mingo@redhat.com>
+M: Masami Hiramatsu <mhiramat@kernel.org>
 S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git
-F: Documentation/trace/ftrace.rst
-F: arch/*/*/*/*ftrace*
-F: arch/*/*/*ftrace*
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git
+F: Documentation/trace/*
 F: fs/tracefs/
-F: include/*/ftrace.h
 F: include/linux/trace*.h
 F: include/trace/
 F: kernel/trace/
@@ -20787,7 +20797,7 @@ F: tools/testing/selftests/ftrace/
 TRACING MMIO ACCESSES (MMIOTRACE)
 M: Steven Rostedt <rostedt@goodmis.org>
-M: Ingo Molnar <mingo@kernel.org>
+M: Masami Hiramatsu <mhiramat@kernel.org>
 R: Karol Herbst <karolherbst@gmail.com>
 R: Pekka Paalanen <ppaalanen@gmail.com>
 L: linux-kernel@vger.kernel.org
......
@@ -23,7 +23,6 @@
 #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
 
 #ifndef __ASSEMBLY__
-extern atomic_t modifying_ftrace_code;
 extern void __fentry__(void);
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
......
@@ -50,8 +50,6 @@ extern const int kretprobe_blacklist_size;
 
 void arch_remove_kprobe(struct kprobe *p);
 
-extern void arch_kprobe_override_function(struct pt_regs *regs);
-
 /* Architecture specific copy of original instruction*/
 struct arch_specific_insn {
         /* copy of the original instruction */
......
@@ -59,8 +59,6 @@
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
-#define stack_addr(regs) ((unsigned long *)regs->sp)
-
 #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
         (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
           (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
......
@@ -1122,47 +1122,6 @@ static inline void unpause_graph_tracing(void) { }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 #ifdef CONFIG_TRACING
-
-/* flags for current->trace */
-enum {
-        TSK_TRACE_FL_TRACE_BIT = 0,
-        TSK_TRACE_FL_GRAPH_BIT = 1,
-};
-enum {
-        TSK_TRACE_FL_TRACE = 1 << TSK_TRACE_FL_TRACE_BIT,
-        TSK_TRACE_FL_GRAPH = 1 << TSK_TRACE_FL_GRAPH_BIT,
-};
-
-static inline void set_tsk_trace_trace(struct task_struct *tsk)
-{
-        set_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
-}
-
-static inline void clear_tsk_trace_trace(struct task_struct *tsk)
-{
-        clear_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
-}
-
-static inline int test_tsk_trace_trace(struct task_struct *tsk)
-{
-        return tsk->trace & TSK_TRACE_FL_TRACE;
-}
-
-static inline void set_tsk_trace_graph(struct task_struct *tsk)
-{
-        set_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
-}
-
-static inline void clear_tsk_trace_graph(struct task_struct *tsk)
-{
-        clear_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
-}
-
-static inline int test_tsk_trace_graph(struct task_struct *tsk)
-{
-        return tsk->trace & TSK_TRACE_FL_GRAPH;
-}
-
 enum ftrace_dump_mode;
 extern enum ftrace_dump_mode ftrace_dump_on_oops;
......
@@ -101,7 +101,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
 int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full);
 __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
                           struct file *filp, poll_table *poll_table);
+void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu);
 
 #define RING_BUFFER_ALL_CPUS -1
......
@@ -1390,9 +1390,6 @@ struct task_struct {
 #endif
 
 #ifdef CONFIG_TRACING
-        /* State flags for use by tracers: */
-        unsigned long                   trace;
-
         /* Bitmask and counter of trace recursion: */
         unsigned long                   trace_recursion;
 #endif /* CONFIG_TRACING */
......
@@ -92,6 +92,7 @@ struct trace_iterator {
         unsigned int            temp_size;
         char                    *fmt;   /* modified format holder */
         unsigned int            fmt_size;
+        long                    wait_index;
 
         /* trace_seq for __print_flags() and __print_symbolic() etc. */
         struct trace_seq        tmp_seq;
......
@@ -20,15 +20,6 @@
 #define USER_EVENTS_SYSTEM "user_events"
 #define USER_EVENTS_PREFIX "u:"
 
-/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */
-#define EVENT_BIT_FTRACE 0
-#define EVENT_BIT_PERF 1
-#define EVENT_BIT_OTHER 7
-
-#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE)
-#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF)
-#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER)
-
 /* Create dynamic location entry within a 32-bit value */
 #define DYN_LOC(offset, size) ((size) << 16 | (offset))
 
@@ -45,12 +36,12 @@ struct user_reg {
         /* Input: Pointer to string with event name, description and flags */
         __u64 name_args;
 
-        /* Output: Byte index of the event within the status page */
-        __u32 status_index;
+        /* Output: Bitwise index of the event within the status page */
+        __u32 status_bit;
 
         /* Output: Index of the event to use when writing data */
         __u32 write_index;
-};
+} __attribute__((__packed__));
 
 #define DIAG_IOC_MAGIC '*'
......
@@ -1644,6 +1644,18 @@ ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops *op_ex
 static struct ftrace_ops *
 ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops);
 
+static bool skip_record(struct dyn_ftrace *rec)
+{
+        /*
+         * At boot up, weak functions are set to disable. Function tracing
+         * can be enabled before they are, and they still need to be disabled now.
+         * If the record is disabled, still continue if it is marked as already
+         * enabled (this is needed to keep the accounting working).
+         */
+        return rec->flags & FTRACE_FL_DISABLED &&
+                !(rec->flags & FTRACE_FL_ENABLED);
+}
+
 static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
                                      int filter_hash,
                                      bool inc)
@@ -1693,7 +1705,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
                 int in_hash = 0;
                 int match = 0;
 
-                if (rec->flags & FTRACE_FL_DISABLED)
+                if (skip_record(rec))
                         continue;
 
                 if (all) {
@@ -2126,7 +2138,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
 
         ftrace_bug_type = FTRACE_BUG_UNKNOWN;
 
-        if (rec->flags & FTRACE_FL_DISABLED)
+        if (skip_record(rec))
                 return FTRACE_UPDATE_IGNORE;
 
         /*
@@ -2241,7 +2253,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
         if (update) {
                 /* If there's no more users, clear all flags */
                 if (!ftrace_rec_count(rec))
-                        rec->flags = 0;
+                        rec->flags &= FTRACE_FL_DISABLED;
                 else
                         /*
                          * Just disable the record, but keep the ops TRAMP
@@ -2634,7 +2646,7 @@ void __weak ftrace_replace_code(int mod_flags)
 
         do_for_each_ftrace_rec(pg, rec) {
 
-                if (rec->flags & FTRACE_FL_DISABLED)
+                if (skip_record(rec))
                         continue;
 
                 failed = __ftrace_replace_code(rec, enable);
@@ -5427,6 +5439,8 @@ static struct ftrace_ops stub_ops = {
  * it is safe to modify the ftrace record, where it should be
  * currently calling @old_addr directly, to call @new_addr.
  *
+ * This is called with direct_mutex locked.
+ *
  * Safety checks should be made to make sure that the code at
  * @rec->ip is currently calling @old_addr. And this must
  * also update entry->direct to @new_addr.
@@ -5439,6 +5453,8 @@ int __weak ftrace_modify_direct_caller(struct ftrace_func_entry *entry,
         unsigned long ip = rec->ip;
         int ret;
 
+        lockdep_assert_held(&direct_mutex);
+
         /*
          * The ftrace_lock was used to determine if the record
          * had more than one registered user to it. If it did,
@@ -5461,7 +5477,7 @@ int __weak ftrace_modify_direct_caller(struct ftrace_func_entry *entry,
         if (ret)
                 goto out_lock;
 
-        ret = register_ftrace_function(&stub_ops);
+        ret = register_ftrace_function_nolock(&stub_ops);
         if (ret) {
                 ftrace_set_filter_ip(&stub_ops, ip, 1, 0);
                 goto out_lock;
@@ -6081,8 +6097,12 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
 
         if (filter_hash) {
                 orig_hash = &iter->ops->func_hash->filter_hash;
-                if (iter->tr && !list_empty(&iter->tr->mod_trace))
-                        iter->hash->flags |= FTRACE_HASH_FL_MOD;
+                if (iter->tr) {
+                        if (list_empty(&iter->tr->mod_trace))
+                                iter->hash->flags &= ~FTRACE_HASH_FL_MOD;
+                        else
+                                iter->hash->flags |= FTRACE_HASH_FL_MOD;
+                }
         } else
                 orig_hash = &iter->ops->func_hash->notrace_hash;
......
@@ -35,6 +35,45 @@
 static struct trace_event_file *gen_kprobe_test;
 static struct trace_event_file *gen_kretprobe_test;
 
+#define KPROBE_GEN_TEST_FUNC "do_sys_open"
+
+/* X86 */
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_32)
+#define KPROBE_GEN_TEST_ARG0 "dfd=%ax"
+#define KPROBE_GEN_TEST_ARG1 "filename=%dx"
+#define KPROBE_GEN_TEST_ARG2 "flags=%cx"
+#define KPROBE_GEN_TEST_ARG3 "mode=+4($stack)"
+
+/* ARM64 */
+#elif defined(CONFIG_ARM64)
+#define KPROBE_GEN_TEST_ARG0 "dfd=%x0"
+#define KPROBE_GEN_TEST_ARG1 "filename=%x1"
+#define KPROBE_GEN_TEST_ARG2 "flags=%x2"
+#define KPROBE_GEN_TEST_ARG3 "mode=%x3"
+
+/* ARM */
+#elif defined(CONFIG_ARM)
+#define KPROBE_GEN_TEST_ARG0 "dfd=%r0"
+#define KPROBE_GEN_TEST_ARG1 "filename=%r1"
+#define KPROBE_GEN_TEST_ARG2 "flags=%r2"
+#define KPROBE_GEN_TEST_ARG3 "mode=%r3"
+
+/* RISCV */
+#elif defined(CONFIG_RISCV)
+#define KPROBE_GEN_TEST_ARG0 "dfd=%a0"
+#define KPROBE_GEN_TEST_ARG1 "filename=%a1"
+#define KPROBE_GEN_TEST_ARG2 "flags=%a2"
+#define KPROBE_GEN_TEST_ARG3 "mode=%a3"
+
+/* others */
+#else
+#define KPROBE_GEN_TEST_ARG0 NULL
+#define KPROBE_GEN_TEST_ARG1 NULL
+#define KPROBE_GEN_TEST_ARG2 NULL
+#define KPROBE_GEN_TEST_ARG3 NULL
+#endif
+
 /*
  * Test to make sure we can create a kprobe event, then add more
  * fields.
@@ -58,14 +97,14 @@ static int __init test_gen_kprobe_cmd(void)
          * fields.
          */
         ret = kprobe_event_gen_cmd_start(&cmd, "gen_kprobe_test",
-                                         "do_sys_open",
-                                         "dfd=%ax", "filename=%dx");
+                                         KPROBE_GEN_TEST_FUNC,
+                                         KPROBE_GEN_TEST_ARG0, KPROBE_GEN_TEST_ARG1);
         if (ret)
                 goto free;
 
         /* Use kprobe_event_add_fields to add the rest of the fields */
 
-        ret = kprobe_event_add_fields(&cmd, "flags=%cx", "mode=+4($stack)");
+        ret = kprobe_event_add_fields(&cmd, KPROBE_GEN_TEST_ARG2, KPROBE_GEN_TEST_ARG3);
         if (ret)
                 goto free;
 
@@ -128,7 +167,7 @@ static int __init test_gen_kretprobe_cmd(void)
          * Define the kretprobe event.
          */
         ret = kretprobe_event_gen_cmd_start(&cmd, "gen_kretprobe_test",
-                                            "do_sys_open",
+                                            KPROBE_GEN_TEST_FUNC,
                                             "$retval");
         if (ret)
                 goto free;
@@ -206,7 +245,7 @@ static void __exit kprobe_event_gen_test_exit(void)
         WARN_ON(kprobe_event_delete("gen_kprobe_test"));
 
         /* Disable the event or you can't remove it */
-        WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr,
+        WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr,
                                           "kprobes",
                                           "gen_kretprobe_test", false));
......
@@ -413,6 +413,7 @@ struct rb_irq_work {
         struct irq_work                 work;
         wait_queue_head_t               waiters;
         wait_queue_head_t               full_waiters;
+        long                            wait_index;
         bool                            waiters_pending;
         bool                            full_waiters_pending;
         bool                            wakeup_full;
@@ -917,12 +918,44 @@ static void rb_wake_up_waiters(struct irq_work *work)
         struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
 
         wake_up_all(&rbwork->waiters);
-        if (rbwork->wakeup_full) {
+        if (rbwork->full_waiters_pending || rbwork->wakeup_full) {
                 rbwork->wakeup_full = false;
+                rbwork->full_waiters_pending = false;
                 wake_up_all(&rbwork->full_waiters);
         }
 }
 
+/**
+ * ring_buffer_wake_waiters - wake up any waiters on this ring buffer
+ * @buffer: The ring buffer to wake waiters on
+ *
+ * In the case of a file that represents a ring buffer is closing,
+ * it is prudent to wake up any waiters that are on this.
+ */
+void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
+{
+        struct ring_buffer_per_cpu *cpu_buffer;
+        struct rb_irq_work *rbwork;
+
+        if (cpu == RING_BUFFER_ALL_CPUS) {
+
+                /* Wake up individual ones too. One level recursion */
+                for_each_buffer_cpu(buffer, cpu)
+                        ring_buffer_wake_waiters(buffer, cpu);
+
+                rbwork = &buffer->irq_work;
+        } else {
+                cpu_buffer = buffer->buffers[cpu];
+                rbwork = &cpu_buffer->irq_work;
+        }
+
+        rbwork->wait_index++;
+        /* make sure the waiters see the new index */
+        smp_wmb();
+
+        rb_wake_up_waiters(&rbwork->work);
+}
+
 /**
  * ring_buffer_wait - wait for input to the ring buffer
  * @buffer: buffer to wait on
@@ -938,6 +971,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
         struct ring_buffer_per_cpu *cpu_buffer;
         DEFINE_WAIT(wait);
         struct rb_irq_work *work;
+        long wait_index;
         int ret = 0;
 
         /*
@@ -956,6 +990,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
                 work = &cpu_buffer->irq_work;
         }
 
+        wait_index = READ_ONCE(work->wait_index);
 
         while (true) {
                 if (full)
@@ -1011,7 +1046,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
                         nr_pages = cpu_buffer->nr_pages;
                         dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
                         if (!cpu_buffer->shortest_full ||
-                            cpu_buffer->shortest_full < full)
+                            cpu_buffer->shortest_full > full)
                                 cpu_buffer->shortest_full = full;
                         raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
                         if (!pagebusy &&
@@ -1020,6 +1055,11 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
                 }
 
                 schedule();
+
+                /* Make sure to see the new wait index */
+                smp_rmb();
+                if (wait_index != work->wait_index)
+                        break;
         }
 
         if (full)
@@ -2608,6 +2648,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
                 /* Mark the rest of the page with padding */
                 rb_event_set_padding(event);
 
+                /* Make sure the padding is visible before the write update */
+                smp_wmb();
+
                 /* Set the write back to the previous setting */
                 local_sub(length, &tail_page->write);
                 return;
@@ -2619,6 +2662,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
         /* time delta must be non zero */
         event->time_delta = 1;
 
+        /* Make sure the padding is visible before the tail_page->write update */
+        smp_wmb();
+
         /* Set write to end of buffer */
         length = (tail + length) - BUF_PAGE_SIZE;
         local_sub(length, &tail_page->write);
@@ -4587,6 +4633,33 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
         arch_spin_unlock(&cpu_buffer->lock);
         local_irq_restore(flags);
 
+        /*
+         * The writer has preempt disable, wait for it. But not forever
+         * Although, 1 second is pretty much "forever"
+         */
+#define USECS_WAIT      1000000
+        for (nr_loops = 0; nr_loops < USECS_WAIT; nr_loops++) {
+                /* If the write is past the end of page, a writer is still updating it */
+                if (likely(!reader || rb_page_write(reader) <= BUF_PAGE_SIZE))
+                        break;
+
+                udelay(1);
+
+                /* Get the latest version of the reader write value */
+                smp_rmb();
+        }
+
+        /* The writer is not moving forward? Something is wrong */
+        if (RB_WARN_ON(cpu_buffer, nr_loops == USECS_WAIT))
+                reader = NULL;
+
+        /*
+         * Make sure we see any padding after the write update
+         * (see rb_reset_tail())
+         */
+        smp_rmb();
+
         return reader;
 }
 
@@ -5616,7 +5689,15 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
                 unsigned int pos = 0;
                 unsigned int size;
 
-                if (full)
+                /*
+                 * If a full page is expected, this can still be returned
+                 * if there's been a previous partial read and the
+                 * rest of the page can be read and the commit page is off
+                 * the reader page.
+                 */
+                if (full &&
+                    (!read || (len < (commit - read)) ||
+                     cpu_buffer->reader_page == cpu_buffer->commit_page))
                         goto out_unlock;
 
                 if (len > (commit - read))
......
@@ -16,7 +16,7 @@
 
 #include "wip.h"
 
-struct rv_monitor rv_wip;
+static struct rv_monitor rv_wip;
 DECLARE_DA_MON_PER_CPU(wip, unsigned char);
 
 static void handle_preempt_disable(void *data, unsigned long ip, unsigned long parent_ip)
@@ -60,7 +60,7 @@ static void disable_wip(void)
         da_monitor_destroy_wip();
 }
 
-struct rv_monitor rv_wip = {
+static struct rv_monitor rv_wip = {
         .name = "wip",
         .description = "wakeup in preemptive per-cpu testing monitor.",
         .enable = enable_wip,
@@ -69,13 +69,13 @@ struct rv_monitor rv_wip = {
         .enabled = 0,
 };
 
-static int register_wip(void)
+static int __init register_wip(void)
 {
         rv_register_monitor(&rv_wip);
         return 0;
 }
 
-static void unregister_wip(void)
+static void __exit unregister_wip(void)
 {
         rv_unregister_monitor(&rv_wip);
 }
......
@@ -15,7 +15,7 @@
 
 #include "wwnr.h"
 
-struct rv_monitor rv_wwnr;
+static struct rv_monitor rv_wwnr;
 DECLARE_DA_MON_PER_TASK(wwnr, unsigned char);
 
 static void handle_switch(void *data, bool preempt, struct task_struct *p,
@@ -59,7 +59,7 @@ static void disable_wwnr(void)
         da_monitor_destroy_wwnr();
 }
 
-struct rv_monitor rv_wwnr = {
+static struct rv_monitor rv_wwnr = {
         .name = "wwnr",
         .description = "wakeup while not running per-task testing model.",
         .enable = enable_wwnr,
@@ -68,13 +68,13 @@ struct rv_monitor rv_wwnr = {
         .enabled = 0,
 };
 
-static int register_wwnr(void)
+static int __init register_wwnr(void)
 {
         rv_register_monitor(&rv_wwnr);
         return 0;
 }
 
-static void unregister_wwnr(void)
+static void __exit unregister_wwnr(void)
 {
         rv_unregister_monitor(&rv_wwnr);
 }
......
@@ -1193,12 +1193,14 @@ void *tracing_cond_snapshot_data(struct trace_array *tr)
 {
         void *cond_data = NULL;
 
+        local_irq_disable();
         arch_spin_lock(&tr->max_lock);
 
         if (tr->cond_snapshot)
                 cond_data = tr->cond_snapshot->cond_data;
 
         arch_spin_unlock(&tr->max_lock);
+        local_irq_enable();
 
         return cond_data;
 }
@@ -1334,9 +1336,11 @@ int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
                 goto fail_unlock;
         }
 
+        local_irq_disable();
         arch_spin_lock(&tr->max_lock);
         tr->cond_snapshot = cond_snapshot;
         arch_spin_unlock(&tr->max_lock);
+        local_irq_enable();
 
         mutex_unlock(&trace_types_lock);
@@ -1363,6 +1367,7 @@ int tracing_snapshot_cond_disable(struct trace_array *tr)
 {
         int ret = 0;
 
+        local_irq_disable();
         arch_spin_lock(&tr->max_lock);
 
         if (!tr->cond_snapshot)
@@ -1373,6 +1378,7 @@ int tracing_snapshot_cond_disable(struct trace_array *tr)
         }
 
         arch_spin_unlock(&tr->max_lock);
+        local_irq_enable();
 
         return ret;
 }
@@ -2200,6 +2206,11 @@ static size_t tgid_map_max;
 
 #define SAVED_CMDLINES_DEFAULT 128
 #define NO_CMDLINE_MAP UINT_MAX
+/*
+ * Preemption must be disabled before acquiring trace_cmdline_lock.
+ * The various trace_arrays' max_lock must be acquired in a context
+ * where interrupt is disabled.
+ */
 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 struct saved_cmdlines_buffer {
         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
@@ -2412,7 +2423,11 @@ static int trace_save_cmdline(struct task_struct *tsk)
          * the lock, but we also don't want to spin
          * nor do we want to disable interrupts,
          * so if we miss here, then better luck next time.
+         *
+         * This is called within the scheduler and wake up, so interrupts
+         * had better been disabled and run queue lock been held.
          */
+        lockdep_assert_preemption_disabled();
         if (!arch_spin_trylock(&trace_cmdline_lock))
                 return 0;
@@ -5890,9 +5905,11 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
         char buf[64];
         int r;
 
+        preempt_disable();
         arch_spin_lock(&trace_cmdline_lock);
         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
         arch_spin_unlock(&trace_cmdline_lock);
+        preempt_enable();
 
         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
 }
@@ -5917,10 +5934,12 @@ static int tracing_resize_saved_cmdlines(unsigned int val)
                 return -ENOMEM;
         }
 
+        preempt_disable();
         arch_spin_lock(&trace_cmdline_lock);
         savedcmd_temp = savedcmd;
         savedcmd = s;
         arch_spin_unlock(&trace_cmdline_lock);
+        preempt_enable();
         free_saved_cmdlines_buffer(savedcmd_temp);
 
         return 0;
@@ -6373,10 +6392,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
 
 #ifdef CONFIG_TRACER_SNAPSHOT
         if (t->use_max_tr) {
+                local_irq_disable();
                 arch_spin_lock(&tr->max_lock);
                 if (tr->cond_snapshot)
                         ret = -EBUSY;
                 arch_spin_unlock(&tr->max_lock);
+                local_irq_enable();
                 if (ret)
                         goto out;
         }
@@ -6407,12 +6428,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
         if (tr->current_trace->reset)
                 tr->current_trace->reset(tr);
 
-#ifdef CONFIG_TRACER_MAX_TRACE
-        had_max_tr = tr->current_trace->use_max_tr;
-
         /* Current trace needs to be nop_trace before synchronize_rcu */
         tr->current_trace = &nop_trace;
 
+#ifdef CONFIG_TRACER_MAX_TRACE
+        had_max_tr = tr->allocated_snapshot;
+
         if (had_max_tr && !t->use_max_tr) {
                 /*
                  * We need to make sure that the update_max_tr sees that
@@ -6425,11 +6446,13 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
                 free_snapshot(tr);
         }
 
-        if (t->use_max_tr && !had_max_tr) {
+        if (t->use_max_tr && !tr->allocated_snapshot) {
                 ret = tracing_alloc_snapshot_instance(tr);
                 if (ret < 0)
                         goto out;
         }
+#else
+        tr->current_trace = &nop_trace;
 #endif
 
         if (t->init) {
@@ -7436,10 +7459,12 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
                 goto out;
         }
 
+        local_irq_disable();
         arch_spin_lock(&tr->max_lock);
         if (tr->cond_snapshot)
                 ret = -EBUSY;
         arch_spin_unlock(&tr->max_lock);
+        local_irq_enable();
         if (ret)
                 goto out;
@@ -8137,6 +8162,12 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
 
         __trace_array_put(iter->tr);
 
+        iter->wait_index++;
+        /* Make sure the waiters see the new wait_index */
+        smp_wmb();
+
+        ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
+
         if (info->spare)
                 ring_buffer_free_read_page(iter->array_buffer->buffer,
                                            info->spare_cpu, info->spare);
@@ -8290,6 +8321,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 
         /* did we read anything? */
         if (!spd.nr_pages) {
+                long wait_index;
+
                 if (ret)
                         goto out;
@@ -8297,10 +8330,21 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
                         goto out;
 
+                wait_index = READ_ONCE(iter->wait_index);
+
                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
                 if (ret)
                         goto out;
 
+                /* No need to wait after waking up when tracing is off */
+                if (!tracer_tracing_is_on(iter->tr))
+                        goto out;
+
+                /* Make sure we see the new wait_index */
+                smp_rmb();
+                if (wait_index != iter->wait_index)
+                        goto out;
+
                 goto again;
         }
 
@@ -8311,12 +8355,34 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
         return ret;
 }
 
+/* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
+static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+        struct ftrace_buffer_info *info = file->private_data;
+        struct trace_iterator *iter = &info->iter;
+
+        if (cmd)
+                return -ENOIOCTLCMD;
+
+        mutex_lock(&trace_types_lock);
+
+        iter->wait_index++;
+        /* Make sure the waiters see the new wait_index */
+        smp_wmb();
+
+        ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
+
+        mutex_unlock(&trace_types_lock);
+        return 0;
+}
+
 static const struct file_operations tracing_buffers_fops = {
         .open           = tracing_buffers_open,
         .read           = tracing_buffers_read,
         .poll           = tracing_buffers_poll,
         .release        = tracing_buffers_release,
         .splice_read    = tracing_buffers_splice_read,
+        .unlocked_ioctl = tracing_buffers_ioctl,
         .llseek         = no_llseek,
 };
 
@@ -9005,6 +9071,8 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
                         tracer_tracing_off(tr);
                         if (tr->current_trace->stop)
                                 tr->current_trace->stop(tr);
+                        /* Wake up any waiters */
+                        ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
                 }
                 mutex_unlock(&trace_types_lock);
         }
@@ -10091,7 +10159,7 @@ __init static int tracer_alloc_buffers(void)
          * buffer. The memory will be removed once the "instance" is removed.
          */
         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
-                                      "trace/RB:preapre", trace_rb_cpu_prepare,
+                                      "trace/RB:prepare", trace_rb_cpu_prepare,
                                       NULL);
         if (ret < 0)
                 goto out_free_cpumask;
......
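The tracing_buffers_ioctl() handler added above gives user space a way to kick a reader that is blocked on trace_pipe_raw. A rough user-space sketch follows; the per_cpu/cpu0 path is an assumption, and per the handler above, only cmd 0 is accepted (anything else returns -ENOIOCTLCMD)::

  #include <fcntl.h>
  #include <sys/ioctl.h>
  #include <unistd.h>

  /* Wake up any thread blocked reading/splicing this trace_pipe_raw fd.
   * cmd must be 0; the kernel treats any other value as unsupported. */
  static int wake_trace_pipe_raw_waiters(int fd)
  {
        return ioctl(fd, 0, 0);
  }

  int main(void)
  {
        int fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
                      O_RDONLY);
        if (fd < 0)
                return 1;

        /* ... another thread may be blocked in read()/splice() on fd ... */
        wake_trace_pipe_raw_waiters(fd);
        close(fd);
        return 0;
  }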
@@ -1435,8 +1435,6 @@ event_trigger_unlock_commit(struct trace_event_file *file,
 struct filter_pred;
 struct regex;
 
-typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
-
 typedef int (*regex_match_func)(char *str, struct regex *r, int len);
 
 enum regex_type {
@@ -1455,17 +1453,6 @@ struct regex {
         regex_match_func        match;
 };
 
-struct filter_pred {
-        filter_pred_fn_t        fn;
-        u64                     val;
-        struct regex            regex;
-        unsigned short          *ops;
-        struct ftrace_event_field *field;
-        int                     offset;
-        int                     not;
-        int                     op;
-};
-
 static inline bool is_string_field(struct ftrace_event_field *field)
 {
         return field->filter_type == FILTER_DYN_STRING ||
......
@@ -51,7 +51,7 @@ static void trace_do_benchmark(void)
 
         local_irq_disable();
         start = trace_clock_local();
-        trace_benchmark_event(bm_str);
+        trace_benchmark_event(bm_str, bm_last);
         stop = trace_clock_local();
         local_irq_enable();
......
@@ -14,19 +14,21 @@ extern void trace_benchmark_unreg(void);
 
 TRACE_EVENT_FN(benchmark_event,
 
-        TP_PROTO(const char *str),
+        TP_PROTO(const char *str, u64 delta),
 
-        TP_ARGS(str),
+        TP_ARGS(str, delta),
 
         TP_STRUCT__entry(
                 __array( char,  str,    BENCHMARK_EVENT_STRLEN  )
+                __field( u64,   delta)
         ),
 
         TP_fast_assign(
                 memcpy(__entry->str, str, BENCHMARK_EVENT_STRLEN);
+                __entry->delta = delta;
         ),
 
-        TP_printk("%s", __entry->str),
+        TP_printk("%s delta=%llu", __entry->str, __entry->delta),
 
         trace_benchmark_reg, trace_benchmark_unreg
 );
......
@@ -26,6 +26,9 @@ struct trace_eprobe {
         /* tracepoint event */
         const char *event_name;
 
+        /* filter string for the tracepoint */
+        char *filter_str;
+
         struct trace_event_call *event;
 
         struct dyn_event devent;
@@ -664,14 +667,15 @@ static struct event_trigger_data *
 new_eprobe_trigger(struct trace_eprobe *ep, struct trace_event_file *file)
 {
         struct event_trigger_data *trigger;
+        struct event_filter *filter = NULL;
         struct eprobe_data *edata;
+        int ret;
 
         edata = kzalloc(sizeof(*edata), GFP_KERNEL);
         trigger = kzalloc(sizeof(*trigger), GFP_KERNEL);
         if (!trigger || !edata) {
-                kfree(edata);
-                kfree(trigger);
-                return ERR_PTR(-ENOMEM);
+                ret = -ENOMEM;
+                goto error;
         }
 
         trigger->flags = EVENT_TRIGGER_FL_PROBE;
@@ -686,13 +690,25 @@ new_eprobe_trigger(struct trace_eprobe *ep, struct trace_event_file *file)
         trigger->cmd_ops = &event_trigger_cmd;
 
         INIT_LIST_HEAD(&trigger->list);
-        RCU_INIT_POINTER(trigger->filter, NULL);
 
+        if (ep->filter_str) {
+                ret = create_event_filter(file->tr, file->event_call,
+                                          ep->filter_str, false, &filter);
+                if (ret)
+                        goto error;
+        }
+
+        RCU_INIT_POINTER(trigger->filter, filter);
         edata->file = file;
         edata->ep = ep;
         trigger->private_data = edata;
 
         return trigger;
+error:
+        free_event_filter(filter);
+        kfree(edata);
+        kfree(trigger);
+        return ERR_PTR(ret);
 }
 
 static int enable_eprobe(struct trace_eprobe *ep,
@@ -726,6 +742,7 @@ static int disable_eprobe(struct trace_eprobe *ep,
 {
         struct event_trigger_data *trigger = NULL, *iter;
         struct trace_event_file *file;
+        struct event_filter *filter;
         struct eprobe_data *edata;
 
         file = find_event_file(tr, ep->event_system, ep->event_name);
@@ -752,6 +769,10 @@ static int disable_eprobe(struct trace_eprobe *ep,
         /* Make sure nothing is using the edata or trigger */
         tracepoint_synchronize_unregister();
 
+        filter = rcu_access_pointer(trigger->filter);
+        if (filter)
+                free_event_filter(filter);
+
         kfree(edata);
         kfree(trigger);
 
@@ -927,12 +948,62 @@ static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[
         return ret;
 }
 
+static int trace_eprobe_parse_filter(struct trace_eprobe *ep, int argc, const char *argv[])
+{
+        struct event_filter *dummy;
+        int i, ret, len = 0;
+        char *p;
+
+        if (argc == 0) {
+                trace_probe_log_err(0, NO_EP_FILTER);
+                return -EINVAL;
+        }
+
+        /* Recover the filter string */
+        for (i = 0; i < argc; i++)
+                len += strlen(argv[i]) + 1;
+
+        ep->filter_str = kzalloc(len, GFP_KERNEL);
+        if (!ep->filter_str)
+                return -ENOMEM;
+
+        p = ep->filter_str;
+        for (i = 0; i < argc; i++) {
+                ret = snprintf(p, len, "%s ", argv[i]);
+                if (ret < 0)
+                        goto error;
+                if (ret > len) {
+                        ret = -E2BIG;
+                        goto error;
+                }
+                p += ret;
+                len -= ret;
+        }
+        p[-1] = '\0';
+
+        /*
+         * Ensure the filter string can be parsed correctly. Note, this
+         * filter string is for the original event, not for the eprobe.
+         */
+        ret = create_event_filter(top_trace_array(), ep->event, ep->filter_str,
+                                  true, &dummy);
+        free_event_filter(dummy);
+        if (ret)
+                goto error;
+
+        return 0;
+error:
+        kfree(ep->filter_str);
+        ep->filter_str = NULL;
+        return ret;
+}
+
 static int __trace_eprobe_create(int argc, const char *argv[])
 {
         /*
          * Argument syntax:
-         *      e[:[GRP/][ENAME]] SYSTEM.EVENT [FETCHARGS]
-         * Fetch args:
+         *      e[:[GRP/][ENAME]] SYSTEM.EVENT [FETCHARGS] [if FILTER]
+         * Fetch args (no space):
          *      <name>=$<field>[:TYPE]
          */
         const char *event = NULL, *group = EPROBE_EVENT_SYSTEM;
@@ -942,8 +1013,8 @@ static int __trace_eprobe_create(int argc, const char *argv[])
         char buf1[MAX_EVENT_NAME_LEN];
         char buf2[MAX_EVENT_NAME_LEN];
         char gbuf[MAX_EVENT_NAME_LEN];
-        int ret = 0;
-        int i;
+        int ret = 0, filter_idx = 0;
+        int i, filter_cnt;
 
         if (argc < 2 || argv[0][0] != 'e')
                 return -ECANCELED;
@@ -968,11 +1039,19 @@ static int __trace_eprobe_create(int argc, const char *argv[])
         }
 
         if (!event) {
-                strscpy(buf1, argv[1], MAX_EVENT_NAME_LEN);
-                sanitize_event_name(buf1);
+                strscpy(buf1, sys_event, MAX_EVENT_NAME_LEN);
                 event = buf1;
         }
 
+        for (i = 2; i < argc; i++) {
+                if (!strcmp(argv[i], "if")) {
+                        filter_idx = i + 1;
+                        filter_cnt = argc - filter_idx;
+                        argc = i;
+                        break;
+                }
+        }
+
         mutex_lock(&event_mutex);
         event_call = find_and_get_event(sys_name, sys_event);
         ep = alloc_event_probe(group, event, event_call, argc - 2);
@@ -988,6 +1067,14 @@ static int __trace_eprobe_create(int argc, const char *argv[])
                 goto error;
         }
 
+        if (filter_idx) {
+                trace_probe_log_set_index(filter_idx);
+                ret = trace_eprobe_parse_filter(ep, filter_cnt, argv + filter_idx);
+                if (ret)
+                        goto parse_error;
+        } else
+                ep->filter_str = NULL;
+
         argc -= 2; argv += 2;
         /* parse arguments */
         for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
......
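With the filter parsing added above, an eprobe can now be created with a trailing "if" clause that filters on the original event's fields, following the e[:[GRP/][ENAME]] SYSTEM.EVENT [FETCHARGS] [if FILTER] syntax. A hypothetical user-space sketch of creating one via dynamic_events; the sched.sched_waking event, the $pid fetch arg, and the pid < 128 threshold are made-up examples::

  #include <fcntl.h>
  #include <stdio.h>
  #include <unistd.h>

  int main(void)
  {
        int fd = open("/sys/kernel/debug/tracing/dynamic_events",
                      O_WRONLY | O_APPEND);
        if (fd < 0)
                return 1;

        /* Hypothetical eprobe: copy $pid from sched_waking events, but
         * only when the original event's pid field is below 128 */
        dprintf(fd, "e:my_waking sched.sched_waking wpid=$pid if pid < 128\n");
        close(fd);
        return 0;
  }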
@@ -1786,8 +1786,9 @@ static int start_per_cpu_kthreads(void)
         for_each_cpu(cpu, current_mask) {
                 retval = start_kthread(cpu);
                 if (retval) {
+                        cpus_read_unlock();
                         stop_per_cpu_kthreads();
-                        break;
+                        return retval;
                 }
         }
......
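The ordering in this hunk is the point of the fix: stop_per_cpu_kthreads() takes cpus_read_lock() on its own, and start_per_cpu_kthreads() is already holding that lock around this loop. The error path therefore has to drop the lock before calling the teardown helper, and return directly rather than break out to the function's normal unlock, which would otherwise unlock a second time.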
@@ -445,7 +445,8 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
	C(SAME_PROBE,		"There is already the exact same probe event"),\
	C(NO_EVENT_INFO,	"This requires both group and event name to attach"),\
	C(BAD_ATTACH_EVENT,	"Attached event does not exist"),\
-	C(BAD_ATTACH_ARG,	"Attached event does not have this field"),
+	C(BAD_ATTACH_ARG,	"Attached event does not have this field"),\
+	C(NO_EP_FILTER,		"No filter rule after 'if'"),

 #undef C
 #define C(a, b)		TP_ERR_##a
...
@@ -961,7 +961,7 @@ create_sort_entry(void *key, struct tracing_map_elt *elt)
 static void detect_dups(struct tracing_map_sort_entry **sort_entries,
		      int n_entries, unsigned int key_size)
 {
-	unsigned int dups = 0, total_dups = 0;
+	unsigned int total_dups = 0;
	int i;
	void *key;
@@ -974,11 +974,10 @@ static void detect_dups(struct tracing_map_sort_entry **sort_entries,
	key = sort_entries[0]->key;
	for (i = 1; i < n_entries; i++) {
		if (!memcmp(sort_entries[i]->key, key, key_size)) {
-			dups++; total_dups++;
+			total_dups++;
			continue;
		}
		key = sort_entries[i]->key;
-		dups = 0;
	}

	WARN_ONCE(total_dups > 0,
...
@@ -640,7 +640,6 @@ static void tp_module_going_check_quiescent(struct tracepoint *tp, void *priv)
 static int tracepoint_module_coming(struct module *mod)
 {
	struct tp_module *tp_mod;
-	int ret = 0;

	if (!mod->num_tracepoints)
		return 0;
@@ -652,19 +651,18 @@ static int tracepoint_module_coming(struct module *mod)
	 */
	if (trace_module_has_bad_taint(mod))
		return 0;
-	mutex_lock(&tracepoint_module_list_mutex);
	tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL);
-	if (!tp_mod) {
-		ret = -ENOMEM;
-		goto end;
-	}
+	if (!tp_mod)
+		return -ENOMEM;
	tp_mod->mod = mod;
+	mutex_lock(&tracepoint_module_list_mutex);
	list_add_tail(&tp_mod->list, &tracepoint_module_list);
	blocking_notifier_call_chain(&tracepoint_notify_list,
			MODULE_STATE_COMING, tp_mod);
-end:
	mutex_unlock(&tracepoint_module_list_mutex);
-	return ret;
+	return 0;
 }

 static void tracepoint_module_going(struct module *mod)
...
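This reshuffle follows a common pattern: perform the fallible allocation before taking the lock, so the error path needs no unwind, and shrink the critical section to just the shared-list update. A minimal userspace sketch of the same shape, with illustrative names:

/* Allocate before locking: the failure path has nothing to unlock,
 * and the mutex covers only the list manipulation. */
#include <pthread.h>
#include <stdlib.h>

struct node { struct node *next; int val; };

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *head;

static int add_node(int val)
{
	struct node *n = malloc(sizeof(*n));	/* fallible step first */

	if (!n)
		return -1;			/* no lock held, no goto needed */

	n->val = val;
	pthread_mutex_lock(&list_lock);		/* lock only the shared update */
	n->next = head;
	head = n;
	pthread_mutex_unlock(&list_lock);
	return 0;
}

int main(void) { return add_node(42); }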
@@ -12,13 +12,21 @@
 #include <fcntl.h>
 #include <stdio.h>
 #include <unistd.h>
+#include <asm/bitsperlong.h>
+#include <endian.h>
 #include <linux/user_events.h>

+#if __BITS_PER_LONG == 64
+#define endian_swap(x) htole64(x)
+#else
+#define endian_swap(x) htole32(x)
+#endif
+
 /* Assumes debugfs is mounted */
 const char *data_file = "/sys/kernel/debug/tracing/user_events_data";
 const char *status_file = "/sys/kernel/debug/tracing/user_events_status";

-static int event_status(char **status)
+static int event_status(long **status)
 {
	int fd = open(status_file, O_RDONLY);
@@ -33,7 +41,8 @@ static int event_status(char **status)
	return 0;
 }

-static int event_reg(int fd, const char *command, int *status, int *write)
+static int event_reg(int fd, const char *command, long *index, long *mask,
+		     int *write)
 {
	struct user_reg reg = {0};
@@ -43,7 +52,8 @@ static int event_reg(int fd, const char *command, int *status, int *write)
	if (ioctl(fd, DIAG_IOCSREG, &reg) == -1)
		return -1;

-	*status = reg.status_index;
+	*index = reg.status_bit / __BITS_PER_LONG;
+	*mask = endian_swap(1L << (reg.status_bit % __BITS_PER_LONG));
	*write = reg.write_index;

	return 0;
@@ -51,8 +61,9 @@ static int event_reg(int fd, const char *command, int *status, int *write)

 int main(int argc, char **argv)
 {
-	int data_fd, status, write;
-	char *status_page;
+	int data_fd, write;
+	long index, mask;
+	long *status_page;
	struct iovec io[2];
	__u32 count = 0;
@@ -61,7 +72,7 @@ int main(int argc, char **argv)
	data_fd = open(data_file, O_RDWR);

-	if (event_reg(data_fd, "test u32 count", &status, &write) == -1)
+	if (event_reg(data_fd, "test u32 count", &index, &mask, &write) == -1)
		return errno;

	/* Setup iovec */
@@ -75,7 +86,7 @@ int main(int argc, char **argv)
	getchar();

	/* Check if anyone is listening */
-	if (status_page[status]) {
+	if (status_page[index] & mask) {
		/* Yep, trace out our data */
		writev(data_fd, (const struct iovec *)io, 2);
...
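The key change in this sample is that an event's status is now a single bit in a page of longs rather than a whole byte. A worked sketch of the index/mask arithmetic, using a hypothetical status_bit of 73 on a 64-bit little-endian machine (where endian_swap() is a no-op):

/* Map a hypothetical status_bit (73) to a word index and mask, as the
 * updated event_reg() does. Values are for illustration only. */
#include <stdio.h>

int main(void)
{
	int status_bit = 73;			/* illustrative value */

	long index = status_bit / 64;		/* -> word 1 of the page */
	long mask = 1L << (status_bit % 64);	/* -> bit 9 of that word */

	printf("index=%ld mask=0x%lx\n", index, mask);
	return 0;
}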
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Event probe event parser error log check
# requires: dynamic_events events/syscalls/sys_enter_openat "<attached-group>.<attached-event> [<args>]":README error_log
check_error() { # command-with-error-pos-by-^
ftrace_errlog_check 'event_probe' "$1" 'dynamic_events'
}
check_error 'e ^a.' # NO_EVENT_INFO
check_error 'e ^.b' # NO_EVENT_INFO
check_error 'e ^a.b' # BAD_ATTACH_EVENT
check_error 'e syscalls/sys_enter_openat ^foo' # BAD_ATTACH_ARG
check_error 'e:^/bar syscalls/sys_enter_openat' # NO_GROUP_NAME
check_error 'e:^12345678901234567890123456789012345678901234567890123456789012345/bar syscalls/sys_enter_openat' # GROUP_TOO_LONG
check_error 'e:^foo.1/bar syscalls/sys_enter_openat' # BAD_GROUP_NAME
check_error 'e:^ syscalls/sys_enter_openat' # NO_EVENT_NAME
check_error 'e:foo/^12345678901234567890123456789012345678901234567890123456789012345 syscalls/sys_enter_openat' # EVENT_TOO_LONG
check_error 'e:foo/^bar.1 syscalls/sys_enter_openat' # BAD_EVENT_NAME
check_error 'e:foo/bar syscalls/sys_enter_openat arg=^dfd' # BAD_FETCH_ARG
check_error 'e:foo/bar syscalls/sys_enter_openat ^arg=$foo' # BAD_ATTACH_ARG
check_error 'e:foo/bar syscalls/sys_enter_openat if ^' # NO_EP_FILTER
exit 0
@@ -22,6 +22,11 @@ const char *enable_file = "/sys/kernel/debug/tracing/events/user_events/__test_e
 const char *trace_file = "/sys/kernel/debug/tracing/trace";
 const char *fmt_file = "/sys/kernel/debug/tracing/events/user_events/__test_event/format";

+static inline int status_check(char *status_page, int status_bit)
+{
+	return status_page[status_bit >> 3] & (1 << (status_bit & 7));
+}
+
 static int trace_bytes(void)
 {
	int fd = open(trace_file, O_RDONLY);
@@ -197,12 +202,12 @@ TEST_F(user, register_events) {
	/* Register should work */
	ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
	ASSERT_EQ(0, reg.write_index);
-	ASSERT_NE(0, reg.status_index);
+	ASSERT_NE(0, reg.status_bit);

	/* Multiple registers should result in same index */
	ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
	ASSERT_EQ(0, reg.write_index);
-	ASSERT_NE(0, reg.status_index);
+	ASSERT_NE(0, reg.status_bit);

	/* Ensure disabled */
	self->enable_fd = open(enable_file, O_RDWR);
@@ -212,15 +217,15 @@ TEST_F(user, register_events) {
	/* MMAP should work and be zero'd */
	ASSERT_NE(MAP_FAILED, status_page);
	ASSERT_NE(NULL, status_page);
-	ASSERT_EQ(0, status_page[reg.status_index]);
+	ASSERT_EQ(0, status_check(status_page, reg.status_bit));

	/* Enable event and ensure bits updated in status */
	ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1")))
-	ASSERT_EQ(EVENT_STATUS_FTRACE, status_page[reg.status_index]);
+	ASSERT_NE(0, status_check(status_page, reg.status_bit));

	/* Disable event and ensure bits updated in status */
	ASSERT_NE(-1, write(self->enable_fd, "0", sizeof("0")))
-	ASSERT_EQ(0, status_page[reg.status_index]);
+	ASSERT_EQ(0, status_check(status_page, reg.status_bit));

	/* File still open should return -EBUSY for delete */
	ASSERT_EQ(-1, ioctl(self->data_fd, DIAG_IOCSDEL, "__test_event"));
@@ -240,6 +245,8 @@ TEST_F(user, write_events) {
	struct iovec io[3];
	__u32 field1, field2;
	int before = 0, after = 0;
+	int page_size = sysconf(_SC_PAGESIZE);
+	char *status_page;

	reg.size = sizeof(reg);
	reg.name_args = (__u64)"__test_event u32 field1; u32 field2";
@@ -254,10 +261,18 @@ TEST_F(user, write_events) {
	io[2].iov_base = &field2;
	io[2].iov_len = sizeof(field2);

+	status_page = mmap(NULL, page_size, PROT_READ, MAP_SHARED,
+			   self->status_fd, 0);
+
	/* Register should work */
	ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
	ASSERT_EQ(0, reg.write_index);
-	ASSERT_NE(0, reg.status_index);
+	ASSERT_NE(0, reg.status_bit);
+
+	/* MMAP should work and be zero'd */
+	ASSERT_NE(MAP_FAILED, status_page);
+	ASSERT_NE(NULL, status_page);
+	ASSERT_EQ(0, status_check(status_page, reg.status_bit));

	/* Write should fail on invalid slot with ENOENT */
	io[0].iov_base = &field2;
@@ -271,6 +286,9 @@ TEST_F(user, write_events) {
	self->enable_fd = open(enable_file, O_RDWR);
	ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1")))

+	/* Event should now be enabled */
+	ASSERT_NE(0, status_check(status_page, reg.status_bit));
+
	/* Write should make it out to ftrace buffers */
	before = trace_bytes();
	ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 3));
@@ -298,7 +316,7 @@ TEST_F(user, write_fault) {
	/* Register should work */
	ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
	ASSERT_EQ(0, reg.write_index);
-	ASSERT_NE(0, reg.status_index);
+	ASSERT_NE(0, reg.status_bit);

	/* Write should work normally */
	ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 2));
@@ -315,6 +333,11 @@ TEST_F(user, write_validator) {
	int loc, bytes;
	char data[8];
	int before = 0, after = 0;
+	int page_size = sysconf(_SC_PAGESIZE);
+	char *status_page;
+
+	status_page = mmap(NULL, page_size, PROT_READ, MAP_SHARED,
+			   self->status_fd, 0);

	reg.size = sizeof(reg);
	reg.name_args = (__u64)"__test_event __rel_loc char[] data";
@@ -322,7 +345,12 @@ TEST_F(user, write_validator) {
	/* Register should work */
	ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
	ASSERT_EQ(0, reg.write_index);
-	ASSERT_NE(0, reg.status_index);
+	ASSERT_NE(0, reg.status_bit);
+
+	/* MMAP should work and be zero'd */
+	ASSERT_NE(MAP_FAILED, status_page);
+	ASSERT_NE(NULL, status_page);
+	ASSERT_EQ(0, status_check(status_page, reg.status_bit));

	io[0].iov_base = &reg.write_index;
	io[0].iov_len = sizeof(reg.write_index);
@@ -340,6 +368,9 @@ TEST_F(user, write_validator) {
	self->enable_fd = open(enable_file, O_RDWR);
	ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1")))

+	/* Event should now be enabled */
+	ASSERT_NE(0, status_check(status_page, reg.status_bit));
+
	/* Full in-bounds write should work */
	before = trace_bytes();
	loc = DYN_LOC(0, bytes);
...
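Note that the selftests check the same mmap'ed page byte-wise instead of long-wise. On a little-endian machine the two addressings pick out the same bit, which the following sketch verifies for a hypothetical status_bit of 73:

/* Byte-wise status_check() (selftests) and the long-wise index/mask
 * check (sample) agree on 64-bit little-endian. The status_bit value
 * is illustrative. */
#include <assert.h>
#include <string.h>

int main(void)
{
	unsigned char page[4096] = {0};
	int status_bit = 73;

	/* set the bit long-wise, as a kernel-side writer would */
	long mask = 1L << (status_bit % 64);
	long index = status_bit / 64;
	memcpy(page + index * sizeof(long), &mask, sizeof(long));

	/* read it back byte-wise, as status_check() does */
	assert(page[status_bit >> 3] & (1 << (status_bit & 7)));
	return 0;
}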
@@ -35,6 +35,11 @@ static long perf_event_open(struct perf_event_attr *pe, pid_t pid,
	return syscall(__NR_perf_event_open, pe, pid, cpu, group_fd, flags);
 }

+static inline int status_check(char *status_page, int status_bit)
+{
+	return status_page[status_bit >> 3] & (1 << (status_bit & 7));
+}
+
 static int get_id(void)
 {
	FILE *fp = fopen(id_file, "r");
@@ -120,8 +125,8 @@ TEST_F(user, perf_write) {
	/* Register should work */
	ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
	ASSERT_EQ(0, reg.write_index);
-	ASSERT_NE(0, reg.status_index);
-	ASSERT_EQ(0, status_page[reg.status_index]);
+	ASSERT_NE(0, reg.status_bit);
+	ASSERT_EQ(0, status_check(status_page, reg.status_bit));

	/* Id should be there */
	id = get_id();
@@ -144,7 +149,7 @@ TEST_F(user, perf_write) {
	ASSERT_NE(MAP_FAILED, perf_page);

	/* Status should be updated */
-	ASSERT_EQ(EVENT_STATUS_PERF, status_page[reg.status_index]);
+	ASSERT_NE(0, status_check(status_page, reg.status_bit));

	event.index = reg.write_index;
	event.field1 = 0xc001;
...
@@ -27,7 +27,7 @@
  *
  * The rv monitor reference is needed for the monitor declaration.
  */
-struct rv_monitor rv_MODEL_NAME;
+static struct rv_monitor rv_MODEL_NAME;
 DECLARE_DA_MON_GLOBAL(MODEL_NAME, MIN_TYPE);

 /*
@@ -63,7 +63,7 @@ TRACEPOINT_DETACH
 /*
  * This is the monitor register section.
  */
-struct rv_monitor rv_MODEL_NAME = {
+static struct rv_monitor rv_MODEL_NAME = {
	.name = "MODEL_NAME",
	.description = "auto-generated MODEL_NAME",
	.enable = enable_MODEL_NAME,
@@ -72,13 +72,13 @@ struct rv_monitor rv_MODEL_NAME = {
	.enabled = 0,
 };

-static int register_MODEL_NAME(void)
+static int __init register_MODEL_NAME(void)
 {
	rv_register_monitor(&rv_MODEL_NAME);
	return 0;
 }

-static void unregister_MODEL_NAME(void)
+static void __exit unregister_MODEL_NAME(void)
 {
	rv_unregister_monitor(&rv_MODEL_NAME);
 }
...
@@ -27,7 +27,7 @@
  *
  * The rv monitor reference is needed for the monitor declaration.
  */
-struct rv_monitor rv_MODEL_NAME;
+static struct rv_monitor rv_MODEL_NAME;
 DECLARE_DA_MON_PER_CPU(MODEL_NAME, MIN_TYPE);

 /*
@@ -63,7 +63,7 @@ TRACEPOINT_DETACH
 /*
  * This is the monitor register section.
  */
-struct rv_monitor rv_MODEL_NAME = {
+static struct rv_monitor rv_MODEL_NAME = {
	.name = "MODEL_NAME",
	.description = "auto-generated MODEL_NAME",
	.enable = enable_MODEL_NAME,
@@ -72,13 +72,13 @@ struct rv_monitor rv_MODEL_NAME = {
	.enabled = 0,
 };

-static int register_MODEL_NAME(void)
+static int __init register_MODEL_NAME(void)
 {
	rv_register_monitor(&rv_MODEL_NAME);
	return 0;
 }

-static void unregister_MODEL_NAME(void)
+static void __exit unregister_MODEL_NAME(void)
 {
	rv_unregister_monitor(&rv_MODEL_NAME);
 }
...
@@ -27,7 +27,7 @@
  *
  * The rv monitor reference is needed for the monitor declaration.
  */
-struct rv_monitor rv_MODEL_NAME;
+static struct rv_monitor rv_MODEL_NAME;
 DECLARE_DA_MON_PER_TASK(MODEL_NAME, MIN_TYPE);

 /*
@@ -63,7 +63,7 @@ TRACEPOINT_DETACH
 /*
  * This is the monitor register section.
  */
-struct rv_monitor rv_MODEL_NAME = {
+static struct rv_monitor rv_MODEL_NAME = {
	.name = "MODEL_NAME",
	.description = "auto-generated MODEL_NAME",
	.enable = enable_MODEL_NAME,
@@ -72,13 +72,13 @@ struct rv_monitor rv_MODEL_NAME = {
	.enabled = 0,
 };

-static int register_MODEL_NAME(void)
+static int __init register_MODEL_NAME(void)
 {
	rv_register_monitor(&rv_MODEL_NAME);
	return 0;
 }

-static void unregister_MODEL_NAME(void)
+static void __exit unregister_MODEL_NAME(void)
 {
	rv_unregister_monitor(&rv_MODEL_NAME);
 }
...
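All three templates make the monitor object static and annotate the register/unregister helpers __init/__exit. That is sound only if, as the generated files are assumed to be wired, the helpers are referenced solely from the module load/unload hooks, roughly:

/* Assumed tail of the generated monitor file (sketch): because the
 * helpers run only at load/unload, their text can be discarded after
 * those phases. */
module_init(register_MODEL_NAME);
module_exit(unregister_MODEL_NAME);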