Commit 4f9e7fab authored by Linus Torvalds

Merge tag 'trace-v6.5-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull tracing fixes from Steven Rostedt:

 - Fix ring buffer being permanently disabled due to missed
   record_disabled()

   Changing the trace cpu mask will disable the ring buffers for the
   CPUs no longer in the mask. But it fails to update the snapshot
   buffer. If a snapshot takes place, the accounting for the ring buffer
   being disabled is corrupted and this can lead to the ring buffer
   being permanently disabled.
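
   A minimal sketch of the scenario (assuming tracefs is mounted at
   /sys/kernel/tracing and the commands are run as root; the selftest
   added below exercises the same sequence inside an instance):

     cd /sys/kernel/tracing
     ORIG=`cat tracing_cpumask`
     echo 0 > tracing_cpumask       # disable tracing on all CPUs
     echo 1 > snapshot              # swap in the snapshot buffer
     echo $ORIG > tracing_cpumask   # before the fix, the re-enable was
                                    # miscounted and the buffers could
                                    # stay permanently disabled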

 - Add test case for snapshot and cpu mask working together

 - Fix memleak caused by the function graph tracer not being closed
   properly.

   The iterator is used to read the ring buffer. When it is opened, it
   calls the open callback of the current tracer, and when it is
   closed, it calls the tracer's close callback. While a trace is being
   read, it is still possible to change the tracer.

   If this happens between the function graph tracer and the wakeup
   tracer (which uses function graph tracing), the tracers are not
   closed properly when the iterator sees the switch. In addition, the
   wakeup tracer's open callback did not initialize its private pointer
   to NULL, which is used to know whether the function graph tracer was
   the last tracer; it could be fooled into thinking it was, but then
   on exit it does not call the function graph tracer's close function
   to clean up its data.
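
   A hypothetical shell session showing the triggering sequence (again
   assuming tracefs at /sys/kernel/tracing, run as root):

     cd /sys/kernel/tracing
     echo function_graph > current_tracer
     exec 3< trace                  # open an iterator on the trace file
     read line <&3                  # start reading with function graph
     echo wakeup > current_tracer   # switch tracers while it is open
     read line <&3                  # the iterator sees the switch here
     exec 3<&-                      # before the fix, the function graph
                                    # data could leak at close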

 - Fix synthetic events on big endian machines, by introducing a union
   that does the conversions properly.

 - Fix synthetic events printing the number of elements in the
   stacktrace when they shouldn't.

 - Fix synthetic events stacktrace to not print a bogus value at the
   end.

 - Introduce a pipe_cpumask that prevents the trace_pipe files from
   being opened by more than one task (file descriptor).

   A race was found where, if splice is called, the iter->ent could
   become stale and events could be missed. There is no point in more
   than one task reading a producer/consumer file, as they would only
   corrupt each other anyway. Add a cpumask that keeps track of the
   per_cpu trace_pipe files as well as the global trace_pipe file, and
   prevents more than one open of a trace_pipe file that represents the
   same ring buffer. This prevents the race from happening.
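
   With this change, a second concurrent open of a pipe backed by the
   same ring buffer should be refused (an illustrative session):

     cd /sys/kernel/tracing
     cat trace_pipe &               # first reader claims every CPU
     cat trace_pipe                 # a second open now fails with EBUSY
     cat per_cpu/cpu0/trace_pipe    # also refused while the global
                                    # trace_pipe is held open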

 - Fix ftrace samples for arm64 to work with older compilers.
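
   ("bti c" is the same instruction as "hint #34", and hints execute as
   NOPs on cores without BTI, so the "hint" spelling assembles with
   toolchains that predate the BTI extension. A quick sanity check,
   assuming an aarch64 cross assembler named aarch64-linux-gnu-as is
   available:)

     echo 'hint 34' | aarch64-linux-gnu-as -march=armv8-a -o /dev/null -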

* tag 'trace-v6.5-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  samples: ftrace: Replace bti assembly with hint for older compiler
  tracing: Introduce pipe_cpumask to avoid race on trace_pipes
  tracing: Fix memleak due to race between current_tracer and trace
  tracing/synthetic: Allocate one additional element for size
  tracing/synthetic: Skip first entry for stack traces
  tracing/synthetic: Use union instead of casts
  selftests/ftrace: Add a basic testcase for snapshot
  tracing: Fix cpu buffers unavailable due to 'record_disabled' missed
parents 14ddccc8 e332938e
@@ -59,6 +59,17 @@ int trace_raw_output_prep(struct trace_iterator *iter,
extern __printf(2, 3)
void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...);
/* Used to find the offset and length of dynamic fields in trace events */
struct trace_dynamic_info {
#ifdef CONFIG_CPU_BIG_ENDIAN
u16 offset;
u16 len;
#else
u16 len;
u16 offset;
#endif
};
/*
* The trace entry - the most basic unit of tracing. This is what
* is printed in the end as a single line in the trace output, such as:
@@ -4213,8 +4213,15 @@ static void *s_start(struct seq_file *m, loff_t *pos)
* will point to the same string as current_trace->name.
*/
mutex_lock(&trace_types_lock);
if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
/* Close iter->trace before switching to the new current tracer */
if (iter->trace->close)
iter->trace->close(iter);
*iter->trace = *tr->current_trace;
/* Reopen the new current tracer */
if (iter->trace->open)
iter->trace->open(iter);
}
mutex_unlock(&trace_types_lock);
#ifdef CONFIG_TRACER_MAX_TRACE
@@ -5277,11 +5284,17 @@ int tracing_set_cpumask(struct trace_array *tr,
!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_MAX_TRACE
ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
#endif
}
if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
cpumask_test_cpu(cpu, tracing_cpumask_new)) {
atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_MAX_TRACE
ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
#endif
}
}
arch_spin_unlock(&tr->max_lock);
@@ -6705,10 +6718,36 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
#endif
static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
{
if (cpu == RING_BUFFER_ALL_CPUS) {
if (cpumask_empty(tr->pipe_cpumask)) {
cpumask_setall(tr->pipe_cpumask);
return 0;
}
} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
cpumask_set_cpu(cpu, tr->pipe_cpumask);
return 0;
}
return -EBUSY;
}
static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
{
if (cpu == RING_BUFFER_ALL_CPUS) {
WARN_ON(!cpumask_full(tr->pipe_cpumask));
cpumask_clear(tr->pipe_cpumask);
} else {
WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
cpumask_clear_cpu(cpu, tr->pipe_cpumask);
}
}
static int tracing_open_pipe(struct inode *inode, struct file *filp)
{
struct trace_array *tr = inode->i_private;
struct trace_iterator *iter;
int cpu;
int ret;
ret = tracing_check_open_get_tr(tr);
@@ -6716,13 +6755,16 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
return ret;
mutex_lock(&trace_types_lock);
cpu = tracing_get_cpu(inode);
ret = open_pipe_on_cpu(tr, cpu);
if (ret)
goto fail_pipe_on_cpu;
/* create a buffer to store the information to pass to userspace */
iter = kzalloc(sizeof(*iter), GFP_KERNEL);
if (!iter) {
ret = -ENOMEM;
__trace_array_put(tr);
goto out;
goto fail_alloc_iter;
}
trace_seq_init(&iter->seq);
@@ -6745,7 +6787,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
iter->tr = tr;
iter->array_buffer = &tr->array_buffer;
iter->cpu_file = tracing_get_cpu(inode);
iter->cpu_file = cpu;
mutex_init(&iter->mutex);
filp->private_data = iter;
@@ -6755,12 +6797,15 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
nonseekable_open(inode, filp);
tr->trace_ref++;
out:
mutex_unlock(&trace_types_lock);
return ret;
fail:
kfree(iter);
fail_alloc_iter:
close_pipe_on_cpu(tr, cpu);
fail_pipe_on_cpu:
__trace_array_put(tr);
mutex_unlock(&trace_types_lock);
return ret;
@@ -6777,7 +6822,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
if (iter->trace->pipe_close)
iter->trace->pipe_close(iter);
close_pipe_on_cpu(tr, iter->cpu_file);
mutex_unlock(&trace_types_lock);
free_cpumask_var(iter->started);
@@ -9441,6 +9486,9 @@ static struct trace_array *trace_array_create(const char *name)
if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
goto out_free_tr;
if (!alloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
goto out_free_tr;
tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
@@ -9482,6 +9530,7 @@ static struct trace_array *trace_array_create(const char *name)
out_free_tr:
ftrace_free_ftrace_ops(tr);
free_trace_buffers(tr);
free_cpumask_var(tr->pipe_cpumask);
free_cpumask_var(tr->tracing_cpumask);
kfree(tr->name);
kfree(tr);
@@ -9584,6 +9633,7 @@ static int __remove_instance(struct trace_array *tr)
}
kfree(tr->topts);
free_cpumask_var(tr->pipe_cpumask);
free_cpumask_var(tr->tracing_cpumask);
kfree(tr->name);
kfree(tr);
@@ -10381,12 +10431,14 @@ __init static int tracer_alloc_buffers(void)
if (trace_create_savedcmd() < 0)
goto out_free_temp_buffer;
if (!alloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
goto out_free_savedcmd;
/* TODO: make the number of buffers hot pluggable with CPUS */
if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
goto out_free_savedcmd;
goto out_free_pipe_cpumask;
}
if (global_trace.buffer_disabled)
tracing_off();
@@ -10439,6 +10491,8 @@ __init static int tracer_alloc_buffers(void)
return 0;
out_free_pipe_cpumask:
free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
@@ -377,6 +377,8 @@ struct trace_array {
struct list_head events;
struct trace_event_file *trace_marker_file;
cpumask_var_t tracing_cpumask; /* only trace on set CPUs */
/* one per_cpu trace_pipe can be opened by only one user */
cpumask_var_t pipe_cpumask;
int ref;
int trace_ref;
#ifdef CONFIG_FUNCTION_TRACER
@@ -1295,6 +1297,14 @@ static inline void trace_branch_disable(void)
/* set ring buffers to default size if not already done so */
int tracing_update_buffers(void);
union trace_synth_field {
u8 as_u8;
u16 as_u16;
u32 as_u32;
u64 as_u64;
struct trace_dynamic_info as_dynamic;
};
struct ftrace_event_field {
struct list_head link;
const char *name;
@@ -127,7 +127,7 @@ static bool synth_event_match(const char *system, const char *event,
struct synth_trace_event {
struct trace_entry ent;
u64 fields[];
union trace_synth_field fields[];
};
static int synth_event_define_fields(struct trace_event_call *call)
@@ -321,19 +321,19 @@ static const char *synth_field_fmt(char *type)
static void print_synth_event_num_val(struct trace_seq *s,
char *print_fmt, char *name,
int size, u64 val, char *space)
int size, union trace_synth_field *val, char *space)
{
switch (size) {
case 1:
trace_seq_printf(s, print_fmt, name, (u8)val, space);
trace_seq_printf(s, print_fmt, name, val->as_u8, space);
break;
case 2:
trace_seq_printf(s, print_fmt, name, (u16)val, space);
trace_seq_printf(s, print_fmt, name, val->as_u16, space);
break;
case 4:
trace_seq_printf(s, print_fmt, name, (u32)val, space);
trace_seq_printf(s, print_fmt, name, val->as_u32, space);
break;
default:
@@ -350,7 +350,7 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
struct trace_seq *s = &iter->seq;
struct synth_trace_event *entry;
struct synth_event *se;
unsigned int i, n_u64;
unsigned int i, j, n_u64;
char print_fmt[32];
const char *fmt;
@@ -374,43 +374,28 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
/* parameter values */
if (se->fields[i]->is_string) {
if (se->fields[i]->is_dynamic) {
u32 offset, data_offset;
char *str_field;
offset = (u32)entry->fields[n_u64];
data_offset = offset & 0xffff;
str_field = (char *)entry + data_offset;
union trace_synth_field *data = &entry->fields[n_u64];
trace_seq_printf(s, print_fmt, se->fields[i]->name,
STR_VAR_LEN_MAX,
str_field,
(char *)entry + data->as_dynamic.offset,
i == se->n_fields - 1 ? "" : " ");
n_u64++;
} else {
trace_seq_printf(s, print_fmt, se->fields[i]->name,
STR_VAR_LEN_MAX,
(char *)&entry->fields[n_u64],
(char *)&entry->fields[n_u64].as_u64,
i == se->n_fields - 1 ? "" : " ");
n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
}
} else if (se->fields[i]->is_stack) {
u32 offset, data_offset, len;
unsigned long *p, *end;
offset = (u32)entry->fields[n_u64];
data_offset = offset & 0xffff;
len = offset >> 16;
p = (void *)entry + data_offset;
end = (void *)p + len - (sizeof(long) - 1);
union trace_synth_field *data = &entry->fields[n_u64];
unsigned long *p = (void *)entry + data->as_dynamic.offset;
trace_seq_printf(s, "%s=STACK:\n", se->fields[i]->name);
for (; *p && p < end; p++)
trace_seq_printf(s, "=> %pS\n", (void *)*p);
for (j = 1; j < data->as_dynamic.len / sizeof(long); j++)
trace_seq_printf(s, "=> %pS\n", (void *)p[j]);
n_u64++;
} else {
struct trace_print_flags __flags[] = {
__def_gfpflag_names, {-1, NULL} };
@@ -419,13 +404,13 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
print_synth_event_num_val(s, print_fmt,
se->fields[i]->name,
se->fields[i]->size,
entry->fields[n_u64],
&entry->fields[n_u64],
space);
if (strcmp(se->fields[i]->type, "gfp_t") == 0) {
trace_seq_puts(s, " (");
trace_print_flags_seq(s, "|",
entry->fields[n_u64],
entry->fields[n_u64].as_u64,
__flags);
trace_seq_putc(s, ')');
}
@@ -454,21 +439,16 @@ static unsigned int trace_string(struct synth_trace_event *entry,
int ret;
if (is_dynamic) {
u32 data_offset;
union trace_synth_field *data = &entry->fields[*n_u64];
data_offset = struct_size(entry, fields, event->n_u64);
data_offset += data_size;
len = fetch_store_strlen((unsigned long)str_val);
data_offset |= len << 16;
*(u32 *)&entry->fields[*n_u64] = data_offset;
data->as_dynamic.offset = struct_size(entry, fields, event->n_u64) + data_size;
data->as_dynamic.len = fetch_store_strlen((unsigned long)str_val);
ret = fetch_store_string((unsigned long)str_val, &entry->fields[*n_u64], entry);
(*n_u64)++;
} else {
str_field = (char *)&entry->fields[*n_u64];
str_field = (char *)&entry->fields[*n_u64].as_u64;
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
if ((unsigned long)str_val < TASK_SIZE)
@@ -492,6 +472,7 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
unsigned int data_size,
unsigned int *n_u64)
{
union trace_synth_field *data = &entry->fields[*n_u64];
unsigned int len;
u32 data_offset;
void *data_loc;
@@ -504,10 +485,6 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
break;
}
/* Include the zero'd element if it fits */
if (len < HIST_STACKTRACE_DEPTH)
len++;
len *= sizeof(long);
/* Find the dynamic section to copy the stack into. */
@@ -515,8 +492,9 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
memcpy(data_loc, stack, len);
/* Fill in the field that holds the offset/len combo */
data_offset |= len << 16;
*(u32 *)&entry->fields[*n_u64] = data_offset;
data->as_dynamic.offset = data_offset;
data->as_dynamic.len = len;
(*n_u64)++;
@@ -550,7 +528,8 @@ static notrace void trace_event_raw_event_synth(void *__data,
str_val = (char *)(long)var_ref_vals[val_idx];
if (event->dynamic_fields[i]->is_stack) {
len = *((unsigned long *)str_val);
/* reserve one extra element for size */
len = *((unsigned long *)str_val) + 1;
len *= sizeof(unsigned long);
} else {
len = fetch_store_strlen((unsigned long)str_val);
@@ -592,19 +571,19 @@ static notrace void trace_event_raw_event_synth(void *__data,
switch (field->size) {
case 1:
*(u8 *)&entry->fields[n_u64] = (u8)val;
entry->fields[n_u64].as_u8 = (u8)val;
break;
case 2:
*(u16 *)&entry->fields[n_u64] = (u16)val;
entry->fields[n_u64].as_u16 = (u16)val;
break;
case 4:
*(u32 *)&entry->fields[n_u64] = (u32)val;
entry->fields[n_u64].as_u32 = (u32)val;
break;
default:
entry->fields[n_u64] = val;
entry->fields[n_u64].as_u64 = val;
break;
}
n_u64++;
@@ -1791,19 +1770,19 @@ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...)
switch (field->size) {
case 1:
*(u8 *)&state.entry->fields[n_u64] = (u8)val;
state.entry->fields[n_u64].as_u8 = (u8)val;
break;
case 2:
*(u16 *)&state.entry->fields[n_u64] = (u16)val;
state.entry->fields[n_u64].as_u16 = (u16)val;
break;
case 4:
*(u32 *)&state.entry->fields[n_u64] = (u32)val;
state.entry->fields[n_u64].as_u32 = (u32)val;
break;
default:
state.entry->fields[n_u64] = val;
state.entry->fields[n_u64].as_u64 = val;
break;
}
n_u64++;
@@ -1884,19 +1863,19 @@ int synth_event_trace_array(struct trace_event_file *file, u64 *vals,
switch (field->size) {
case 1:
*(u8 *)&state.entry->fields[n_u64] = (u8)val;
state.entry->fields[n_u64].as_u8 = (u8)val;
break;
case 2:
*(u16 *)&state.entry->fields[n_u64] = (u16)val;
state.entry->fields[n_u64].as_u16 = (u16)val;
break;
case 4:
*(u32 *)&state.entry->fields[n_u64] = (u32)val;
state.entry->fields[n_u64].as_u32 = (u32)val;
break;
default:
state.entry->fields[n_u64] = val;
state.entry->fields[n_u64].as_u64 = val;
break;
}
n_u64++;
@@ -2031,19 +2010,19 @@ static int __synth_event_add_val(const char *field_name, u64 val,
} else {
switch (field->size) {
case 1:
*(u8 *)&trace_state->entry->fields[field->offset] = (u8)val;
trace_state->entry->fields[field->offset].as_u8 = (u8)val;
break;
case 2:
*(u16 *)&trace_state->entry->fields[field->offset] = (u16)val;
trace_state->entry->fields[field->offset].as_u16 = (u16)val;
break;
case 4:
*(u32 *)&trace_state->entry->fields[field->offset] = (u32)val;
trace_state->entry->fields[field->offset].as_u32 = (u32)val;
break;
default:
trace_state->entry->fields[field->offset] = val;
trace_state->entry->fields[field->offset].as_u64 = val;
break;
}
}
@@ -231,7 +231,8 @@ static void irqsoff_trace_open(struct trace_iterator *iter)
{
if (is_graph(iter->tr))
graph_trace_open(iter);
else
iter->private = NULL;
}
static void irqsoff_trace_close(struct trace_iterator *iter)
@@ -168,6 +168,8 @@ static void wakeup_trace_open(struct trace_iterator *iter)
{
if (is_graph(iter->tr))
graph_trace_open(iter);
else
iter->private = NULL;
}
static void wakeup_trace_close(struct trace_iterator *iter)
@@ -105,7 +105,7 @@ asm (
" .type my_tramp1, @function\n"
" .globl my_tramp1\n"
" my_tramp1:"
" bti c\n"
" hint 34\n" // bti c
" sub sp, sp, #16\n"
" stp x9, x30, [sp]\n"
" bl my_direct_func1\n"
@@ -117,7 +117,7 @@ asm (
" .type my_tramp2, @function\n"
" .globl my_tramp2\n"
" my_tramp2:"
" bti c\n"
" hint 34\n" // bti c
" sub sp, sp, #16\n"
" stp x9, x30, [sp]\n"
" bl my_direct_func2\n"
@@ -112,7 +112,7 @@ asm (
" .type my_tramp1, @function\n"
" .globl my_tramp1\n"
" my_tramp1:"
" bti c\n"
" hint 34\n" // bti c
" sub sp, sp, #32\n"
" stp x9, x30, [sp]\n"
" str x0, [sp, #16]\n"
@@ -127,7 +127,7 @@ asm (
" .type my_tramp2, @function\n"
" .globl my_tramp2\n"
" my_tramp2:"
" bti c\n"
" hint 34\n" // bti c
" sub sp, sp, #32\n"
" stp x9, x30, [sp]\n"
" str x0, [sp, #16]\n"
@@ -75,7 +75,7 @@ asm (
" .type my_tramp, @function\n"
" .globl my_tramp\n"
" my_tramp:"
" bti c\n"
" hint 34\n" // bti c
" sub sp, sp, #32\n"
" stp x9, x30, [sp]\n"
" str x0, [sp, #16]\n"
@@ -81,7 +81,7 @@ asm (
" .type my_tramp, @function\n"
" .globl my_tramp\n"
" my_tramp:"
" bti c\n"
" hint 34\n" // bti c
" sub sp, sp, #48\n"
" stp x9, x30, [sp]\n"
" stp x0, x1, [sp, #16]\n"
@@ -72,7 +72,7 @@ asm (
" .type my_tramp, @function\n"
" .globl my_tramp\n"
" my_tramp:"
" bti c\n"
" hint 34\n" // bti c
" sub sp, sp, #32\n"
" stp x9, x30, [sp]\n"
" str x0, [sp, #16]\n"
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Snapshot and tracing_cpumask
# requires: trace_marker tracing_cpumask snapshot
# flags: instance
# This testcase is contrived to reproduce a problem where the cpu buffers
# become unavailable, due to 'record_disabled' of array_buffer and
# max_buffer getting messed up.
# Store the original cpumask
ORIG_CPUMASK=`cat tracing_cpumask`
# Stop tracing on all cpus
echo 0 > tracing_cpumask
# Take a snapshot of the main buffer
echo 1 > snapshot
# Restore the original cpumask; note that some cpus should now be traced
echo ${ORIG_CPUMASK} > tracing_cpumask
# Set tracing on
echo 1 > tracing_on
# Write a log into the buffer
echo "test input 1" > trace_marker
# Ensure the log was written, i.e. the cpu buffers are still available
grep -q "test input 1" trace
exit 0