Commit 83c2f912 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (39 commits)
  perf tools: Fix compile error on x86_64 Ubuntu
  perf report: Fix --stdio output alignment when --showcpuutilization used
  perf annotate: Get rid of field_sep check
  perf annotate: Fix usage string
  perf kmem: Fix a memory leak
  perf kmem: Add missing closedir() calls
  perf top: Add error message for EMFILE
  perf test: Change type of '-v' option to INCR
  perf script: Add missing closedir() calls
  tracing: Fix compile error when static ftrace is enabled
  recordmcount: Fix handling of elf64 big-endian objects.
  perf tools: Add const.h to MANIFEST to make perf-tar-src-pkg work again
  perf tools: Add support for guest/host-only profiling
  perf kvm: Do guest-only counting by default
  perf top: Don't update total_period on process_sample
  perf hists: Stop using 'self' for struct hist_entry
  perf hists: Rename total_session to total_period
  x86: Add counter when debug stack is used with interrupts enabled
  x86: Allow NMIs to hit breakpoints in i386
  x86: Keep current stack in NMI breakpoints
  ...
parents f0ed5b9a 172d1b0b
...@@ -2475,6 +2475,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ...@@ -2475,6 +2475,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
stacktrace [FTRACE] stacktrace [FTRACE]
Enabled the stack tracer on boot up. Enabled the stack tracer on boot up.
stacktrace_filter=[function-list]
[FTRACE] Limit the functions that the stack tracer
will trace at boot up. function-list is a comma separated
list of functions. This list can be changed at run
time by the stack_trace_filter file in the debugfs
tracing directory. Note, this enables stack tracing
and the stacktrace above is not needed.
sti= [PARISC,HW] sti= [PARISC,HW]
Format: <num> Format: <num>
Set the STI (builtin display/keyboard on the HP-PARISC Set the STI (builtin display/keyboard on the HP-PARISC
......
...@@ -101,6 +101,28 @@ extern void aout_dump_debugregs(struct user *dump); ...@@ -101,6 +101,28 @@ extern void aout_dump_debugregs(struct user *dump);
extern void hw_breakpoint_restore(void); extern void hw_breakpoint_restore(void);
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(int, debug_stack_usage);
static inline void debug_stack_usage_inc(void)
{
__get_cpu_var(debug_stack_usage)++;
}
static inline void debug_stack_usage_dec(void)
{
__get_cpu_var(debug_stack_usage)--;
}
int is_debug_stack(unsigned long addr);
void debug_stack_set_zero(void);
void debug_stack_reset(void);
#else /* !X86_64 */
static inline int is_debug_stack(unsigned long addr) { return 0; }
static inline void debug_stack_set_zero(void) { }
static inline void debug_stack_reset(void) { }
static inline void debug_stack_usage_inc(void) { }
static inline void debug_stack_usage_dec(void) { }
#endif /* X86_64 */
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _ASM_X86_DEBUGREG_H */ #endif /* _ASM_X86_DEBUGREG_H */
...@@ -35,6 +35,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in ...@@ -35,6 +35,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
extern struct desc_ptr idt_descr; extern struct desc_ptr idt_descr;
extern gate_desc idt_table[]; extern gate_desc idt_table[];
extern struct desc_ptr nmi_idt_descr;
extern gate_desc nmi_idt_table[];
struct gdt_page { struct gdt_page {
struct desc_struct gdt[GDT_ENTRIES]; struct desc_struct gdt[GDT_ENTRIES];
...@@ -307,6 +309,16 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) ...@@ -307,6 +309,16 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
desc->limit = (limit >> 16) & 0xf; desc->limit = (limit >> 16) & 0xf;
} }
#ifdef CONFIG_X86_64
/*
 * Install @addr as the handler for vector @gate in nmi_idt_table, the
 * alternate IDT that is loaded while an NMI runs on top of the debug stack.
 *
 * The entry is an interrupt gate in the kernel code segment. The two
 * zero arguments are presumably dpl and ist, mirroring the parameter
 * order of _set_gate() -- confirm against pack_gate().
 */
static inline void set_nmi_gate(int gate, void *addr)
{
	unsigned long handler = (unsigned long)addr;
	gate_desc nmi_gate;

	pack_gate(&nmi_gate, GATE_INTERRUPT, handler, 0, 0, __KERNEL_CS);
	write_idt_entry(nmi_idt_table, gate, &nmi_gate);
}
#endif
static inline void _set_gate(int gate, unsigned type, void *addr, static inline void _set_gate(int gate, unsigned type, void *addr,
unsigned dpl, unsigned ist, unsigned seg) unsigned dpl, unsigned ist, unsigned seg)
{ {
......
...@@ -1021,6 +1021,8 @@ __setup("clearcpuid=", setup_disablecpuid); ...@@ -1021,6 +1021,8 @@ __setup("clearcpuid=", setup_disablecpuid);
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
(unsigned long) nmi_idt_table };
DEFINE_PER_CPU_FIRST(union irq_stack_union, DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE); irq_stack_union) __aligned(PAGE_SIZE);
...@@ -1085,6 +1087,26 @@ unsigned long kernel_eflags; ...@@ -1085,6 +1087,26 @@ unsigned long kernel_eflags;
*/ */
DEFINE_PER_CPU(struct orig_ist, orig_ist); DEFINE_PER_CPU(struct orig_ist, orig_ist);
static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
DEFINE_PER_CPU(int, debug_stack_usage);
/*
 * Report whether @addr must be treated as living on this CPU's debug stack.
 *
 * Returns 1 when either
 *  - a debug/int3 handler has flagged the debug stack as in use
 *    (debug_stack_usage is non-zero), or
 *  - @addr lies in (debug_stack_addr - DEBUG_STKSZ, debug_stack_addr],
 *    where debug_stack_addr is the value recorded by cpu_init();
 * and 0 otherwise.
 */
int is_debug_stack(unsigned long addr)
{
	unsigned long top;

	if (__get_cpu_var(debug_stack_usage))
		return 1;

	top = __get_cpu_var(debug_stack_addr);
	if (addr > top)
		return 0;

	return addr > (top - DEBUG_STKSZ);
}
/*
 * Switch the current CPU over to the NMI IDT (nmi_idt_descr), whose
 * debug and int3 gates are installed through set_nmi_gate().
 */
void debug_stack_set_zero(void)
{
	const struct desc_ptr *nmi_idt = &nmi_idt_descr;

	load_idt(nmi_idt);
}
/*
 * Switch the current CPU back to the regular IDT (idt_descr), undoing
 * debug_stack_set_zero().
 */
void debug_stack_reset(void)
{
	const struct desc_ptr *regular_idt = &idt_descr;

	load_idt(regular_idt);
}
#else /* CONFIG_X86_64 */ #else /* CONFIG_X86_64 */
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
...@@ -1212,6 +1234,8 @@ void __cpuinit cpu_init(void) ...@@ -1212,6 +1234,8 @@ void __cpuinit cpu_init(void)
estacks += exception_stack_sizes[v]; estacks += exception_stack_sizes[v];
oist->ist[v] = t->x86_tss.ist[v] = oist->ist[v] = t->x86_tss.ist[v] =
(unsigned long)estacks; (unsigned long)estacks;
if (v == DEBUG_STACK-1)
per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
} }
} }
......
...@@ -1480,62 +1480,214 @@ ENTRY(error_exit) ...@@ -1480,62 +1480,214 @@ ENTRY(error_exit)
CFI_ENDPROC CFI_ENDPROC
END(error_exit) END(error_exit)
/*
 * Test if a given stack is an NMI stack or not.
 *
 * \reg        register holding the highest address of the range to test;
 *             it is clobbered (EXCEPTION_STKSZ is subtracted from it)
 * \stack      operand holding the stack pointer value being tested
 * \nmi_ret    label jumped to when
 *             \reg - EXCEPTION_STKSZ <= \stack <= \reg
 * \normal_ret label jumped to when \stack is outside that range
 *
 * All comparisons are unsigned. The macro never falls through: it
 * always leaves via one of the two labels.
 */
.macro test_in_nmi reg stack nmi_ret normal_ret
/* \stack above the top of the range: not the NMI stack */
cmpq %\reg, \stack
ja \normal_ret
/* move \reg down to the bottom of the range */
subq $EXCEPTION_STKSZ, %\reg
/* \stack below the bottom of the range: not the NMI stack */
cmpq %\reg, \stack
jb \normal_ret
jmp \nmi_ret
.endm
/* runs on exception stack */ /* runs on exception stack */
ENTRY(nmi) ENTRY(nmi)
INTR_FRAME INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq_cfi $-1 /*
* We allow breakpoints in NMIs. If a breakpoint occurs, then
* the iretq it performs will take us out of NMI context.
* This means that we can have nested NMIs where the next
* NMI is using the top of the stack of the previous NMI. We
* can't let it execute because the nested NMI will corrupt the
* stack of the previous NMI. NMI handlers are not re-entrant
* anyway.
*
* To handle this case we do the following:
* Check a special location on the stack that contains
* a variable that is set when NMIs are executing.
* The interrupted task's stack is also checked to see if it
* is an NMI stack.
* If the variable is not set and the stack is not the NMI
* stack then:
* o Set the special variable on the stack
* o Copy the interrupt frame into a "saved" location on the stack
* o Copy the interrupt frame into a "copy" location on the stack
* o Continue processing the NMI
* If the variable is set or the previous stack is the NMI stack:
* o Modify the "copy" location to jump to repeat_nmi
* o return back to the first NMI
*
* Now on exit of the first NMI, we first clear the stack variable
* The NMI stack will tell any nested NMIs at that point that it is
* nested. Then we pop the stack normally with iret, and if there was
* a nested NMI that updated the copy interrupt stack frame, a
* jump will be made to the repeat_nmi code that will handle the second
* NMI.
*/
/* Use %rdx as our temp variable throughout */
pushq_cfi %rdx
/*
* Check the special variable on the stack to see if NMIs are
* executing.
*/
cmp $1, -8(%rsp)
je nested_nmi
/*
* Now test if the previous stack was an NMI stack.
* We need the double check. We check the NMI stack to satisfy the
* race when the first NMI clears the variable before returning.
* We check the variable because the first NMI could be in a
* breakpoint routine using a breakpoint stack.
*/
lea 6*8(%rsp), %rdx
test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
nested_nmi:
/*
* Do nothing if we interrupted the fixup in repeat_nmi.
* It's about to repeat the NMI handler, so we are fine
* with ignoring this one.
*/
movq $repeat_nmi, %rdx
cmpq 8(%rsp), %rdx
ja 1f
movq $end_repeat_nmi, %rdx
cmpq 8(%rsp), %rdx
ja nested_nmi_out
1:
/* Set up the interrupted NMIs stack to jump to repeat_nmi */
leaq -6*8(%rsp), %rdx
movq %rdx, %rsp
CFI_ADJUST_CFA_OFFSET 6*8
pushq_cfi $__KERNEL_DS
pushq_cfi %rdx
pushfq_cfi
pushq_cfi $__KERNEL_CS
pushq_cfi $repeat_nmi
/* Put stack back */
addq $(11*8), %rsp
CFI_ADJUST_CFA_OFFSET -11*8
nested_nmi_out:
popq_cfi %rdx
/* No need to check faults here */
INTERRUPT_RETURN
first_nmi:
/*
* Because nested NMIs will use the pushed location that we
* stored in rdx, we must keep that space available.
* Here's what our stack frame will look like:
* +-------------------------+
* | original SS |
* | original Return RSP |
* | original RFLAGS |
* | original CS |
* | original RIP |
* +-------------------------+
* | temp storage for rdx |
* +-------------------------+
* | NMI executing variable |
* +-------------------------+
* | Saved SS |
* | Saved Return RSP |
* | Saved RFLAGS |
* | Saved CS |
* | Saved RIP |
* +-------------------------+
* | copied SS |
* | copied Return RSP |
* | copied RFLAGS |
* | copied CS |
* | copied RIP |
* +-------------------------+
* | pt_regs |
* +-------------------------+
*
* The saved RIP is used to fix up the copied RIP that a nested
* NMI may zero out. The original stack frame and the temp storage
* is also used by nested NMIs and can not be trusted on exit.
*/
/* Set the NMI executing variable on the stack. */
pushq_cfi $1
/* Copy the stack frame to the Saved frame */
.rept 5
pushq_cfi 6*8(%rsp)
.endr
/* Make another copy, this one may be modified by nested NMIs */
.rept 5
pushq_cfi 4*8(%rsp)
.endr
/* Do not pop rdx, nested NMIs will corrupt it */
movq 11*8(%rsp), %rdx
/*
* Everything below this point can be preempted by a nested
* NMI if the first NMI took an exception. Repeated NMIs
* caused by an exception and nested NMI will start here, and
* can still be preempted by another NMI.
*/
restart_nmi:
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
subq $ORIG_RAX-R15, %rsp subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
/*
* Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
* as we should not be calling schedule in NMI context.
* Even with normal interrupts enabled. An NMI should not be
* setting NEED_RESCHED or anything that normal interrupts and
* exceptions might do.
*/
call save_paranoid call save_paranoid
DEFAULT_FRAME 0 DEFAULT_FRAME 0
/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
movq %rsp,%rdi movq %rsp,%rdi
movq $-1,%rsi movq $-1,%rsi
call do_nmi call do_nmi
#ifdef CONFIG_TRACE_IRQFLAGS
/* paranoidexit; without TRACE_IRQS_OFF */
/* ebx: no swapgs flag */
DISABLE_INTERRUPTS(CLBR_NONE)
testl %ebx,%ebx /* swapgs needed? */ testl %ebx,%ebx /* swapgs needed? */
jnz nmi_restore jnz nmi_restore
testl $3,CS(%rsp)
jnz nmi_userspace
nmi_swapgs: nmi_swapgs:
SWAPGS_UNSAFE_STACK SWAPGS_UNSAFE_STACK
nmi_restore: nmi_restore:
RESTORE_ALL 8 RESTORE_ALL 8
/* Clear the NMI executing stack variable */
movq $0, 10*8(%rsp)
jmp irq_return jmp irq_return
nmi_userspace:
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%ebx
andl $_TIF_WORK_MASK,%ebx
jz nmi_swapgs
movq %rsp,%rdi /* &pt_regs */
call sync_regs
movq %rax,%rsp /* switch stack for scheduling */
testl $_TIF_NEED_RESCHED,%ebx
jnz nmi_schedule
movl %ebx,%edx /* arg3: thread flags */
ENABLE_INTERRUPTS(CLBR_NONE)
xorl %esi,%esi /* arg2: oldset */
movq %rsp,%rdi /* arg1: &pt_regs */
call do_notify_resume
DISABLE_INTERRUPTS(CLBR_NONE)
jmp nmi_userspace
nmi_schedule:
ENABLE_INTERRUPTS(CLBR_ANY)
call schedule
DISABLE_INTERRUPTS(CLBR_ANY)
jmp nmi_userspace
CFI_ENDPROC
#else
jmp paranoid_exit
CFI_ENDPROC CFI_ENDPROC
#endif
END(nmi) END(nmi)
/*
* If an NMI hit an iret because of an exception or breakpoint,
* it can lose its NMI context, and a nested NMI may come in.
* In that case, the nested NMI will change the preempted NMI's
* stack to jump to here when it does the final iret.
*/
repeat_nmi:
INTR_FRAME
/* Update the stack variable to say we are still in NMI */
movq $1, 5*8(%rsp)
/* copy the saved stack back to copy stack */
.rept 5
pushq_cfi 4*8(%rsp)
.endr
jmp restart_nmi
CFI_ENDPROC
end_repeat_nmi:
ENTRY(ignore_sysret) ENTRY(ignore_sysret)
CFI_STARTPROC CFI_STARTPROC
mov $-ENOSYS,%eax mov $-ENOSYS,%eax
......
...@@ -417,6 +417,10 @@ ENTRY(phys_base) ...@@ -417,6 +417,10 @@ ENTRY(phys_base)
ENTRY(idt_table) ENTRY(idt_table)
.skip IDT_ENTRIES * 16 .skip IDT_ENTRIES * 16
.align L1_CACHE_BYTES
ENTRY(nmi_idt_table)
.skip IDT_ENTRIES * 16
__PAGE_ALIGNED_BSS __PAGE_ALIGNED_BSS
.align PAGE_SIZE .align PAGE_SIZE
ENTRY(empty_zero_page) ENTRY(empty_zero_page)
......
...@@ -405,9 +405,108 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) ...@@ -405,9 +405,108 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
unknown_nmi_error(reason, regs); unknown_nmi_error(reason, regs);
} }
/*
* NMIs can hit breakpoints which will cause it to lose its
* NMI context with the CPU when the breakpoint does an iret.
*/
#ifdef CONFIG_X86_32
/*
* For i386, NMIs use the same stack as the kernel, and we can
* add a workaround to the iret problem in C. Simply have 3 states
* the NMI can be in.
*
* 1) not running
* 2) executing
* 3) latched
*
* When no NMI is in progress, it is in the "not running" state.
* When an NMI comes in, it goes into the "executing" state.
* Normally, if another NMI is triggered, it does not interrupt
* the running NMI and the HW will simply latch it so that when
* the first NMI finishes, it will restart the second NMI.
* (Note, the latch is binary, thus multiple NMIs triggering,
* when one is running, are ignored. Only one NMI is restarted.)
*
* If an NMI hits a breakpoint that executes an iret, another
* NMI can preempt it. We do not want to allow this new NMI
* to run, but we want to execute it when the first one finishes.
* We set the state to "latched", and the first NMI will perform
* a cmpxchg on the state, and if it doesn't successfully
* reset the state to "not running" it will restart the next
* NMI.
*/
enum nmi_states {
NMI_NOT_RUNNING,
NMI_EXECUTING,
NMI_LATCHED,
};
static DEFINE_PER_CPU(enum nmi_states, nmi_state);
/*
 * i386 entry half of the nested-NMI state machine.
 *
 * Must be expanded directly inside a function returning void: when an
 * NMI is already being handled on this CPU (state is anything other
 * than NMI_NOT_RUNNING) the macro records NMI_LATCHED and executes a
 * bare "return" from the enclosing function.
 *
 * Otherwise it defines the label "nmi_restart" and moves the state to
 * NMI_EXECUTING. nmi_nesting_postprocess() jumps back to that label to
 * replay a latched NMI, so both macros have to be used in the same
 * function. @regs is unused in this variant.
 */
#define nmi_nesting_preprocess(regs) \
do { \
if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \
__get_cpu_var(nmi_state) = NMI_LATCHED; \
return; \
} \
nmi_restart: \
__get_cpu_var(nmi_state) = NMI_EXECUTING; \
} while (0)
/*
 * i386 exit half of the nested-NMI state machine.
 *
 * Attempts to move the per-CPU state from NMI_EXECUTING back to
 * NMI_NOT_RUNNING with cmpxchg(). If the previous value was not
 * NMI_EXECUTING (a nested NMI changed it to NMI_LATCHED in the
 * meantime), control jumps to the "nmi_restart" label defined by
 * nmi_nesting_preprocess() so the handler runs once more.
 */
#define nmi_nesting_postprocess() \
do { \
if (cmpxchg(&__get_cpu_var(nmi_state), \
NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \
goto nmi_restart; \
} while (0)
#else /* x86_64 */
/*
* In x86_64 things are a bit more difficult. This has the same problem
* where an NMI hitting a breakpoint that calls iret will remove the
* NMI context, allowing a nested NMI to enter. What makes this more
* difficult is that both NMIs and breakpoints have their own stack.
* When a new NMI or breakpoint is executed, the stack is set to a fixed
* point. If an NMI is nested, it will have its stack set at that same
* fixed address that the first NMI had, and will start corrupting the
* stack. This is handled in entry_64.S, but the same problem exists with
* the breakpoint stack.
*
* If a breakpoint is being processed, and the debug stack is being used,
* if an NMI comes in and also hits a breakpoint, the stack pointer
* will be set to the same fixed address as the breakpoint that was
* interrupted, causing that stack to be corrupted. To handle this case,
* check if the stack that was interrupted is the debug stack, and if
* so, change the IDT so that new breakpoints will use the current stack
* and not switch to the fixed address. On return of the NMI, switch back
* to the original IDT.
*/
static DEFINE_PER_CPU(int, update_debug_stack);
/*
 * x86_64 NMI entry hook.
 *
 * @regs: register state of the context the NMI interrupted.
 *
 * When the interrupted stack pointer is on the debug stack, a
 * breakpoint taken inside the NMI handler would be started at the same
 * fixed debug-stack address and overwrite the interrupted handler's
 * frames. In that case load the NMI IDT, so that breakpoints stay on
 * the current stack, and remember in update_debug_stack that the IDT
 * was switched.
 */
static inline void nmi_nesting_preprocess(struct pt_regs *regs)
{
	unsigned long interrupted_sp = regs->sp;

	if (likely(!is_debug_stack(interrupted_sp)))
		return;

	debug_stack_set_zero();
	__get_cpu_var(update_debug_stack) = 1;
}
/*
 * x86_64 NMI exit hook.
 *
 * If nmi_nesting_preprocess() switched this CPU to the NMI IDT, switch
 * back to the regular IDT and clear the per-CPU update_debug_stack
 * flag. Without clearing it, the flag stays set forever after the first
 * NMI that interrupted the debug stack, and every later NMI on this CPU
 * would reload the IDT for no reason.
 */
static inline void nmi_nesting_postprocess(void)
{
	if (unlikely(__get_cpu_var(update_debug_stack))) {
		debug_stack_reset();
		__get_cpu_var(update_debug_stack) = 0;
	}
}
#endif
dotraplinkage notrace __kprobes void dotraplinkage notrace __kprobes void
do_nmi(struct pt_regs *regs, long error_code) do_nmi(struct pt_regs *regs, long error_code)
{ {
nmi_nesting_preprocess(regs);
nmi_enter(); nmi_enter();
inc_irq_stat(__nmi_count); inc_irq_stat(__nmi_count);
...@@ -416,6 +515,9 @@ do_nmi(struct pt_regs *regs, long error_code) ...@@ -416,6 +515,9 @@ do_nmi(struct pt_regs *regs, long error_code)
default_do_nmi(regs); default_do_nmi(regs);
nmi_exit(); nmi_exit();
/* On i386, may loop back to preprocess */
nmi_nesting_postprocess();
} }
void stop_nmi(void) void stop_nmi(void)
......
...@@ -311,9 +311,15 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) ...@@ -311,9 +311,15 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
== NOTIFY_STOP) == NOTIFY_STOP)
return; return;
/*
* Let others (NMI) know that the debug stack is in use
* as we may switch to the interrupt stack.
*/
debug_stack_usage_inc();
preempt_conditional_sti(regs); preempt_conditional_sti(regs);
do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
preempt_conditional_cli(regs); preempt_conditional_cli(regs);
debug_stack_usage_dec();
} }
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
...@@ -406,6 +412,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) ...@@ -406,6 +412,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
SIGTRAP) == NOTIFY_STOP) SIGTRAP) == NOTIFY_STOP)
return; return;
/*
* Let others (NMI) know that the debug stack is in use
* as we may switch to the interrupt stack.
*/
debug_stack_usage_inc();
/* It's safe to allow irq's after DR6 has been saved */ /* It's safe to allow irq's after DR6 has been saved */
preempt_conditional_sti(regs); preempt_conditional_sti(regs);
...@@ -413,6 +425,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) ...@@ -413,6 +425,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
handle_vm86_trap((struct kernel_vm86_regs *) regs, handle_vm86_trap((struct kernel_vm86_regs *) regs,
error_code, 1); error_code, 1);
preempt_conditional_cli(regs); preempt_conditional_cli(regs);
debug_stack_usage_dec();
return; return;
} }
...@@ -432,6 +445,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) ...@@ -432,6 +445,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
send_sigtrap(tsk, regs, error_code, si_code); send_sigtrap(tsk, regs, error_code, si_code);
preempt_conditional_cli(regs); preempt_conditional_cli(regs);
debug_stack_usage_dec();
return; return;
} }
...@@ -718,4 +732,10 @@ void __init trap_init(void) ...@@ -718,4 +732,10 @@ void __init trap_init(void)
cpu_init(); cpu_init();
x86_init.irqs.trap_init(); x86_init.irqs.trap_init();
#ifdef CONFIG_X86_64
memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16);
set_nmi_gate(1, &debug);
set_nmi_gate(3, &int3);
#endif
} }
...@@ -50,6 +50,11 @@ ...@@ -50,6 +50,11 @@
# define inline inline __attribute__((always_inline)) # define inline inline __attribute__((always_inline))
# define __inline__ __inline__ __attribute__((always_inline)) # define __inline__ __inline__ __attribute__((always_inline))
# define __inline __inline __attribute__((always_inline)) # define __inline __inline __attribute__((always_inline))
#else
/* A lot of inline functions can cause havoc with function tracing */
# define inline inline notrace
# define __inline__ __inline__ notrace
# define __inline __inline notrace
#endif #endif
#define __deprecated __attribute__((deprecated)) #define __deprecated __attribute__((deprecated))
......
...@@ -133,6 +133,8 @@ struct ftrace_func_command { ...@@ -133,6 +133,8 @@ struct ftrace_func_command {
int ftrace_arch_code_modify_prepare(void); int ftrace_arch_code_modify_prepare(void);
int ftrace_arch_code_modify_post_process(void); int ftrace_arch_code_modify_post_process(void);
void ftrace_bug(int err, unsigned long ip);
struct seq_file; struct seq_file;
struct ftrace_probe_ops { struct ftrace_probe_ops {
...@@ -161,7 +163,6 @@ extern int ftrace_text_reserved(void *start, void *end); ...@@ -161,7 +163,6 @@ extern int ftrace_text_reserved(void *start, void *end);
enum { enum {
FTRACE_FL_ENABLED = (1 << 30), FTRACE_FL_ENABLED = (1 << 30),
FTRACE_FL_FREE = (1 << 31),
}; };
#define FTRACE_FL_MASK (0x3UL << 30) #define FTRACE_FL_MASK (0x3UL << 30)
...@@ -172,10 +173,7 @@ struct dyn_ftrace { ...@@ -172,10 +173,7 @@ struct dyn_ftrace {
unsigned long ip; /* address of mcount call-site */ unsigned long ip; /* address of mcount call-site */
struct dyn_ftrace *freelist; struct dyn_ftrace *freelist;
}; };
union { unsigned long flags;
unsigned long flags;
struct dyn_ftrace *newlist;
};
struct dyn_arch_ftrace arch; struct dyn_arch_ftrace arch;
}; };
...@@ -190,6 +188,56 @@ void ftrace_set_global_notrace(unsigned char *buf, int len, int reset); ...@@ -190,6 +188,56 @@ void ftrace_set_global_notrace(unsigned char *buf, int len, int reset);
int register_ftrace_command(struct ftrace_func_command *cmd); int register_ftrace_command(struct ftrace_func_command *cmd);
int unregister_ftrace_command(struct ftrace_func_command *cmd); int unregister_ftrace_command(struct ftrace_func_command *cmd);
enum {
FTRACE_UPDATE_CALLS = (1 << 0),
FTRACE_DISABLE_CALLS = (1 << 1),
FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
FTRACE_START_FUNC_RET = (1 << 3),
FTRACE_STOP_FUNC_RET = (1 << 4),
};
enum {
FTRACE_UPDATE_IGNORE,
FTRACE_UPDATE_MAKE_CALL,
FTRACE_UPDATE_MAKE_NOP,
};
enum {
FTRACE_ITER_FILTER = (1 << 0),
FTRACE_ITER_NOTRACE = (1 << 1),
FTRACE_ITER_PRINTALL = (1 << 2),
FTRACE_ITER_DO_HASH = (1 << 3),
FTRACE_ITER_HASH = (1 << 4),
FTRACE_ITER_ENABLED = (1 << 5),
};
void arch_ftrace_update_code(int command);
struct ftrace_rec_iter;
struct ftrace_rec_iter *ftrace_rec_iter_start(void);
struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter);
struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter);
int ftrace_update_record(struct dyn_ftrace *rec, int enable);
int ftrace_test_record(struct dyn_ftrace *rec, int enable);
void ftrace_run_stop_machine(int command);
int ftrace_location(unsigned long ip);
extern ftrace_func_t ftrace_trace_function;
int ftrace_regex_open(struct ftrace_ops *ops, int flag,
struct inode *inode, struct file *file);
ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos);
ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos);
loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin);
int ftrace_regex_release(struct inode *inode, struct file *file);
void __init
ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable);
/* defined in arch */ /* defined in arch */
extern int ftrace_ip_converted(unsigned long ip); extern int ftrace_ip_converted(unsigned long ip);
extern int ftrace_dyn_arch_init(void *data); extern int ftrace_dyn_arch_init(void *data);
...@@ -284,6 +332,25 @@ static inline int ftrace_text_reserved(void *start, void *end) ...@@ -284,6 +332,25 @@ static inline int ftrace_text_reserved(void *start, void *end)
{ {
return 0; return 0;
} }
/*
* Again users of functions that have ftrace_ops may not
* have them defined when ftrace is not enabled, but these
* functions may still be called. Use a macro instead of inline.
*/
#define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; })
#define ftrace_set_early_filter(ops, buf, enable) do { } while (0)
static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos) { return -ENODEV; }
static inline ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos) { return -ENODEV; }
static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
{
return -ENODEV;
}
static inline int
ftrace_regex_release(struct inode *inode, struct file *file) { return -ENODEV; }
#endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_DYNAMIC_FTRACE */
/* totally disable ftrace - can not re-enable after this */ /* totally disable ftrace - can not re-enable after this */
......
This diff is collapsed.
...@@ -1738,11 +1738,121 @@ static int replace_system_preds(struct event_subsystem *system, ...@@ -1738,11 +1738,121 @@ static int replace_system_preds(struct event_subsystem *system,
return -ENOMEM; return -ENOMEM;
} }
/*
 * Allocate a new event filter plus its parse state and run the parser.
 *
 * @filter_str: filter expression to parse
 * @set_str:    if true, store @filter_str in the new filter and append
 *              parse error details to it on failure
 * @psp:        out: parse state; expected to point at NULL on entry
 * @filterp:    out: new filter; expected to point at NULL on entry
 *
 * Returns -ENOMEM without touching *@psp / *@filterp when any
 * allocation (or storing the filter string) fails. Otherwise both out
 * parameters are set and the result of filter_parse() is returned; the
 * caller then owns the filter and the parse state, even when parsing
 * failed.
 */
static int create_filter_start(char *filter_str, bool set_str,
			       struct filter_parse_state **psp,
			       struct event_filter **filterp)
{
	struct event_filter *new_filter;
	struct filter_parse_state *state;
	int ret = 0;

	WARN_ON_ONCE(*psp || *filterp);

	/* Try every allocation first, then check them all in one place. */
	new_filter = __alloc_filter();
	if (new_filter && set_str)
		ret = replace_filter_string(new_filter, filter_str);

	state = kzalloc(sizeof(*state), GFP_KERNEL);
	if (!new_filter || !state || ret)
		goto fail;

	/* From here on the caller owns both objects. */
	*filterp = new_filter;
	*psp = state;

	parse_init(state, filter_ops, filter_str);
	ret = filter_parse(state);
	if (ret && set_str)
		append_filter_err(state, new_filter);
	return ret;

fail:
	kfree(state);
	__free_filter(new_filter);
	return -ENOMEM;
}
/*
 * Release the parse state handed out by create_filter_start().
 * A NULL @ps (nothing was allocated) is accepted and ignored.
 */
static void create_filter_finish(struct filter_parse_state *ps)
{
	if (!ps)
		return;

	filter_opstack_clear(ps);
	postfix_clear(ps);
	kfree(ps);
}
/**
 * create_filter - build an event filter for a ftrace_event_call
 * @call: event the filter is created for
 * @filter_str: textual filter expression
 * @set_str: keep a copy of @filter_str in the filter and record detailed
 *           error text in it on failure
 * @filterp: out param receiving the filter (written on every return)
 *
 * Parses @filter_str and turns it into predicates for @call.
 *
 * Returns 0 on success with *@filterp pointing to the new filter. On
 * failure a negative errno is returned and *@filterp is either %NULL or
 * a new filter; in the latter case the filter carries the error
 * information when @set_str is %true, and the caller must free it.
 */
static int create_filter(struct ftrace_event_call *call,
			 char *filter_str, bool set_str,
			 struct event_filter **filterp)
{
	struct event_filter *new_filter = NULL;
	struct filter_parse_state *state = NULL;
	int ret;

	ret = create_filter_start(filter_str, set_str, &state, &new_filter);
	if (ret)
		goto done;

	ret = replace_preds(call, new_filter, state, filter_str, false);
	if (ret && set_str)
		append_filter_err(state, new_filter);

done:
	create_filter_finish(state);
	*filterp = new_filter;
	return ret;
}
/**
 * create_system_filter - build an event filter for an event_subsystem
 * @system: subsystem the filter is created for
 * @filter_str: textual filter expression
 * @filterp: out param receiving the filter (written on every return)
 *
 * Works like create_filter(), but applies the predicates to the whole
 * subsystem and always asks create_filter_start() to remember
 * @filter_str. On success the stored string is dropped again, because a
 * subsystem filter only shows a default message; on failure the error
 * text is appended to the filter instead.
 */
static int create_system_filter(struct event_subsystem *system,
				char *filter_str, struct event_filter **filterp)
{
	struct event_filter *new_filter = NULL;
	struct filter_parse_state *state = NULL;
	int ret;

	ret = create_filter_start(filter_str, true, &state, &new_filter);
	if (ret)
		goto done;

	ret = replace_system_preds(system, state, filter_str);
	if (ret) {
		append_filter_err(state, new_filter);
		goto done;
	}

	/* System filters just show a default message */
	kfree(new_filter->filter_string);
	new_filter->filter_string = NULL;

done:
	create_filter_finish(state);
	*filterp = new_filter;
	return ret;
}
int apply_event_filter(struct ftrace_event_call *call, char *filter_string) int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
{ {
struct filter_parse_state *ps;
struct event_filter *filter; struct event_filter *filter;
struct event_filter *tmp;
int err = 0; int err = 0;
mutex_lock(&event_mutex); mutex_lock(&event_mutex);
...@@ -1759,49 +1869,30 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) ...@@ -1759,49 +1869,30 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
goto out_unlock; goto out_unlock;
} }
err = -ENOMEM; err = create_filter(call, filter_string, true, &filter);
ps = kzalloc(sizeof(*ps), GFP_KERNEL);
if (!ps)
goto out_unlock;
filter = __alloc_filter();
if (!filter) {
kfree(ps);
goto out_unlock;
}
replace_filter_string(filter, filter_string);
parse_init(ps, filter_ops, filter_string);
err = filter_parse(ps);
if (err) {
append_filter_err(ps, filter);
goto out;
}
err = replace_preds(call, filter, ps, filter_string, false);
if (err) {
filter_disable(call);
append_filter_err(ps, filter);
} else
call->flags |= TRACE_EVENT_FL_FILTERED;
out:
/* /*
* Always swap the call filter with the new filter * Always swap the call filter with the new filter
* even if there was an error. If there was an error * even if there was an error. If there was an error
* in the filter, we disable the filter and show the error * in the filter, we disable the filter and show the error
* string * string
*/ */
tmp = call->filter; if (filter) {
rcu_assign_pointer(call->filter, filter); struct event_filter *tmp = call->filter;
if (tmp) {
/* Make sure the call is done with the filter */ if (!err)
synchronize_sched(); call->flags |= TRACE_EVENT_FL_FILTERED;
__free_filter(tmp); else
filter_disable(call);
rcu_assign_pointer(call->filter, filter);
if (tmp) {
/* Make sure the call is done with the filter */
synchronize_sched();
__free_filter(tmp);
}
} }
filter_opstack_clear(ps);
postfix_clear(ps);
kfree(ps);
out_unlock: out_unlock:
mutex_unlock(&event_mutex); mutex_unlock(&event_mutex);
...@@ -1811,7 +1902,6 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) ...@@ -1811,7 +1902,6 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
int apply_subsystem_event_filter(struct event_subsystem *system, int apply_subsystem_event_filter(struct event_subsystem *system,
char *filter_string) char *filter_string)
{ {
struct filter_parse_state *ps;
struct event_filter *filter; struct event_filter *filter;
int err = 0; int err = 0;
...@@ -1835,48 +1925,19 @@ int apply_subsystem_event_filter(struct event_subsystem *system, ...@@ -1835,48 +1925,19 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
goto out_unlock; goto out_unlock;
} }
err = -ENOMEM; err = create_system_filter(system, filter_string, &filter);
ps = kzalloc(sizeof(*ps), GFP_KERNEL); if (filter) {
if (!ps) /*
goto out_unlock; * No event actually uses the system filter
* we can free it without synchronize_sched().
filter = __alloc_filter(); */
if (!filter) __free_filter(system->filter);
goto out; system->filter = filter;
}
/* System filters just show a default message */
kfree(filter->filter_string);
filter->filter_string = NULL;
/*
* No event actually uses the system filter
* we can free it without synchronize_sched().
*/
__free_filter(system->filter);
system->filter = filter;
parse_init(ps, filter_ops, filter_string);
err = filter_parse(ps);
if (err)
goto err_filter;
err = replace_system_preds(system, ps, filter_string);
if (err)
goto err_filter;
out:
filter_opstack_clear(ps);
postfix_clear(ps);
kfree(ps);
out_unlock: out_unlock:
mutex_unlock(&event_mutex); mutex_unlock(&event_mutex);
return err; return err;
err_filter:
replace_filter_string(filter, filter_string);
append_filter_err(ps, system->filter);
goto out;
} }
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
...@@ -1894,7 +1955,6 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, ...@@ -1894,7 +1955,6 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
{ {
int err; int err;
struct event_filter *filter; struct event_filter *filter;
struct filter_parse_state *ps;
struct ftrace_event_call *call; struct ftrace_event_call *call;
mutex_lock(&event_mutex); mutex_lock(&event_mutex);
...@@ -1909,33 +1969,10 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, ...@@ -1909,33 +1969,10 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
if (event->filter) if (event->filter)
goto out_unlock; goto out_unlock;
filter = __alloc_filter(); err = create_filter(call, filter_str, false, &filter);
if (!filter) {
err = PTR_ERR(filter);
goto out_unlock;
}
err = -ENOMEM;
ps = kzalloc(sizeof(*ps), GFP_KERNEL);
if (!ps)
goto free_filter;
parse_init(ps, filter_ops, filter_str);
err = filter_parse(ps);
if (err)
goto free_ps;
err = replace_preds(call, filter, ps, filter_str, false);
if (!err) if (!err)
event->filter = filter; event->filter = filter;
else
free_ps:
filter_opstack_clear(ps);
postfix_clear(ps);
kfree(ps);
free_filter:
if (err)
__free_filter(filter); __free_filter(filter);
out_unlock: out_unlock:
...@@ -1954,43 +1991,6 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, ...@@ -1954,43 +1991,6 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include "trace_events_filter_test.h" #include "trace_events_filter_test.h"
/*
 * Build an event filter for @call from the text in @filter_str.
 *
 * On success the new filter is stored in *@pfilter and 0 is returned.
 * On failure the partially built filter is released, *@pfilter is left
 * untouched and a non-zero error is returned (-ENOMEM when either
 * allocation fails, otherwise whatever filter_parse() or replace_preds()
 * reported).
 */
static int test_get_filter(char *filter_str, struct ftrace_event_call *call,
			   struct event_filter **pfilter)
{
	struct filter_parse_state *state;
	struct event_filter *new_filter;
	int ret;

	new_filter = __alloc_filter();
	if (!new_filter)
		return -ENOMEM;

	state = kzalloc(sizeof(*state), GFP_KERNEL);
	if (!state) {
		__free_filter(new_filter);
		return -ENOMEM;
	}

	parse_init(state, filter_ops, filter_str);

	/* Only hand the parse state to replace_preds() if parsing worked. */
	ret = filter_parse(state);
	if (!ret)
		ret = replace_preds(call, new_filter, state, filter_str, false);

	/* The parse state is scratch data; it is torn down on every path. */
	filter_opstack_clear(state);
	postfix_clear(state);
	kfree(state);

	if (ret)
		__free_filter(new_filter);
	else
		*pfilter = new_filter;

	return ret;
}
#define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \ #define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \
{ \ { \
.filter = FILTER, \ .filter = FILTER, \
...@@ -2109,12 +2109,13 @@ static __init int ftrace_test_event_filter(void) ...@@ -2109,12 +2109,13 @@ static __init int ftrace_test_event_filter(void)
struct test_filter_data_t *d = &test_filter_data[i]; struct test_filter_data_t *d = &test_filter_data[i];
int err; int err;
err = test_get_filter(d->filter, &event_ftrace_test_filter, err = create_filter(&event_ftrace_test_filter, d->filter,
&filter); false, &filter);
if (err) { if (err) {
printk(KERN_INFO printk(KERN_INFO
"Failed to get filter for '%s', err %d\n", "Failed to get filter for '%s', err %d\n",
d->filter, err); d->filter, err);
__free_filter(filter);
break; break;
} }
......
...@@ -13,6 +13,9 @@ ...@@ -13,6 +13,9 @@
#include <linux/sysctl.h> #include <linux/sysctl.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <asm/setup.h>
#include "trace.h" #include "trace.h"
#define STACK_TRACE_ENTRIES 500 #define STACK_TRACE_ENTRIES 500
...@@ -133,7 +136,6 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip) ...@@ -133,7 +136,6 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
static struct ftrace_ops trace_ops __read_mostly = static struct ftrace_ops trace_ops __read_mostly =
{ {
.func = stack_trace_call, .func = stack_trace_call,
.flags = FTRACE_OPS_FL_GLOBAL,
}; };
static ssize_t static ssize_t
...@@ -311,6 +313,21 @@ static const struct file_operations stack_trace_fops = { ...@@ -311,6 +313,21 @@ static const struct file_operations stack_trace_fops = {
.release = seq_release, .release = seq_release,
}; };
static int
stack_trace_filter_open(struct inode *inode, struct file *file)
{
return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER,
inode, file);
}
/*
 * File operations for the stack tracer's filter file.  Opening goes
 * through stack_trace_filter_open(); reading uses the seq_file helper and
 * the remaining operations are the generic ftrace filter/regex handlers.
 */
static const struct file_operations stack_trace_filter_fops = {
	.open		= stack_trace_filter_open,
	.release	= ftrace_regex_release,
	.read		= seq_read,
	.write		= ftrace_filter_write,
	.llseek		= ftrace_regex_lseek,
};
int int
stack_trace_sysctl(struct ctl_table *table, int write, stack_trace_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, void __user *buffer, size_t *lenp,
...@@ -338,8 +355,13 @@ stack_trace_sysctl(struct ctl_table *table, int write, ...@@ -338,8 +355,13 @@ stack_trace_sysctl(struct ctl_table *table, int write,
return ret; return ret;
} }
static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata;
static __init int enable_stacktrace(char *str) static __init int enable_stacktrace(char *str)
{ {
if (strncmp(str, "_filter=", 8) == 0)
strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE);
stack_tracer_enabled = 1; stack_tracer_enabled = 1;
last_stack_tracer_enabled = 1; last_stack_tracer_enabled = 1;
return 1; return 1;
...@@ -358,6 +380,12 @@ static __init int stack_trace_init(void) ...@@ -358,6 +380,12 @@ static __init int stack_trace_init(void)
trace_create_file("stack_trace", 0444, d_tracer, trace_create_file("stack_trace", 0444, d_tracer,
NULL, &stack_trace_fops); NULL, &stack_trace_fops);
trace_create_file("stack_trace_filter", 0444, d_tracer,
NULL, &stack_trace_filter_fops);
if (stack_trace_filter_buf[0])
ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);
if (stack_tracer_enabled) if (stack_tracer_enabled)
register_ftrace_function(&trace_ops); register_ftrace_function(&trace_ops);
......
...@@ -462,7 +462,7 @@ __has_rel_mcount(Elf_Shdr const *const relhdr, /* is SHT_REL or SHT_RELA */ ...@@ -462,7 +462,7 @@ __has_rel_mcount(Elf_Shdr const *const relhdr, /* is SHT_REL or SHT_RELA */
succeed_file(); succeed_file();
} }
if (w(txthdr->sh_type) != SHT_PROGBITS || if (w(txthdr->sh_type) != SHT_PROGBITS ||
!(w(txthdr->sh_flags) & SHF_EXECINSTR)) !(_w(txthdr->sh_flags) & SHF_EXECINSTR))
return NULL; return NULL;
return txtname; return txtname;
} }
......
...@@ -21,6 +21,8 @@ EVENT MODIFIERS ...@@ -21,6 +21,8 @@ EVENT MODIFIERS
Events can optionally have a modifier by appending a colon and one or Events can optionally have a modifier by appending a colon and one or
more modifiers. Modifiers allow the user to restrict when events are more modifiers. Modifiers allow the user to restrict when events are
counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor. counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor.
Additional modifiers are 'G' for guest counting (in KVM guests) and 'H'
for host counting (not in KVM guests).
The 'p' modifier can be used for specifying how precise the instruction The 'p' modifier can be used for specifying how precise the instruction
address should be. The 'p' modifier is currently only implemented for address should be. The 'p' modifier is currently only implemented for
......
tools/perf tools/perf
include/linux/const.h
include/linux/perf_event.h include/linux/perf_event.h
include/linux/rbtree.h include/linux/rbtree.h
include/linux/list.h include/linux/list.h
......
...@@ -235,7 +235,7 @@ static int __cmd_annotate(struct perf_annotate *ann) ...@@ -235,7 +235,7 @@ static int __cmd_annotate(struct perf_annotate *ann)
} }
static const char * const annotate_usage[] = { static const char * const annotate_usage[] = {
"perf annotate [<options>] <command>", "perf annotate [<options>]",
NULL NULL
}; };
...@@ -313,10 +313,5 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used) ...@@ -313,10 +313,5 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
annotate.sym_hist_filter = argv[0]; annotate.sym_hist_filter = argv[0];
} }
if (field_sep && *field_sep == '.') {
pr_err("'.' is the only non valid --field-separator argument\n");
return -1;
}
return __cmd_annotate(&annotate); return __cmd_annotate(&annotate);
} }
...@@ -108,7 +108,9 @@ static void setup_cpunode_map(void) ...@@ -108,7 +108,9 @@ static void setup_cpunode_map(void)
continue; continue;
cpunode_map[cpu] = mem; cpunode_map[cpu] = mem;
} }
closedir(dir2);
} }
closedir(dir1);
} }
static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
...@@ -645,6 +647,7 @@ static int setup_sorting(struct list_head *sort_list, const char *arg) ...@@ -645,6 +647,7 @@ static int setup_sorting(struct list_head *sort_list, const char *arg)
break; break;
if (sort_dimension__add(tok, sort_list) < 0) { if (sort_dimension__add(tok, sort_list) < 0) {
error("Unknown --sort key: '%s'", tok); error("Unknown --sort key: '%s'", tok);
free(str);
return -1; return -1;
} }
} }
......
...@@ -22,9 +22,6 @@ ...@@ -22,9 +22,6 @@
static const char *file_name; static const char *file_name;
static char name_buffer[256]; static char name_buffer[256];
bool perf_host = 1;
bool perf_guest;
static const char * const kvm_usage[] = { static const char * const kvm_usage[] = {
"perf kvm [<options>] {top|record|report|diff|buildid-list}", "perf kvm [<options>] {top|record|report|diff|buildid-list}",
NULL NULL
...@@ -107,7 +104,8 @@ static int __cmd_buildid_list(int argc, const char **argv) ...@@ -107,7 +104,8 @@ static int __cmd_buildid_list(int argc, const char **argv)
int cmd_kvm(int argc, const char **argv, const char *prefix __used) int cmd_kvm(int argc, const char **argv, const char *prefix __used)
{ {
perf_host = perf_guest = 0; perf_host = 0;
perf_guest = 1;
argc = parse_options(argc, argv, kvm_options, kvm_usage, argc = parse_options(argc, argv, kvm_options, kvm_usage,
PARSE_OPT_STOP_AT_NON_OPTION); PARSE_OPT_STOP_AT_NON_OPTION);
......
...@@ -1018,13 +1018,17 @@ static char *get_script_path(const char *script_root, const char *suffix) ...@@ -1018,13 +1018,17 @@ static char *get_script_path(const char *script_root, const char *suffix)
__script_root = get_script_root(&script_dirent, suffix); __script_root = get_script_root(&script_dirent, suffix);
if (__script_root && !strcmp(script_root, __script_root)) { if (__script_root && !strcmp(script_root, __script_root)) {
free(__script_root); free(__script_root);
closedir(lang_dir);
closedir(scripts_dir);
snprintf(script_path, MAXPATHLEN, "%s/%s", snprintf(script_path, MAXPATHLEN, "%s/%s",
lang_path, script_dirent.d_name); lang_path, script_dirent.d_name);
return strdup(script_path); return strdup(script_path);
} }
free(__script_root); free(__script_root);
} }
closedir(lang_dir);
} }
closedir(scripts_dir);
return NULL; return NULL;
} }
......
...@@ -1396,7 +1396,7 @@ int cmd_test(int argc, const char **argv, const char *prefix __used) ...@@ -1396,7 +1396,7 @@ int cmd_test(int argc, const char **argv, const char *prefix __used)
NULL, NULL,
}; };
const struct option test_options[] = { const struct option test_options[] = {
OPT_INTEGER('v', "verbose", &verbose, OPT_INCR('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"), "be more verbose (show symbol address, etc)"),
OPT_END() OPT_END()
}; };
......
...@@ -235,7 +235,6 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel, ...@@ -235,7 +235,6 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
if (he == NULL) if (he == NULL)
return NULL; return NULL;
evsel->hists.stats.total_period += sample->period;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
return he; return he;
} }
...@@ -889,6 +888,10 @@ static void perf_top__start_counters(struct perf_top *top) ...@@ -889,6 +888,10 @@ static void perf_top__start_counters(struct perf_top *top)
ui__warning("The %s event is not supported.\n", ui__warning("The %s event is not supported.\n",
event_name(counter)); event_name(counter));
goto out_err; goto out_err;
} else if (err == EMFILE) {
ui__warning("Too many events are opened.\n"
"Try again after reducing the number of events\n");
goto out_err;
} }
ui__warning("The sys_perf_event_open() syscall " ui__warning("The sys_perf_event_open() syscall "
......
...@@ -111,8 +111,11 @@ int perf_evlist__add_default(struct perf_evlist *evlist) ...@@ -111,8 +111,11 @@ int perf_evlist__add_default(struct perf_evlist *evlist)
.type = PERF_TYPE_HARDWARE, .type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES, .config = PERF_COUNT_HW_CPU_CYCLES,
}; };
struct perf_evsel *evsel = perf_evsel__new(&attr, 0); struct perf_evsel *evsel;
event_attr_init(&attr);
evsel = perf_evsel__new(&attr, 0);
if (evsel == NULL) if (evsel == NULL)
goto error; goto error;
......
...@@ -76,21 +76,21 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) ...@@ -76,21 +76,21 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
} }
} }
static void hist_entry__add_cpumode_period(struct hist_entry *self, static void hist_entry__add_cpumode_period(struct hist_entry *he,
unsigned int cpumode, u64 period) unsigned int cpumode, u64 period)
{ {
switch (cpumode) { switch (cpumode) {
case PERF_RECORD_MISC_KERNEL: case PERF_RECORD_MISC_KERNEL:
self->period_sys += period; he->period_sys += period;
break; break;
case PERF_RECORD_MISC_USER: case PERF_RECORD_MISC_USER:
self->period_us += period; he->period_us += period;
break; break;
case PERF_RECORD_MISC_GUEST_KERNEL: case PERF_RECORD_MISC_GUEST_KERNEL:
self->period_guest_sys += period; he->period_guest_sys += period;
break; break;
case PERF_RECORD_MISC_GUEST_USER: case PERF_RECORD_MISC_GUEST_USER:
self->period_guest_us += period; he->period_guest_us += period;
break; break;
default: default:
break; break;
...@@ -165,18 +165,18 @@ void hists__decay_entries_threaded(struct hists *hists, ...@@ -165,18 +165,18 @@ void hists__decay_entries_threaded(struct hists *hists,
static struct hist_entry *hist_entry__new(struct hist_entry *template) static struct hist_entry *hist_entry__new(struct hist_entry *template)
{ {
size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0; size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0;
struct hist_entry *self = malloc(sizeof(*self) + callchain_size); struct hist_entry *he = malloc(sizeof(*he) + callchain_size);
if (self != NULL) { if (he != NULL) {
*self = *template; *he = *template;
self->nr_events = 1; he->nr_events = 1;
if (self->ms.map) if (he->ms.map)
self->ms.map->referenced = true; he->ms.map->referenced = true;
if (symbol_conf.use_callchain) if (symbol_conf.use_callchain)
callchain_init(self->callchain); callchain_init(he->callchain);
} }
return self; return he;
} }
static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h) static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h)
...@@ -677,15 +677,16 @@ static size_t callchain__fprintf_flat(FILE *fp, struct callchain_node *self, ...@@ -677,15 +677,16 @@ static size_t callchain__fprintf_flat(FILE *fp, struct callchain_node *self,
return ret; return ret;
} }
static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
u64 total_samples, int left_margin) u64 total_samples, int left_margin,
FILE *fp)
{ {
struct rb_node *rb_node; struct rb_node *rb_node;
struct callchain_node *chain; struct callchain_node *chain;
size_t ret = 0; size_t ret = 0;
u32 entries_printed = 0; u32 entries_printed = 0;
rb_node = rb_first(&self->sorted_chain); rb_node = rb_first(&he->sorted_chain);
while (rb_node) { while (rb_node) {
double percent; double percent;
...@@ -730,35 +731,35 @@ void hists__output_recalc_col_len(struct hists *hists, int max_rows) ...@@ -730,35 +731,35 @@ void hists__output_recalc_col_len(struct hists *hists, int max_rows)
} }
} }
static int hist_entry__pcnt_snprintf(struct hist_entry *self, char *s, static int hist_entry__pcnt_snprintf(struct hist_entry *he, char *s,
size_t size, struct hists *pair_hists, size_t size, struct hists *pair_hists,
bool show_displacement, long displacement, bool show_displacement, long displacement,
bool color, u64 session_total) bool color, u64 total_period)
{ {
u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us; u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us;
u64 nr_events; u64 nr_events;
const char *sep = symbol_conf.field_sep; const char *sep = symbol_conf.field_sep;
int ret; int ret;
if (symbol_conf.exclude_other && !self->parent) if (symbol_conf.exclude_other && !he->parent)
return 0; return 0;
if (pair_hists) { if (pair_hists) {
period = self->pair ? self->pair->period : 0; period = he->pair ? he->pair->period : 0;
nr_events = self->pair ? self->pair->nr_events : 0; nr_events = he->pair ? he->pair->nr_events : 0;
total = pair_hists->stats.total_period; total = pair_hists->stats.total_period;
period_sys = self->pair ? self->pair->period_sys : 0; period_sys = he->pair ? he->pair->period_sys : 0;
period_us = self->pair ? self->pair->period_us : 0; period_us = he->pair ? he->pair->period_us : 0;
period_guest_sys = self->pair ? self->pair->period_guest_sys : 0; period_guest_sys = he->pair ? he->pair->period_guest_sys : 0;
period_guest_us = self->pair ? self->pair->period_guest_us : 0; period_guest_us = he->pair ? he->pair->period_guest_us : 0;
} else { } else {
period = self->period; period = he->period;
nr_events = self->nr_events; nr_events = he->nr_events;
total = session_total; total = total_period;
period_sys = self->period_sys; period_sys = he->period_sys;
period_us = self->period_us; period_us = he->period_us;
period_guest_sys = self->period_guest_sys; period_guest_sys = he->period_guest_sys;
period_guest_us = self->period_guest_us; period_guest_us = he->period_guest_us;
} }
if (total) { if (total) {
...@@ -812,8 +813,8 @@ static int hist_entry__pcnt_snprintf(struct hist_entry *self, char *s, ...@@ -812,8 +813,8 @@ static int hist_entry__pcnt_snprintf(struct hist_entry *self, char *s,
if (total > 0) if (total > 0)
old_percent = (period * 100.0) / total; old_percent = (period * 100.0) / total;
if (session_total > 0) if (total_period > 0)
new_percent = (self->period * 100.0) / session_total; new_percent = (he->period * 100.0) / total_period;
diff = new_percent - old_percent; diff = new_percent - old_percent;
...@@ -862,9 +863,10 @@ int hist_entry__snprintf(struct hist_entry *he, char *s, size_t size, ...@@ -862,9 +863,10 @@ int hist_entry__snprintf(struct hist_entry *he, char *s, size_t size,
return ret; return ret;
} }
int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists, static int hist_entry__fprintf(struct hist_entry *he, size_t size,
struct hists *pair_hists, bool show_displacement, struct hists *hists, struct hists *pair_hists,
long displacement, FILE *fp, u64 session_total) bool show_displacement, long displacement,
u64 total_period, FILE *fp)
{ {
char bf[512]; char bf[512];
int ret; int ret;
...@@ -874,14 +876,14 @@ int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists, ...@@ -874,14 +876,14 @@ int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists,
ret = hist_entry__pcnt_snprintf(he, bf, size, pair_hists, ret = hist_entry__pcnt_snprintf(he, bf, size, pair_hists,
show_displacement, displacement, show_displacement, displacement,
true, session_total); true, total_period);
hist_entry__snprintf(he, bf + ret, size - ret, hists); hist_entry__snprintf(he, bf + ret, size - ret, hists);
return fprintf(fp, "%s\n", bf); return fprintf(fp, "%s\n", bf);
} }
static size_t hist_entry__fprintf_callchain(struct hist_entry *self, static size_t hist_entry__fprintf_callchain(struct hist_entry *he,
struct hists *hists, FILE *fp, struct hists *hists,
u64 session_total) u64 total_period, FILE *fp)
{ {
int left_margin = 0; int left_margin = 0;
...@@ -889,11 +891,10 @@ static size_t hist_entry__fprintf_callchain(struct hist_entry *self, ...@@ -889,11 +891,10 @@ static size_t hist_entry__fprintf_callchain(struct hist_entry *self,
struct sort_entry *se = list_first_entry(&hist_entry__sort_list, struct sort_entry *se = list_first_entry(&hist_entry__sort_list,
typeof(*se), list); typeof(*se), list);
left_margin = hists__col_len(hists, se->se_width_idx); left_margin = hists__col_len(hists, se->se_width_idx);
left_margin -= thread__comm_len(self->thread); left_margin -= thread__comm_len(he->thread);
} }
return hist_entry_callchain__fprintf(fp, self, session_total, return hist_entry_callchain__fprintf(he, total_period, left_margin, fp);
left_margin);
} }
size_t hists__fprintf(struct hists *hists, struct hists *pair, size_t hists__fprintf(struct hists *hists, struct hists *pair,
...@@ -903,6 +904,7 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, ...@@ -903,6 +904,7 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
struct sort_entry *se; struct sort_entry *se;
struct rb_node *nd; struct rb_node *nd;
size_t ret = 0; size_t ret = 0;
u64 total_period;
unsigned long position = 1; unsigned long position = 1;
long displacement = 0; long displacement = 0;
unsigned int width; unsigned int width;
...@@ -917,20 +919,6 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, ...@@ -917,20 +919,6 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
fprintf(fp, "# %s", pair ? "Baseline" : "Overhead"); fprintf(fp, "# %s", pair ? "Baseline" : "Overhead");
if (symbol_conf.show_nr_samples) {
if (sep)
fprintf(fp, "%cSamples", *sep);
else
fputs(" Samples ", fp);
}
if (symbol_conf.show_total_period) {
if (sep)
ret += fprintf(fp, "%cPeriod", *sep);
else
ret += fprintf(fp, " Period ");
}
if (symbol_conf.show_cpu_utilization) { if (symbol_conf.show_cpu_utilization) {
if (sep) { if (sep) {
ret += fprintf(fp, "%csys", *sep); ret += fprintf(fp, "%csys", *sep);
...@@ -940,8 +928,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, ...@@ -940,8 +928,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
ret += fprintf(fp, "%cguest us", *sep); ret += fprintf(fp, "%cguest us", *sep);
} }
} else { } else {
ret += fprintf(fp, " sys "); ret += fprintf(fp, " sys ");
ret += fprintf(fp, " us "); ret += fprintf(fp, " us ");
if (perf_guest) { if (perf_guest) {
ret += fprintf(fp, " guest sys "); ret += fprintf(fp, " guest sys ");
ret += fprintf(fp, " guest us "); ret += fprintf(fp, " guest us ");
...@@ -949,6 +937,20 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, ...@@ -949,6 +937,20 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
} }
} }
if (symbol_conf.show_nr_samples) {
if (sep)
fprintf(fp, "%cSamples", *sep);
else
fputs(" Samples ", fp);
}
if (symbol_conf.show_total_period) {
if (sep)
ret += fprintf(fp, "%cPeriod", *sep);
else
ret += fprintf(fp, " Period ");
}
if (pair) { if (pair) {
if (sep) if (sep)
ret += fprintf(fp, "%cDelta", *sep); ret += fprintf(fp, "%cDelta", *sep);
...@@ -993,6 +995,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, ...@@ -993,6 +995,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
goto print_entries; goto print_entries;
fprintf(fp, "# ........"); fprintf(fp, "# ........");
if (symbol_conf.show_cpu_utilization)
fprintf(fp, " ....... .......");
if (symbol_conf.show_nr_samples) if (symbol_conf.show_nr_samples)
fprintf(fp, " .........."); fprintf(fp, " ..........");
if (symbol_conf.show_total_period) if (symbol_conf.show_total_period)
...@@ -1025,6 +1029,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, ...@@ -1025,6 +1029,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
goto out; goto out;
print_entries: print_entries:
total_period = hists->stats.total_period;
for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
...@@ -1040,11 +1046,10 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, ...@@ -1040,11 +1046,10 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
++position; ++position;
} }
ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement, ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement,
displacement, fp, hists->stats.total_period); displacement, total_period, fp);
if (symbol_conf.use_callchain) if (symbol_conf.use_callchain)
ret += hist_entry__fprintf_callchain(h, hists, fp, ret += hist_entry__fprintf_callchain(h, hists, total_period, fp);
hists->stats.total_period);
if (max_rows && ++nr_rows >= max_rows) if (max_rows && ++nr_rows >= max_rows)
goto out; goto out;
......
...@@ -66,11 +66,8 @@ struct hists { ...@@ -66,11 +66,8 @@ struct hists {
struct hist_entry *__hists__add_entry(struct hists *self, struct hist_entry *__hists__add_entry(struct hists *self,
struct addr_location *al, struct addr_location *al,
struct symbol *parent, u64 period); struct symbol *parent, u64 period);
extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *); int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *); int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists,
struct hists *pair_hists, bool show_displacement,
long displacement, FILE *fp, u64 session_total);
int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size,
struct hists *hists); struct hists *hists);
void hist_entry__free(struct hist_entry *); void hist_entry__free(struct hist_entry *);
......
...@@ -735,8 +735,8 @@ static int ...@@ -735,8 +735,8 @@ static int
parse_event_modifier(const char **strp, struct perf_event_attr *attr) parse_event_modifier(const char **strp, struct perf_event_attr *attr)
{ {
const char *str = *strp; const char *str = *strp;
int exclude = 0; int exclude = 0, exclude_GH = 0;
int eu = 0, ek = 0, eh = 0, precise = 0; int eu = 0, ek = 0, eh = 0, eH = 0, eG = 0, precise = 0;
if (!*str) if (!*str)
return 0; return 0;
...@@ -760,6 +760,14 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr) ...@@ -760,6 +760,14 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
if (!exclude) if (!exclude)
exclude = eu = ek = eh = 1; exclude = eu = ek = eh = 1;
eh = 0; eh = 0;
} else if (*str == 'G') {
if (!exclude_GH)
exclude_GH = eG = eH = 1;
eG = 0;
} else if (*str == 'H') {
if (!exclude_GH)
exclude_GH = eG = eH = 1;
eH = 0;
} else if (*str == 'p') { } else if (*str == 'p') {
precise++; precise++;
} else } else
...@@ -776,6 +784,8 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr) ...@@ -776,6 +784,8 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
attr->exclude_kernel = ek; attr->exclude_kernel = ek;
attr->exclude_hv = eh; attr->exclude_hv = eh;
attr->precise_ip = precise; attr->precise_ip = precise;
attr->exclude_host = eH;
attr->exclude_guest = eG;
return 0; return 0;
} }
...@@ -838,6 +848,7 @@ int parse_events(struct perf_evlist *evlist , const char *str, int unset __used) ...@@ -838,6 +848,7 @@ int parse_events(struct perf_evlist *evlist , const char *str, int unset __used)
for (;;) { for (;;) {
ostr = str; ostr = str;
memset(&attr, 0, sizeof(attr)); memset(&attr, 0, sizeof(attr));
event_attr_init(&attr);
ret = parse_event_symbols(evlist, &str, &attr); ret = parse_event_symbols(evlist, &str, &attr);
if (ret == EVT_FAILED) if (ret == EVT_FAILED)
return -1; return -1;
......
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
* *
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/ */
#include <ctype.h>
#include "util.h" #include "util.h"
#include <dirent.h> #include <dirent.h>
#include <mntent.h> #include <mntent.h>
......
#include "../perf.h"
#include "util.h" #include "util.h"
#include <sys/mman.h> #include <sys/mman.h>
/*
* XXX We need to find a better place for these things...
*/
bool perf_host = true;
bool perf_guest = true;
/*
 * Apply the global perf_host / perf_guest selection to @attr.
 *
 * This only ever sets exclusion bits: exclude_host is set when perf_host
 * is false and exclude_guest is set when perf_guest is false.  Bits that
 * are already set in @attr are never cleared here.
 */
void event_attr_init(struct perf_event_attr *attr)
{
	const bool count_host = perf_host;
	const bool count_guest = perf_guest;

	if (!count_host)
		attr->exclude_host = 1;

	if (!count_guest)
		attr->exclude_guest = 1;
}
int mkdir_p(char *path, mode_t mode) int mkdir_p(char *path, mode_t mode)
{ {
struct stat st; struct stat st;
......
...@@ -242,6 +242,10 @@ int strtailcmp(const char *s1, const char *s2); ...@@ -242,6 +242,10 @@ int strtailcmp(const char *s1, const char *s2);
unsigned long convert_unit(unsigned long value, char *unit); unsigned long convert_unit(unsigned long value, char *unit);
int readn(int fd, void *buf, size_t size); int readn(int fd, void *buf, size_t size);
struct perf_event_attr;
void event_attr_init(struct perf_event_attr *attr);
#define _STR(x) #x #define _STR(x) #x
#define STR(x) _STR(x) #define STR(x) _STR(x)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment