Commit 0806ebd9 authored by Ingo Molnar's avatar Ingo Molnar

Merge branch 'perf/core' of...

Merge branch 'perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing into perf/core
parents 090f7204 feef47d0
...@@ -137,6 +137,17 @@ config HAVE_HW_BREAKPOINT ...@@ -137,6 +137,17 @@ config HAVE_HW_BREAKPOINT
bool bool
depends on PERF_EVENTS depends on PERF_EVENTS
config HAVE_MIXED_BREAKPOINTS_REGS
bool
depends on HAVE_HW_BREAKPOINT
help
Depending on the arch implementation of hardware breakpoints,
some of them have separate registers for data and instruction
breakpoint addresses, others have mixed registers to store
them but define the access type in a control register.
Select this option if your arch implements breakpoints in the
latter fashion.
config HAVE_USER_RETURN_NOTIFIER config HAVE_USER_RETURN_NOTIFIER
bool bool
......
...@@ -44,6 +44,7 @@ config SUPERH32 ...@@ -44,6 +44,7 @@ config SUPERH32
select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_ARCH_KGDB select HAVE_ARCH_KGDB
select HAVE_HW_BREAKPOINT select HAVE_HW_BREAKPOINT
select HAVE_MIXED_BREAKPOINTS_REGS
select PERF_EVENTS if HAVE_HW_BREAKPOINT select PERF_EVENTS if HAVE_HW_BREAKPOINT
select ARCH_HIBERNATION_POSSIBLE if MMU select ARCH_HIBERNATION_POSSIBLE if MMU
......
...@@ -46,10 +46,14 @@ struct pmu; ...@@ -46,10 +46,14 @@ struct pmu;
/* Maximum number of UBC channels */ /* Maximum number of UBC channels */
#define HBP_NUM 2 #define HBP_NUM 2
/*
 * Report how many breakpoint slots are available for the given
 * breakpoint type index.
 *
 * The type argument is not consulted here: HBP_NUM is returned for
 * every type.
 */
static inline int hw_breakpoint_slots(int type)
{
return HBP_NUM;
}
/* arch/sh/kernel/hw_breakpoint.c */ /* arch/sh/kernel/hw_breakpoint.c */
extern int arch_check_va_in_userspace(unsigned long va, u16 hbp_len); extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
extern int arch_validate_hwbkpt_settings(struct perf_event *bp, extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
struct task_struct *tsk);
extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data); unsigned long val, void *data);
......
...@@ -119,26 +119,17 @@ static int get_hbp_len(u16 hbp_len) ...@@ -119,26 +119,17 @@ static int get_hbp_len(u16 hbp_len)
return len_in_bytes; return len_in_bytes;
} }
/*
 * Tell whether a breakpoint starting at va, whose encoded length is
 * hbp_len, lies in user space.
 *
 * The encoded length is converted to bytes with get_hbp_len(); the result
 * is non-zero when va <= TASK_SIZE - <length in bytes>, zero otherwise.
 */
int arch_check_va_in_userspace(unsigned long va, u16 hbp_len)
{
	const unsigned int nbytes = get_hbp_len(hbp_len);

	return va <= TASK_SIZE - nbytes;
}
/* /*
* Check for virtual address in kernel space. * Check for virtual address in kernel space.
*/ */
static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) int arch_check_bp_in_kernelspace(struct perf_event *bp)
{ {
unsigned int len; unsigned int len;
unsigned long va;
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
len = get_hbp_len(hbp_len); va = info->address;
len = get_hbp_len(info->len);
return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
} }
...@@ -226,8 +217,7 @@ static int arch_build_bp_info(struct perf_event *bp) ...@@ -226,8 +217,7 @@ static int arch_build_bp_info(struct perf_event *bp)
/* /*
* Validate the arch-specific HW Breakpoint register settings * Validate the arch-specific HW Breakpoint register settings
*/ */
int arch_validate_hwbkpt_settings(struct perf_event *bp, int arch_validate_hwbkpt_settings(struct perf_event *bp)
struct task_struct *tsk)
{ {
struct arch_hw_breakpoint *info = counter_arch_bp(bp); struct arch_hw_breakpoint *info = counter_arch_bp(bp);
unsigned int align; unsigned int align;
...@@ -270,15 +260,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, ...@@ -270,15 +260,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
if (info->address & align) if (info->address & align)
return -EINVAL; return -EINVAL;
/* Check that the virtual address is in the proper range */
if (tsk) {
if (!arch_check_va_in_userspace(info->address, info->len))
return -EFAULT;
} else {
if (!arch_check_va_in_kernelspace(info->address, info->len))
return -EFAULT;
}
return 0; return 0;
} }
...@@ -363,8 +344,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) ...@@ -363,8 +344,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
perf_bp_event(bp, args->regs); perf_bp_event(bp, args->regs);
/* Deliver the signal to userspace */ /* Deliver the signal to userspace */
if (arch_check_va_in_userspace(bp->attr.bp_addr, if (!arch_check_bp_in_kernelspace(bp)) {
bp->attr.bp_len)) {
siginfo_t info; siginfo_t info;
info.si_signo = args->signr; info.si_signo = args->signr;
......
...@@ -85,7 +85,7 @@ static int set_single_step(struct task_struct *tsk, unsigned long addr) ...@@ -85,7 +85,7 @@ static int set_single_step(struct task_struct *tsk, unsigned long addr)
bp = thread->ptrace_bps[0]; bp = thread->ptrace_bps[0];
if (!bp) { if (!bp) {
hw_breakpoint_init(&attr); ptrace_breakpoint_init(&attr);
attr.bp_addr = addr; attr.bp_addr = addr;
attr.bp_len = HW_BREAKPOINT_LEN_2; attr.bp_len = HW_BREAKPOINT_LEN_2;
......
...@@ -53,6 +53,7 @@ config X86 ...@@ -53,6 +53,7 @@ config X86
select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZMA
select HAVE_KERNEL_LZO select HAVE_KERNEL_LZO
select HAVE_HW_BREAKPOINT select HAVE_HW_BREAKPOINT
select HAVE_MIXED_BREAKPOINTS_REGS
select PERF_EVENTS select PERF_EVENTS
select ANON_INODES select ANON_INODES
select HAVE_ARCH_KMEMCHECK select HAVE_ARCH_KMEMCHECK
......
...@@ -41,12 +41,16 @@ struct arch_hw_breakpoint { ...@@ -41,12 +41,16 @@ struct arch_hw_breakpoint {
/* Total number of available HW breakpoint registers */ /* Total number of available HW breakpoint registers */
#define HBP_NUM 4 #define HBP_NUM 4
/*
 * Report how many breakpoint slots are available for the given
 * breakpoint type index.
 *
 * The type argument is not consulted here: HBP_NUM is returned for
 * every type.
 */
static inline int hw_breakpoint_slots(int type)
{
return HBP_NUM;
}
struct perf_event; struct perf_event;
struct pmu; struct pmu;
extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
extern int arch_validate_hwbkpt_settings(struct perf_event *bp, extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
struct task_struct *tsk);
extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data); unsigned long val, void *data);
......
...@@ -188,26 +188,17 @@ static int get_hbp_len(u8 hbp_len) ...@@ -188,26 +188,17 @@ static int get_hbp_len(u8 hbp_len)
return len_in_bytes; return len_in_bytes;
} }
/*
 * Tell whether a breakpoint starting at va, whose encoded length is
 * hbp_len, lies in user space.
 *
 * The encoded length is converted to bytes with get_hbp_len(); the result
 * is non-zero when va <= TASK_SIZE - <length in bytes>, zero otherwise.
 */
int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
{
	const unsigned int nbytes = get_hbp_len(hbp_len);

	return va <= TASK_SIZE - nbytes;
}
/* /*
* Check for virtual address in kernel space. * Check for virtual address in kernel space.
*/ */
static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) int arch_check_bp_in_kernelspace(struct perf_event *bp)
{ {
unsigned int len; unsigned int len;
unsigned long va;
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
len = get_hbp_len(hbp_len); va = info->address;
len = get_hbp_len(info->len);
return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
} }
...@@ -300,8 +291,7 @@ static int arch_build_bp_info(struct perf_event *bp) ...@@ -300,8 +291,7 @@ static int arch_build_bp_info(struct perf_event *bp)
/* /*
* Validate the arch-specific HW Breakpoint register settings * Validate the arch-specific HW Breakpoint register settings
*/ */
int arch_validate_hwbkpt_settings(struct perf_event *bp, int arch_validate_hwbkpt_settings(struct perf_event *bp)
struct task_struct *tsk)
{ {
struct arch_hw_breakpoint *info = counter_arch_bp(bp); struct arch_hw_breakpoint *info = counter_arch_bp(bp);
unsigned int align; unsigned int align;
...@@ -314,16 +304,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, ...@@ -314,16 +304,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
ret = -EINVAL; ret = -EINVAL;
if (info->type == X86_BREAKPOINT_EXECUTE)
/*
* Ptrace-refactoring code
* For now, we'll allow instruction breakpoint only for user-space
* addresses
*/
if ((!arch_check_va_in_userspace(info->address, info->len)) &&
info->len != X86_BREAKPOINT_EXECUTE)
return ret;
switch (info->len) { switch (info->len) {
case X86_BREAKPOINT_LEN_1: case X86_BREAKPOINT_LEN_1:
align = 0; align = 0;
...@@ -350,15 +330,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, ...@@ -350,15 +330,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
if (info->address & align) if (info->address & align)
return -EINVAL; return -EINVAL;
/* Check that the virtual address is in the proper range */
if (tsk) {
if (!arch_check_va_in_userspace(info->address, info->len))
return -EFAULT;
} else {
if (!arch_check_va_in_kernelspace(info->address, info->len))
return -EFAULT;
}
return 0; return 0;
} }
......
...@@ -688,7 +688,7 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, ...@@ -688,7 +688,7 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
struct perf_event_attr attr; struct perf_event_attr attr;
if (!t->ptrace_bps[nr]) { if (!t->ptrace_bps[nr]) {
hw_breakpoint_init(&attr); ptrace_breakpoint_init(&attr);
/* /*
* Put stub len and type to register (reserve) an inactive but * Put stub len and type to register (reserve) an inactive but
* correct bp * correct bp
......
...@@ -9,9 +9,22 @@ enum { ...@@ -9,9 +9,22 @@ enum {
}; };
enum { enum {
HW_BREAKPOINT_R = 1, HW_BREAKPOINT_EMPTY = 0,
HW_BREAKPOINT_W = 2, HW_BREAKPOINT_R = 1,
HW_BREAKPOINT_X = 4, HW_BREAKPOINT_W = 2,
HW_BREAKPOINT_RW = HW_BREAKPOINT_R | HW_BREAKPOINT_W,
HW_BREAKPOINT_X = 4,
HW_BREAKPOINT_INVALID = HW_BREAKPOINT_RW | HW_BREAKPOINT_X,
};
enum bp_type_idx {
TYPE_INST = 0,
#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS
TYPE_DATA = 0,
#else
TYPE_DATA = 1,
#endif
TYPE_MAX
}; };
#ifdef __KERNEL__ #ifdef __KERNEL__
...@@ -34,6 +47,12 @@ static inline void hw_breakpoint_init(struct perf_event_attr *attr) ...@@ -34,6 +47,12 @@ static inline void hw_breakpoint_init(struct perf_event_attr *attr)
attr->sample_period = 1; attr->sample_period = 1;
} }
/*
 * Initialize a breakpoint attribute for ptrace use.
 *
 * Starts from the hw_breakpoint_init() defaults and then sets
 * exclude_kernel, so that validate_hw_breakpoint() answers -EINVAL when
 * such a breakpoint targets a kernel-space address.
 */
static inline void ptrace_breakpoint_init(struct perf_event_attr *attr)
{
hw_breakpoint_init(attr);
attr->exclude_kernel = 1;
}
static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
{ {
return bp->attr.bp_addr; return bp->attr.bp_addr;
......
...@@ -40,23 +40,29 @@ ...@@ -40,23 +40,29 @@
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/slab.h>
#include <linux/cpu.h> #include <linux/cpu.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <linux/hw_breakpoint.h> #include <linux/hw_breakpoint.h>
/* /*
* Constraints data * Constraints data
*/ */
/* Number of pinned cpu breakpoints in a cpu */ /* Number of pinned cpu breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned); static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]);
/* Number of pinned task breakpoints in a cpu */ /* Number of pinned task breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]); static DEFINE_PER_CPU(unsigned int, *nr_task_bp_pinned[TYPE_MAX]);
/* Number of non-pinned cpu/task breakpoints in a cpu */ /* Number of non-pinned cpu/task breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int, nr_bp_flexible); static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
static int nr_slots[TYPE_MAX];
static int constraints_initialized;
/* Gather the number of total pinned and un-pinned bp in a cpuset */ /* Gather the number of total pinned and un-pinned bp in a cpuset */
struct bp_busy_slots { struct bp_busy_slots {
...@@ -67,16 +73,29 @@ struct bp_busy_slots { ...@@ -67,16 +73,29 @@ struct bp_busy_slots {
/* Serialize accesses to the above constraints */ /* Serialize accesses to the above constraints */
static DEFINE_MUTEX(nr_bp_mutex); static DEFINE_MUTEX(nr_bp_mutex);
/*
 * Number of slots a breakpoint is charged for in the constraint tables.
 *
 * This default charges one slot per breakpoint and ignores bp. Being
 * __weak, it can be replaced by a non-weak definition elsewhere.
 */
__weak int hw_breakpoint_weight(struct perf_event *bp)
{
return 1;
}
/*
 * Map a breakpoint to the constraint-table index it is accounted under:
 * TYPE_DATA when any of the read/write bits (HW_BREAKPOINT_RW) is set in
 * its bp_type, TYPE_INST otherwise.
 */
static inline enum bp_type_idx find_slot_idx(struct perf_event *bp)
{
	return (bp->attr.bp_type & HW_BREAKPOINT_RW) ? TYPE_DATA : TYPE_INST;
}
/* /*
* Report the maximum number of pinned breakpoints a task * Report the maximum number of pinned breakpoints a task
* have in this cpu * have in this cpu
*/ */
static unsigned int max_task_bp_pinned(int cpu) static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
{ {
int i; int i;
unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu); unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
for (i = HBP_NUM -1; i >= 0; i--) { for (i = nr_slots[type] - 1; i >= 0; i--) {
if (tsk_pinned[i] > 0) if (tsk_pinned[i] > 0)
return i + 1; return i + 1;
} }
...@@ -84,7 +103,7 @@ static unsigned int max_task_bp_pinned(int cpu) ...@@ -84,7 +103,7 @@ static unsigned int max_task_bp_pinned(int cpu)
return 0; return 0;
} }
static int task_bp_pinned(struct task_struct *tsk) static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
{ {
struct perf_event_context *ctx = tsk->perf_event_ctxp; struct perf_event_context *ctx = tsk->perf_event_ctxp;
struct list_head *list; struct list_head *list;
...@@ -105,7 +124,8 @@ static int task_bp_pinned(struct task_struct *tsk) ...@@ -105,7 +124,8 @@ static int task_bp_pinned(struct task_struct *tsk)
*/ */
list_for_each_entry(bp, list, event_entry) { list_for_each_entry(bp, list, event_entry) {
if (bp->attr.type == PERF_TYPE_BREAKPOINT) if (bp->attr.type == PERF_TYPE_BREAKPOINT)
count++; if (find_slot_idx(bp) == type)
count += hw_breakpoint_weight(bp);
} }
raw_spin_unlock_irqrestore(&ctx->lock, flags); raw_spin_unlock_irqrestore(&ctx->lock, flags);
...@@ -118,18 +138,19 @@ static int task_bp_pinned(struct task_struct *tsk) ...@@ -118,18 +138,19 @@ static int task_bp_pinned(struct task_struct *tsk)
* a given cpu (cpu > -1) or in all of them (cpu = -1). * a given cpu (cpu > -1) or in all of them (cpu = -1).
*/ */
static void static void
fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp) fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
enum bp_type_idx type)
{ {
int cpu = bp->cpu; int cpu = bp->cpu;
struct task_struct *tsk = bp->ctx->task; struct task_struct *tsk = bp->ctx->task;
if (cpu >= 0) { if (cpu >= 0) {
slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu); slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
if (!tsk) if (!tsk)
slots->pinned += max_task_bp_pinned(cpu); slots->pinned += max_task_bp_pinned(cpu, type);
else else
slots->pinned += task_bp_pinned(tsk); slots->pinned += task_bp_pinned(tsk, type);
slots->flexible = per_cpu(nr_bp_flexible, cpu); slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
return; return;
} }
...@@ -137,48 +158,66 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp) ...@@ -137,48 +158,66 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
for_each_online_cpu(cpu) { for_each_online_cpu(cpu) {
unsigned int nr; unsigned int nr;
nr = per_cpu(nr_cpu_bp_pinned, cpu); nr = per_cpu(nr_cpu_bp_pinned[type], cpu);
if (!tsk) if (!tsk)
nr += max_task_bp_pinned(cpu); nr += max_task_bp_pinned(cpu, type);
else else
nr += task_bp_pinned(tsk); nr += task_bp_pinned(tsk, type);
if (nr > slots->pinned) if (nr > slots->pinned)
slots->pinned = nr; slots->pinned = nr;
nr = per_cpu(nr_bp_flexible, cpu); nr = per_cpu(nr_bp_flexible[type], cpu);
if (nr > slots->flexible) if (nr > slots->flexible)
slots->flexible = nr; slots->flexible = nr;
} }
} }
/*
 * Account for the breakpoint being reserved on top of the busy slots
 * already gathered: its weight is added to slots->pinned.
 *
 * For now, continue to consider flexible as pinned, until we can
 * ensure no flexible event can ever be scheduled before a pinned event
 * on the same cpu. slots->flexible is therefore left untouched here.
 */
static void
fetch_this_slot(struct bp_busy_slots *slots, int weight)
{
slots->pinned += weight;
}
/* /*
* Add a pinned breakpoint for the given task in our constraint table * Add a pinned breakpoint for the given task in our constraint table
*/ */
static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
enum bp_type_idx type, int weight)
{ {
unsigned int *tsk_pinned; unsigned int *tsk_pinned;
int count = 0; int old_count = 0;
int old_idx = 0;
int idx = 0;
count = task_bp_pinned(tsk); old_count = task_bp_pinned(tsk, type);
old_idx = old_count - 1;
idx = old_idx + weight;
tsk_pinned = per_cpu(nr_task_bp_pinned, cpu); tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
if (enable) { if (enable) {
tsk_pinned[count]++; tsk_pinned[idx]++;
if (count > 0) if (old_count > 0)
tsk_pinned[count-1]--; tsk_pinned[old_idx]--;
} else { } else {
tsk_pinned[count]--; tsk_pinned[idx]--;
if (count > 0) if (old_count > 0)
tsk_pinned[count-1]++; tsk_pinned[old_idx]++;
} }
} }
/* /*
* Add/remove the given breakpoint in our constraint table * Add/remove the given breakpoint in our constraint table
*/ */
static void toggle_bp_slot(struct perf_event *bp, bool enable) static void
toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
int weight)
{ {
int cpu = bp->cpu; int cpu = bp->cpu;
struct task_struct *tsk = bp->ctx->task; struct task_struct *tsk = bp->ctx->task;
...@@ -186,20 +225,20 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) ...@@ -186,20 +225,20 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
/* Pinned counter task profiling */ /* Pinned counter task profiling */
if (tsk) { if (tsk) {
if (cpu >= 0) { if (cpu >= 0) {
toggle_bp_task_slot(tsk, cpu, enable); toggle_bp_task_slot(tsk, cpu, enable, type, weight);
return; return;
} }
for_each_online_cpu(cpu) for_each_online_cpu(cpu)
toggle_bp_task_slot(tsk, cpu, enable); toggle_bp_task_slot(tsk, cpu, enable, type, weight);
return; return;
} }
/* Pinned counter cpu profiling */ /* Pinned counter cpu profiling */
if (enable) if (enable)
per_cpu(nr_cpu_bp_pinned, bp->cpu)++; per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
else else
per_cpu(nr_cpu_bp_pinned, bp->cpu)--; per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
} }
/* /*
...@@ -246,14 +285,29 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) ...@@ -246,14 +285,29 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
static int __reserve_bp_slot(struct perf_event *bp) static int __reserve_bp_slot(struct perf_event *bp)
{ {
struct bp_busy_slots slots = {0}; struct bp_busy_slots slots = {0};
enum bp_type_idx type;
int weight;
fetch_bp_busy_slots(&slots, bp); /* We couldn't initialize breakpoint constraints on boot */
if (!constraints_initialized)
return -ENOMEM;
/* Basic checks */
if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY ||
bp->attr.bp_type == HW_BREAKPOINT_INVALID)
return -EINVAL;
type = find_slot_idx(bp);
weight = hw_breakpoint_weight(bp);
fetch_bp_busy_slots(&slots, bp, type);
fetch_this_slot(&slots, weight);
/* Flexible counters need to keep at least one slot */ /* Flexible counters need to keep at least one slot */
if (slots.pinned + (!!slots.flexible) == HBP_NUM) if (slots.pinned + (!!slots.flexible) > nr_slots[type])
return -ENOSPC; return -ENOSPC;
toggle_bp_slot(bp, true); toggle_bp_slot(bp, true, type, weight);
return 0; return 0;
} }
...@@ -273,7 +327,12 @@ int reserve_bp_slot(struct perf_event *bp) ...@@ -273,7 +327,12 @@ int reserve_bp_slot(struct perf_event *bp)
static void __release_bp_slot(struct perf_event *bp) static void __release_bp_slot(struct perf_event *bp)
{ {
toggle_bp_slot(bp, false); enum bp_type_idx type;
int weight;
type = find_slot_idx(bp);
weight = hw_breakpoint_weight(bp);
toggle_bp_slot(bp, false, type, weight);
} }
void release_bp_slot(struct perf_event *bp) void release_bp_slot(struct perf_event *bp)
...@@ -308,6 +367,28 @@ int dbg_release_bp_slot(struct perf_event *bp) ...@@ -308,6 +367,28 @@ int dbg_release_bp_slot(struct perf_event *bp)
return 0; return 0;
} }
/*
 * Validate a breakpoint before it is registered or re-enabled.
 *
 * Returns 0 when the breakpoint is acceptable, otherwise:
 *   - whatever non-zero code arch_validate_hwbkpt_settings() reported;
 *   - -EINVAL for a kernel-space breakpoint that has exclude_kernel set;
 *   - -EPERM for a kernel-space breakpoint requested without
 *     CAP_SYS_ADMIN.
 */
static int validate_hw_breakpoint(struct perf_event *bp)
{
	int err = arch_validate_hwbkpt_settings(bp);

	if (err)
		return err;

	/* Nothing more to check unless the breakpoint is in kernel space. */
	if (!arch_check_bp_in_kernelspace(bp))
		return 0;

	if (bp->attr.exclude_kernel)
		return -EINVAL;

	/*
	 * A breakpoint placed in the trap path could make the trap recurse,
	 * so unprivileged users are not allowed to set kernel breakpoints.
	 */
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return 0;
}
int register_perf_hw_breakpoint(struct perf_event *bp) int register_perf_hw_breakpoint(struct perf_event *bp)
{ {
int ret; int ret;
...@@ -316,17 +397,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp) ...@@ -316,17 +397,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
if (ret) if (ret)
return ret; return ret;
/* ret = validate_hw_breakpoint(bp);
* Ptrace breakpoints can be temporary perf events only
* meant to reserve a slot. In this case, it is created disabled and
* we don't want to check the params right now (as we put a null addr)
* But perf tools create events as disabled and we want to check
* the params for them.
* This is a quick hack that will be removed soon, once we remove
* the tmp breakpoints from ptrace
*/
if (!bp->attr.disabled || !bp->overflow_handler)
ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
/* if arch_validate_hwbkpt_settings() fails then release bp slot */ /* if arch_validate_hwbkpt_settings() fails then release bp slot */
if (ret) if (ret)
...@@ -373,7 +444,7 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att ...@@ -373,7 +444,7 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
if (attr->disabled) if (attr->disabled)
goto end; goto end;
err = arch_validate_hwbkpt_settings(bp, bp->ctx->task); err = validate_hw_breakpoint(bp);
if (!err) if (!err)
perf_event_enable(bp); perf_event_enable(bp);
...@@ -480,7 +551,36 @@ static struct notifier_block hw_breakpoint_exceptions_nb = { ...@@ -480,7 +551,36 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {
/*
 * Boot-time setup of the breakpoint constraint tables.
 *
 * Records the number of slots per breakpoint type in nr_slots[], then
 * allocates, for every possible cpu and every type, a zeroed table of
 * nr_slots[type] counters behind nr_task_bp_pinned. Once every table
 * exists, constraints_initialized is set and the die notifier is
 * registered.
 *
 * Returns the result of register_die_notifier() on success, or -ENOMEM
 * if an allocation failed. In the failure case every table allocated so
 * far is released and constraints_initialized stays 0, so
 * __reserve_bp_slot() keeps refusing reservations.
 */
static int __init init_hw_breakpoint(void)
{
	unsigned int **task_bp_pinned;
	int cpu, err_cpu;
	int i;

	for (i = 0; i < TYPE_MAX; i++)
		nr_slots[i] = hw_breakpoint_slots(i);

	for_each_possible_cpu(cpu) {
		for (i = 0; i < TYPE_MAX; i++) {
			task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu);
			*task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i],
						  GFP_KERNEL);
			if (!*task_bp_pinned)
				goto err_alloc;
		}
	}

	constraints_initialized = 1;

	return register_die_notifier(&hw_breakpoint_exceptions_nb);

err_alloc:
	/*
	 * Unwind every cpu up to and including the one that failed. The
	 * tables must be looked up with err_cpu (the unwind iterator), not
	 * cpu: using cpu would free the failing cpu's pointers over and over
	 * and leak everything allocated for the earlier cpus. The failing
	 * cpu is handled before breaking out because it may hold tables for
	 * the types allocated before the failure; kfree(NULL) is a no-op for
	 * the ones it never got.
	 */
	for_each_possible_cpu(err_cpu) {
		for (i = 0; i < TYPE_MAX; i++) {
			kfree(per_cpu(nr_task_bp_pinned[i], err_cpu));
			/* Do not leave a dangling pointer behind. */
			per_cpu(nr_task_bp_pinned[i], err_cpu) = NULL;
		}
		if (err_cpu == cpu)
			break;
	}

	return -ENOMEM;
}
core_initcall(init_hw_breakpoint); core_initcall(init_hw_breakpoint);
......
...@@ -34,12 +34,6 @@ ...@@ -34,12 +34,6 @@
#include <asm/atomic.h> #include <asm/atomic.h>
/*
* For now, let us restrict the no. of symbols traced simultaneously to number
* of available hardware breakpoint registers.
*/
#define KSYM_TRACER_MAX HBP_NUM
#define KSYM_TRACER_OP_LEN 3 /* rw- */ #define KSYM_TRACER_OP_LEN 3 /* rw- */
struct trace_ksym { struct trace_ksym {
...@@ -53,7 +47,6 @@ struct trace_ksym { ...@@ -53,7 +47,6 @@ struct trace_ksym {
static struct trace_array *ksym_trace_array; static struct trace_array *ksym_trace_array;
static unsigned int ksym_filter_entry_count;
static unsigned int ksym_tracing_enabled; static unsigned int ksym_tracing_enabled;
static HLIST_HEAD(ksym_filter_head); static HLIST_HEAD(ksym_filter_head);
...@@ -181,13 +174,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) ...@@ -181,13 +174,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
struct trace_ksym *entry; struct trace_ksym *entry;
int ret = -ENOMEM; int ret = -ENOMEM;
if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
" new requests for tracing can be accepted now.\n",
KSYM_TRACER_MAX);
return -ENOSPC;
}
entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL); entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
if (!entry) if (!entry)
return -ENOMEM; return -ENOMEM;
...@@ -203,13 +189,17 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) ...@@ -203,13 +189,17 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
if (IS_ERR(entry->ksym_hbp)) { if (IS_ERR(entry->ksym_hbp)) {
ret = PTR_ERR(entry->ksym_hbp); ret = PTR_ERR(entry->ksym_hbp);
printk(KERN_INFO "ksym_tracer request failed. Try again" if (ret == -ENOSPC) {
" later!!\n"); printk(KERN_ERR "ksym_tracer: Maximum limit reached."
" No new requests for tracing can be accepted now.\n");
} else {
printk(KERN_INFO "ksym_tracer request failed. Try again"
" later!!\n");
}
goto err; goto err;
} }
hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
ksym_filter_entry_count++;
return 0; return 0;
...@@ -265,7 +255,6 @@ static void __ksym_trace_reset(void) ...@@ -265,7 +255,6 @@ static void __ksym_trace_reset(void)
hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
ksym_hlist) { ksym_hlist) {
unregister_wide_hw_breakpoint(entry->ksym_hbp); unregister_wide_hw_breakpoint(entry->ksym_hbp);
ksym_filter_entry_count--;
hlist_del_rcu(&(entry->ksym_hlist)); hlist_del_rcu(&(entry->ksym_hlist));
synchronize_rcu(); synchronize_rcu();
kfree(entry); kfree(entry);
...@@ -338,7 +327,6 @@ static ssize_t ksym_trace_filter_write(struct file *file, ...@@ -338,7 +327,6 @@ static ssize_t ksym_trace_filter_write(struct file *file,
goto out_unlock; goto out_unlock;
} }
/* Error or "symbol:---" case: drop it */ /* Error or "symbol:---" case: drop it */
ksym_filter_entry_count--;
hlist_del_rcu(&(entry->ksym_hlist)); hlist_del_rcu(&(entry->ksym_hlist));
synchronize_rcu(); synchronize_rcu();
kfree(entry); kfree(entry);
......
...@@ -49,12 +49,10 @@ available as calls back into the perf executable (see below). ...@@ -49,12 +49,10 @@ available as calls back into the perf executable (see below).
As an example, the following perf record command can be used to record As an example, the following perf record command can be used to record
all sched_wakeup events in the system: all sched_wakeup events in the system:
# perf record -c 1 -f -a -M -R -e sched:sched_wakeup # perf record -a -e sched:sched_wakeup
Traces meant to be processed using a script should be recorded with Traces meant to be processed using a script should be recorded with
the above options: -c 1 says to sample every event, -a to enable the above option: -a to enable system-wide collection.
system-wide collection, -M to multiplex the output, and -R to collect
raw samples.
The format file for the sched_wakeup event defines the following fields The format file for the sched_wakeup event defines the following fields
(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format): (see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
......
...@@ -93,7 +93,7 @@ don't care how it exited, so we'll use 'perf record' to record only ...@@ -93,7 +93,7 @@ don't care how it exited, so we'll use 'perf record' to record only
the sys_enter events: the sys_enter events:
---- ----
# perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter # perf record -a -e raw_syscalls:sys_enter
^C[ perf record: Woken up 1 times to write data ] ^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 56.545 MB perf.data (~2470503 samples) ] [ perf record: Captured and wrote 56.545 MB perf.data (~2470503 samples) ]
...@@ -359,7 +359,7 @@ your script: ...@@ -359,7 +359,7 @@ your script:
# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-record # cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-record
#!/bin/bash #!/bin/bash
perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter perf record -a -e raw_syscalls:sys_enter
---- ----
The 'report' script is also a shell script with the same base name as The 'report' script is also a shell script with the same base name as
...@@ -449,12 +449,10 @@ available as calls back into the perf executable (see below). ...@@ -449,12 +449,10 @@ available as calls back into the perf executable (see below).
As an example, the following perf record command can be used to record As an example, the following perf record command can be used to record
all sched_wakeup events in the system: all sched_wakeup events in the system:
# perf record -c 1 -f -a -M -R -e sched:sched_wakeup # perf record -a -e sched:sched_wakeup
Traces meant to be processed using a script should be recorded with Traces meant to be processed using a script should be recorded with
the above options: -c 1 says to sample every event, -a to enable the above option: -a to enable system-wide collection.
system-wide collection, -M to multiplex the output, and -R to collect
raw samples.
The format file for the sched_wakeup event defines the following fields The format file for the sched_wakeup event defines the following fields
(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format): (see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
......
#!/bin/bash #!/bin/bash
perf record -c 1 -f -a -M -R -e kmem:kmalloc -e irq:softirq_entry -e kmem:kfree perf record -a -e kmem:kmalloc -e irq:softirq_entry -e kmem:kfree
#!/bin/bash #!/bin/bash
perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit $@ perf record -a -e raw_syscalls:sys_exit $@
#!/bin/bash #!/bin/bash
perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_enter_write $@ perf record -a -e syscalls:sys_enter_read -e syscalls:sys_enter_write $@
#!/bin/bash #!/bin/bash
perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@ perf record -a -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
#!/bin/bash #!/bin/bash
perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@ perf record -a -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
#!/bin/bash #!/bin/bash
perf record -c 1 -f -a -M -R -e sched:sched_switch -e sched:sched_wakeup $@ perf record -a -e sched:sched_switch -e sched:sched_wakeup $@
......
#!/bin/bash #!/bin/bash
perf record -c 1 -f -a -M -R -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion $@ perf record -a -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion $@
#!/bin/bash #!/bin/bash
perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit $@ perf record -a -e raw_syscalls:sys_exit $@
#!/bin/bash #!/bin/bash
perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter $@ perf record -a -e raw_syscalls:sys_enter $@
#!/bin/bash #!/bin/bash
perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter $@ perf record -a -e raw_syscalls:sys_enter $@
#!/bin/bash #!/bin/bash
perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter $@ perf record -a -e raw_syscalls:sys_enter $@
...@@ -691,11 +691,6 @@ static int __read_expected(enum event_type expect, const char *str, ...@@ -691,11 +691,6 @@ static int __read_expected(enum event_type expect, const char *str,
return ret; return ret;
} }
/*
 * Variant of read_expected() that lets the caller pick the warn flag
 * handed to __read_expected() instead of always passing true.
 *
 * Returns whatever __read_expected() returns.
 */
static int read_expected_warn(enum event_type expect, const char *str, bool warn)
{
return __read_expected(expect, str, 1, warn);
}
static int read_expected(enum event_type expect, const char *str) static int read_expected(enum event_type expect, const char *str)
{ {
return __read_expected(expect, str, 1, true); return __read_expected(expect, str, 1, true);
...@@ -3104,90 +3099,6 @@ static void print_args(struct print_arg *args) ...@@ -3104,90 +3099,6 @@ static void print_args(struct print_arg *args)
} }
} }
/*
 * Parse one field description from the current input buffer and record
 * its offset and size.
 *
 * Judging by the sequence of expected tokens below, the accepted shape is
 *
 *   field: <type> <field>; offset: <n>; size: <n>; [signed: <n>;] <newline>
 *
 * @field:  name the field item must match
 * @offset: receives atoi() of the item following "offset:"
 * @size:   receives atoi() of the item following "size:"
 * @warn:   forwarded to read_expected_warn() for the field-name check only
 *
 * Nothing is returned: parsing simply stops at the first mismatch, and
 * *offset / *size are written only if parsing got as far as their values.
 */
static void parse_header_field(const char *field,
int *offset, int *size, bool warn)
{
char *token;
int type;
if (read_expected(EVENT_ITEM, "field") < 0)
return;
if (read_expected(EVENT_OP, ":") < 0)
return;
/* type: only checked to be an item, its text is discarded */
if (read_expect_type(EVENT_ITEM, &token) < 0)
goto fail;
free_token(token);
/* field name: the only check whose warn flag is caller-controlled */
if (read_expected_warn(EVENT_ITEM, field, warn) < 0)
return;
if (read_expected(EVENT_OP, ";") < 0)
return;
if (read_expected(EVENT_ITEM, "offset") < 0)
return;
if (read_expected(EVENT_OP, ":") < 0)
return;
if (read_expect_type(EVENT_ITEM, &token) < 0)
goto fail;
*offset = atoi(token);
free_token(token);
if (read_expected(EVENT_OP, ";") < 0)
return;
if (read_expected(EVENT_ITEM, "size") < 0)
return;
if (read_expected(EVENT_OP, ":") < 0)
return;
if (read_expect_type(EVENT_ITEM, &token) < 0)
goto fail;
*size = atoi(token);
free_token(token);
if (read_expected(EVENT_OP, ";") < 0)
return;
/* Either the line ends here, or an optional "signed" entry follows. */
type = read_token(&token);
if (type != EVENT_NEWLINE) {
/* newer versions of the kernel have a "signed" type */
if (type != EVENT_ITEM)
goto fail;
if (strcmp(token, "signed") != 0)
goto fail;
free_token(token);
if (read_expected(EVENT_OP, ":") < 0)
return;
/* value of "signed": checked to be an item, then discarded */
if (read_expect_type(EVENT_ITEM, &token))
goto fail;
free_token(token);
if (read_expected(EVENT_OP, ";") < 0)
return;
if (read_expect_type(EVENT_NEWLINE, &token))
goto fail;
}
/*
 * Not only an error exit: the success path falls through to here as well,
 * so the last token read is released in both cases.
 */
fail:
free_token(token);
}
/*
 * Parse the header page description held in buf (size bytes).
 *
 * After pointing the tokenizer at buf with init_input_buf(), the fields
 * "timestamp", "commit", "overwrite" and "data" are parsed in that order,
 * each filling its header_page_*_offset / header_page_*_size pair.
 * "overwrite" is the only field parsed with warn set to false.
 *
 * Always returns 0: parse_header_field() returns void, so a field that
 * fails to parse is not reported to the caller.
 */
int parse_header_page(char *buf, unsigned long size)
{
init_input_buf(buf, size);
parse_header_field("timestamp", &header_page_ts_offset,
&header_page_ts_size, true);
parse_header_field("commit", &header_page_size_offset,
&header_page_size_size, true);
parse_header_field("overwrite", &header_page_overwrite_offset,
&header_page_overwrite_size, false);
parse_header_field("data", &header_page_data_offset,
&header_page_data_size, true);
return 0;
}
int parse_ftrace_file(char *buf, unsigned long size) int parse_ftrace_file(char *buf, unsigned long size)
{ {
struct format_field *field; struct format_field *field;
......
...@@ -53,6 +53,12 @@ static unsigned long page_size; ...@@ -53,6 +53,12 @@ static unsigned long page_size;
static ssize_t calc_data_size; static ssize_t calc_data_size;
static bool repipe; static bool repipe;
/*
 * Move the file position of input_fd forward by size bytes without
 * reading them.
 *
 * The lseek() result is deliberately ignored: if it fails, the next read
 * will report it.
 * NOTE(review): lseek() cannot reposition a pipe (it fails with ESPIPE),
 * in which case the bytes meant to be skipped would be consumed by the
 * next read instead -- confirm input_fd is always seekable on this path.
 */
static void skip(int size)
{
lseek(input_fd, size, SEEK_CUR);
}
static int do_read(int fd, void *buf, int size) static int do_read(int fd, void *buf, int size)
{ {
int rsize = size; int rsize = size;
...@@ -184,7 +190,6 @@ static void read_ftrace_printk(void) ...@@ -184,7 +190,6 @@ static void read_ftrace_printk(void)
static void read_header_files(void) static void read_header_files(void)
{ {
unsigned long long size; unsigned long long size;
char *header_page;
char *header_event; char *header_event;
char buf[BUFSIZ]; char buf[BUFSIZ];
...@@ -194,10 +199,7 @@ static void read_header_files(void) ...@@ -194,10 +199,7 @@ static void read_header_files(void)
die("did not read header page"); die("did not read header page");
size = read8(); size = read8();
header_page = malloc_or_die(size); skip(size);
read_or_die(header_page, size);
parse_header_page(header_page, size);
free(header_page);
/* /*
* The size field in the page is of type long, * The size field in the page is of type long,
......
...@@ -244,7 +244,6 @@ extern int header_page_data_size; ...@@ -244,7 +244,6 @@ extern int header_page_data_size;
extern bool latency_format; extern bool latency_format;
int parse_header_page(char *buf, unsigned long size);
int trace_parse_common_type(void *data); int trace_parse_common_type(void *data);
int trace_parse_common_pid(void *data); int trace_parse_common_pid(void *data);
int parse_common_pc(void *data); int parse_common_pc(void *data);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment