Commit 5d70f79b authored by Linus Torvalds

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (163 commits)
  tracing: Fix compile issue for trace_sched_wakeup.c
  [S390] hardirq: remove pointless header file includes
  [IA64] Move local_softirq_pending() definition
  perf, powerpc: Fix power_pmu_event_init to not use event->ctx
  ftrace: Remove recursion between recordmcount and scripts/mod/empty
  jump_label: Add COND_STMT(), reducer wrappery
  perf: Optimize sw events
  perf: Use jump_labels to optimize the scheduler hooks
  jump_label: Add atomic_t interface
  jump_label: Use more consistent naming
  perf, hw_breakpoint: Fix crash in hw_breakpoint creation
  perf: Find task before event alloc
  perf: Fix task refcount bugs
  perf: Fix group moving
  irq_work: Add generic hardirq context callbacks
  perf_events: Fix transaction recovery in group_sched_in()
  perf_events: Fix bogus AMD64 generic TLB events
  perf_events: Fix bogus context time tracking
  tracing: Remove parent recording in latency tracer graph options
  tracing: Use one prologue for the preempt irqs off tracer function tracers
  ...
parents 888a6f77 750ed158
@@ -542,9 +542,11 @@ Kprobes does not use mutexes or allocate memory except during
 registration and unregistration.

 Probe handlers are run with preemption disabled.  Depending on the
-architecture, handlers may also run with interrupts disabled.  In any
-case, your handler should not yield the CPU (e.g., by attempting to
-acquire a semaphore).
+architecture and optimization state, handlers may also run with
+interrupts disabled (e.g., kretprobe handlers and optimized kprobe
+handlers run without interrupt disabled on x86/x86-64).  In any case,
+your handler should not yield the CPU (e.g., by attempting to acquire
+a semaphore).

 Since a return probe is implemented by replacing the return
 address with the trampoline's address, stack backtraces and calls
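For context only (not part of this merge): a minimal sketch of a pre-handler that respects the constraints described above — it touches no locks and never sleeps. The probed symbol and the hit counter are illustrative choices, not anything mandated by the documentation.

#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/module.h>

/* Sketch: count hits on a probed function without doing anything that
 * could yield the CPU (no mutexes, no semaphores, no GFP_KERNEL
 * allocations), since the handler may run with IRQs disabled. */
static unsigned long hits;

static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
        hits++;
        return 0;
}

static struct kprobe kp = {
        .symbol_name    = "do_fork",    /* example target only */
        .pre_handler    = handler_pre,
};

static int __init probe_init(void)
{
        return register_kprobe(&kp);
}

static void __exit probe_exit(void)
{
        unregister_kprobe(&kp);
        pr_info("kprobe hits: %lu\n", hits);
}

module_init(probe_init);
module_exit(probe_exit);
MODULE_LICENSE("GPL");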
......
@@ -568,6 +568,12 @@ endif

 ifdef CONFIG_FUNCTION_TRACER
 KBUILD_CFLAGS	+= -pg
+ifdef CONFIG_DYNAMIC_FTRACE
+ifdef CONFIG_HAVE_C_RECORDMCOUNT
+BUILD_C_RECORDMCOUNT := y
+export BUILD_C_RECORDMCOUNT
+endif
+endif
 endif

 # We trigger additional mismatches with less inlining

@@ -591,6 +597,11 @@ KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
 # conserve stack if available
 KBUILD_CFLAGS   += $(call cc-option,-fconserve-stack)

+# check for 'asm goto'
+ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y)
+KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
+endif
+
 # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments
 # But warn user when we do so
 warn-assign = \
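For context only: scripts/gcc-goto.sh probes whether the compiler accepts the "asm goto" construct that the new jump-label code in this merge relies on, and CC_HAVE_ASM_GOTO records the result. A rough sketch of the kind of construct being probed for (not the literal script contents; the function name is illustrative):

/* A compiler with "asm goto" lets inline assembly jump to a C label,
 * which jump labels use to patch a nop into a jump at runtime instead
 * of testing a flag on every call. */
static inline int probe_asm_goto(void)
{
        asm goto("" : : : : entry);     /* accepted only if the compiler supports asm goto */
        return 0;
entry:
        return 1;
}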
......
@@ -158,4 +158,7 @@ config HAVE_PERF_EVENTS_NMI
 subsystem.  Also has support for calculating CPU cycle events
 to determine how many clock cycles in a given period.

+config HAVE_ARCH_JUMP_LABEL
+	bool
+
 source "kernel/gcov/Kconfig"
@@ -9,6 +9,7 @@ config ALPHA
 select HAVE_IDE
 select HAVE_OPROFILE
 select HAVE_SYSCALL_WRAPPERS
+select HAVE_IRQ_WORK
 select HAVE_PERF_EVENTS
 select HAVE_DMA_ATTRS
 help
......
 #ifndef __ASM_ALPHA_PERF_EVENT_H
 #define __ASM_ALPHA_PERF_EVENT_H

-/* Alpha only supports software events through this interface. */
-extern void set_perf_event_pending(void);
-
-#define PERF_EVENT_INDEX_OFFSET 0
-
 #ifdef CONFIG_PERF_EVENTS
 extern void init_hw_perf_events(void);
 #else
......
...@@ -402,13 +402,12 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc) ...@@ -402,13 +402,12 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc)
struct hw_perf_event *hwc = &pe->hw; struct hw_perf_event *hwc = &pe->hw;
int idx = hwc->idx; int idx = hwc->idx;
if (cpuc->current_idx[j] != PMC_NO_INDEX) { if (cpuc->current_idx[j] == PMC_NO_INDEX) {
cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
continue;
}
alpha_perf_event_set_period(pe, hwc, idx); alpha_perf_event_set_period(pe, hwc, idx);
cpuc->current_idx[j] = idx; cpuc->current_idx[j] = idx;
}
if (!(hwc->state & PERF_HES_STOPPED))
cpuc->idx_mask |= (1<<cpuc->current_idx[j]); cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
} }
cpuc->config = cpuc->event[0]->hw.config_base; cpuc->config = cpuc->event[0]->hw.config_base;
...@@ -420,12 +419,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc) ...@@ -420,12 +419,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc)
* - this function is called from outside this module via the pmu struct * - this function is called from outside this module via the pmu struct
* returned from perf event initialisation. * returned from perf event initialisation.
*/ */
static int alpha_pmu_enable(struct perf_event *event) static int alpha_pmu_add(struct perf_event *event, int flags)
{ {
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int n0; int n0;
int ret; int ret;
unsigned long flags; unsigned long irq_flags;
/* /*
* The Sparc code has the IRQ disable first followed by the perf * The Sparc code has the IRQ disable first followed by the perf
...@@ -435,8 +435,8 @@ static int alpha_pmu_enable(struct perf_event *event) ...@@ -435,8 +435,8 @@ static int alpha_pmu_enable(struct perf_event *event)
* nevertheless we disable the PMCs first to enable a potential * nevertheless we disable the PMCs first to enable a potential
* final PMI to occur before we disable interrupts. * final PMI to occur before we disable interrupts.
*/ */
perf_disable(); perf_pmu_disable(event->pmu);
local_irq_save(flags); local_irq_save(irq_flags);
/* Default to error to be returned */ /* Default to error to be returned */
ret = -EAGAIN; ret = -EAGAIN;
...@@ -455,8 +455,12 @@ static int alpha_pmu_enable(struct perf_event *event) ...@@ -455,8 +455,12 @@ static int alpha_pmu_enable(struct perf_event *event)
} }
} }
local_irq_restore(flags); hwc->state = PERF_HES_UPTODATE;
perf_enable(); if (!(flags & PERF_EF_START))
hwc->state |= PERF_HES_STOPPED;
local_irq_restore(irq_flags);
perf_pmu_enable(event->pmu);
return ret; return ret;
} }
...@@ -467,15 +471,15 @@ static int alpha_pmu_enable(struct perf_event *event) ...@@ -467,15 +471,15 @@ static int alpha_pmu_enable(struct perf_event *event)
* - this function is called from outside this module via the pmu struct * - this function is called from outside this module via the pmu struct
* returned from perf event initialisation. * returned from perf event initialisation.
*/ */
static void alpha_pmu_disable(struct perf_event *event) static void alpha_pmu_del(struct perf_event *event, int flags)
{ {
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw; struct hw_perf_event *hwc = &event->hw;
unsigned long flags; unsigned long irq_flags;
int j; int j;
perf_disable(); perf_pmu_disable(event->pmu);
local_irq_save(flags); local_irq_save(irq_flags);
for (j = 0; j < cpuc->n_events; j++) { for (j = 0; j < cpuc->n_events; j++) {
if (event == cpuc->event[j]) { if (event == cpuc->event[j]) {
...@@ -501,8 +505,8 @@ static void alpha_pmu_disable(struct perf_event *event) ...@@ -501,8 +505,8 @@ static void alpha_pmu_disable(struct perf_event *event)
} }
} }
local_irq_restore(flags); local_irq_restore(irq_flags);
perf_enable(); perf_pmu_enable(event->pmu);
} }
...@@ -514,12 +518,43 @@ static void alpha_pmu_read(struct perf_event *event) ...@@ -514,12 +518,43 @@ static void alpha_pmu_read(struct perf_event *event)
} }
static void alpha_pmu_unthrottle(struct perf_event *event) static void alpha_pmu_stop(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
if (!(hwc->state & PERF_HES_STOPPED)) {
cpuc->idx_mask &= ~(1UL<<hwc->idx);
hwc->state |= PERF_HES_STOPPED;
}
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
alpha_perf_event_update(event, hwc, hwc->idx, 0);
hwc->state |= PERF_HES_UPTODATE;
}
if (cpuc->enabled)
wrperfmon(PERFMON_CMD_DISABLE, (1UL<<hwc->idx));
}
static void alpha_pmu_start(struct perf_event *event, int flags)
{ {
struct hw_perf_event *hwc = &event->hw; struct hw_perf_event *hwc = &event->hw;
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
return;
if (flags & PERF_EF_RELOAD) {
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
alpha_perf_event_set_period(event, hwc, hwc->idx);
}
hwc->state = 0;
cpuc->idx_mask |= 1UL<<hwc->idx; cpuc->idx_mask |= 1UL<<hwc->idx;
if (cpuc->enabled)
wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx)); wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx));
} }
...@@ -642,39 +677,36 @@ static int __hw_perf_event_init(struct perf_event *event) ...@@ -642,39 +677,36 @@ static int __hw_perf_event_init(struct perf_event *event)
return 0; return 0;
} }
static const struct pmu pmu = {
.enable = alpha_pmu_enable,
.disable = alpha_pmu_disable,
.read = alpha_pmu_read,
.unthrottle = alpha_pmu_unthrottle,
};
/* /*
* Main entry point to initialise a HW performance event. * Main entry point to initialise a HW performance event.
*/ */
const struct pmu *hw_perf_event_init(struct perf_event *event) static int alpha_pmu_event_init(struct perf_event *event)
{ {
int err; int err;
switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:
case PERF_TYPE_HW_CACHE:
break;
default:
return -ENOENT;
}
if (!alpha_pmu) if (!alpha_pmu)
return ERR_PTR(-ENODEV); return -ENODEV;
/* Do the real initialisation work. */ /* Do the real initialisation work. */
err = __hw_perf_event_init(event); err = __hw_perf_event_init(event);
if (err) return err;
return ERR_PTR(err);
return &pmu;
} }
/* /*
* Main entry point - enable HW performance counters. * Main entry point - enable HW performance counters.
*/ */
void hw_perf_enable(void) static void alpha_pmu_enable(struct pmu *pmu)
{ {
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
...@@ -700,7 +732,7 @@ void hw_perf_enable(void) ...@@ -700,7 +732,7 @@ void hw_perf_enable(void)
* Main entry point - disable HW performance counters. * Main entry point - disable HW performance counters.
*/ */
void hw_perf_disable(void) static void alpha_pmu_disable(struct pmu *pmu)
{ {
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
...@@ -713,6 +745,17 @@ void hw_perf_disable(void) ...@@ -713,6 +745,17 @@ void hw_perf_disable(void)
wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask);
} }
static struct pmu pmu = {
.pmu_enable = alpha_pmu_enable,
.pmu_disable = alpha_pmu_disable,
.event_init = alpha_pmu_event_init,
.add = alpha_pmu_add,
.del = alpha_pmu_del,
.start = alpha_pmu_start,
.stop = alpha_pmu_stop,
.read = alpha_pmu_read,
};
/* /*
* Main entry point - don't know when this is called but it * Main entry point - don't know when this is called but it
...@@ -766,7 +809,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, ...@@ -766,7 +809,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask);
/* la_ptr is the counter that overflowed. */ /* la_ptr is the counter that overflowed. */
if (unlikely(la_ptr >= perf_max_events)) { if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) {
/* This should never occur! */ /* This should never occur! */
irq_err_count++; irq_err_count++;
pr_warning("PMI: silly index %ld\n", la_ptr); pr_warning("PMI: silly index %ld\n", la_ptr);
...@@ -807,7 +850,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, ...@@ -807,7 +850,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
/* Interrupts coming too quickly; "throttle" the /* Interrupts coming too quickly; "throttle" the
* counter, i.e., disable it for a little while. * counter, i.e., disable it for a little while.
*/ */
cpuc->idx_mask &= ~(1UL<<idx); alpha_pmu_stop(event, 0);
} }
} }
wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask);
...@@ -837,6 +880,7 @@ void __init init_hw_perf_events(void) ...@@ -837,6 +880,7 @@ void __init init_hw_perf_events(void)
/* And set up PMU specification */ /* And set up PMU specification */
alpha_pmu = &ev67_pmu; alpha_pmu = &ev67_pmu;
perf_max_events = alpha_pmu->num_pmcs;
perf_pmu_register(&pmu);
} }
@@ -41,7 +41,7 @@
 #include <linux/init.h>
 #include <linux/bcd.h>
 #include <linux/profile.h>
-#include <linux/perf_event.h>
+#include <linux/irq_work.h>

 #include <asm/uaccess.h>
 #include <asm/io.h>

@@ -83,25 +83,25 @@ static struct {

 unsigned long est_cycle_freq;

-#ifdef CONFIG_PERF_EVENTS
+#ifdef CONFIG_IRQ_WORK

-DEFINE_PER_CPU(u8, perf_event_pending);
+DEFINE_PER_CPU(u8, irq_work_pending);

-#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1
-#define test_perf_event_pending()     __get_cpu_var(perf_event_pending)
-#define clear_perf_event_pending()    __get_cpu_var(perf_event_pending) = 0
+#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1
+#define test_irq_work_pending()     __get_cpu_var(irq_work_pending)
+#define clear_irq_work_pending()    __get_cpu_var(irq_work_pending) = 0

-void set_perf_event_pending(void)
+void set_irq_work_pending(void)
 {
-	set_perf_event_pending_flag();
+	set_irq_work_pending_flag();
 }

-#else /* CONFIG_PERF_EVENTS */
+#else /* CONFIG_IRQ_WORK */

-#define test_perf_event_pending()     0
-#define clear_perf_event_pending()
+#define test_irq_work_pending()     0
+#define clear_irq_work_pending()

-#endif /* CONFIG_PERF_EVENTS */
+#endif /* CONFIG_IRQ_WORK */

 static inline __u32 rpcc(void)

@@ -191,9 +191,9 @@ irqreturn_t timer_interrupt(int irq, void *dev)
 	write_sequnlock(&xtime_lock);

-	if (test_perf_event_pending()) {
-		clear_perf_event_pending();
-		perf_event_do_pending();
+	if (test_irq_work_pending()) {
+		clear_irq_work_pending();
+		irq_work_run();
 	}

 #ifndef CONFIG_SMP
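The hunk above is the pattern this series repeats across architectures: the perf-specific pending flag becomes a generic irq_work hook that the timer tick drains via irq_work_run(). A hedged sketch of a user of the new API (assuming the init_irq_work()/irq_work_queue() helpers added by the "irq_work: Add generic hardirq context callbacks" commit; the my_* names are illustrative):

#include <linux/irq_work.h>

/* Deferred part: runs later from a safe hard-IRQ context (for example
 * the timer tick above calling irq_work_run()), not from the NMI that
 * queued it. */
static void my_irq_work_func(struct irq_work *work)
{
        /* ... work that must not run in NMI context ... */
}

static struct irq_work my_work;

static void my_setup(void)
{
        init_irq_work(&my_work, my_irq_work_func);
}

static void my_nmi_handler(void)
{
        /* Safe from NMI/hard-IRQ context: just queues the work and asks
         * the architecture to raise a self-interrupt (on Alpha, via
         * set_irq_work_pending() and the next timer tick). */
        irq_work_queue(&my_work);
}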
......
@@ -23,6 +23,7 @@ config ARM
 select HAVE_KERNEL_GZIP
 select HAVE_KERNEL_LZO
 select HAVE_KERNEL_LZMA
+select HAVE_IRQ_WORK
 select HAVE_PERF_EVENTS
 select PERF_USE_VMALLOC
 select HAVE_REGS_AND_STACK_ACCESS_API
......
@@ -12,18 +12,6 @@
 #ifndef __ARM_PERF_EVENT_H__
 #define __ARM_PERF_EVENT_H__

-/*
- * NOP: on *most* (read: all supported) ARM platforms, the performance
- * counter interrupts are regular interrupts and not an NMI. This
- * means that when we receive the interrupt we can call
- * perf_event_do_pending() that handles all of the work with
- * interrupts disabled.
- */
-static inline void
-set_perf_event_pending(void)
-{
-}
-
 /* ARM performance counters start from 1 (in the cp15 accesses) so use the
  * same indexes here for consistency. */
 #define PERF_EVENT_INDEX_OFFSET 1
......
@@ -6,4 +6,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
 oprofilefs.o oprofile_stats.o \
 timer_int.o )

+ifeq ($(CONFIG_HW_PERF_EVENTS),y)
+DRIVER_OBJS += $(addprefix ../../../drivers/oprofile/, oprofile_perf.o)
+endif
+
 oprofile-y := $(DRIVER_OBJS) common.o
...@@ -25,139 +25,10 @@ ...@@ -25,139 +25,10 @@
#include <asm/ptrace.h> #include <asm/ptrace.h>
#ifdef CONFIG_HW_PERF_EVENTS #ifdef CONFIG_HW_PERF_EVENTS
/* char *op_name_from_perf_id(void)
* Per performance monitor configuration as set via oprofilefs.
*/
struct op_counter_config {
unsigned long count;
unsigned long enabled;
unsigned long event;
unsigned long unit_mask;
unsigned long kernel;
unsigned long user;
struct perf_event_attr attr;
};
static int op_arm_enabled;
static DEFINE_MUTEX(op_arm_mutex);
static struct op_counter_config *counter_config;
static struct perf_event **perf_events[nr_cpumask_bits];
static int perf_num_counters;
/*
* Overflow callback for oprofile.
*/
static void op_overflow_handler(struct perf_event *event, int unused,
struct perf_sample_data *data, struct pt_regs *regs)
{
int id;
u32 cpu = smp_processor_id();
for (id = 0; id < perf_num_counters; ++id)
if (perf_events[cpu][id] == event)
break;
if (id != perf_num_counters)
oprofile_add_sample(regs, id);
else
pr_warning("oprofile: ignoring spurious overflow "
"on cpu %u\n", cpu);
}
/*
* Called by op_arm_setup to create perf attributes to mirror the oprofile
* settings in counter_config. Attributes are created as `pinned' events and
* so are permanently scheduled on the PMU.
*/
static void op_perf_setup(void)
{ {
int i; enum arm_perf_pmu_ids id = armpmu_get_pmu_id();
u32 size = sizeof(struct perf_event_attr);
struct perf_event_attr *attr;
for (i = 0; i < perf_num_counters; ++i) {
attr = &counter_config[i].attr;
memset(attr, 0, size);
attr->type = PERF_TYPE_RAW;
attr->size = size;
attr->config = counter_config[i].event;
attr->sample_period = counter_config[i].count;
attr->pinned = 1;
}
}
static int op_create_counter(int cpu, int event)
{
int ret = 0;
struct perf_event *pevent;
if (!counter_config[event].enabled || (perf_events[cpu][event] != NULL))
return ret;
pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
cpu, -1,
op_overflow_handler);
if (IS_ERR(pevent)) {
ret = PTR_ERR(pevent);
} else if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
perf_event_release_kernel(pevent);
pr_warning("oprofile: failed to enable event %d "
"on CPU %d\n", event, cpu);
ret = -EBUSY;
} else {
perf_events[cpu][event] = pevent;
}
return ret;
}
static void op_destroy_counter(int cpu, int event)
{
struct perf_event *pevent = perf_events[cpu][event];
if (pevent) {
perf_event_release_kernel(pevent);
perf_events[cpu][event] = NULL;
}
}
/*
* Called by op_arm_start to create active perf events based on the
* perviously configured attributes.
*/
static int op_perf_start(void)
{
int cpu, event, ret = 0;
for_each_online_cpu(cpu) {
for (event = 0; event < perf_num_counters; ++event) {
ret = op_create_counter(cpu, event);
if (ret)
goto out;
}
}
out:
return ret;
}
/*
* Called by op_arm_stop at the end of a profiling run.
*/
static void op_perf_stop(void)
{
int cpu, event;
for_each_online_cpu(cpu)
for (event = 0; event < perf_num_counters; ++event)
op_destroy_counter(cpu, event);
}
static char *op_name_from_perf_id(enum arm_perf_pmu_ids id)
{
switch (id) { switch (id) {
case ARM_PERF_PMU_ID_XSCALE1: case ARM_PERF_PMU_ID_XSCALE1:
return "arm/xscale1"; return "arm/xscale1";
...@@ -176,116 +47,6 @@ static char *op_name_from_perf_id(enum arm_perf_pmu_ids id) ...@@ -176,116 +47,6 @@ static char *op_name_from_perf_id(enum arm_perf_pmu_ids id)
} }
} }
static int op_arm_create_files(struct super_block *sb, struct dentry *root)
{
unsigned int i;
for (i = 0; i < perf_num_counters; i++) {
struct dentry *dir;
char buf[4];
snprintf(buf, sizeof buf, "%d", i);
dir = oprofilefs_mkdir(sb, root, buf);
oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
}
return 0;
}
static int op_arm_setup(void)
{
spin_lock(&oprofilefs_lock);
op_perf_setup();
spin_unlock(&oprofilefs_lock);
return 0;
}
static int op_arm_start(void)
{
int ret = -EBUSY;
mutex_lock(&op_arm_mutex);
if (!op_arm_enabled) {
ret = 0;
op_perf_start();
op_arm_enabled = 1;
}
mutex_unlock(&op_arm_mutex);
return ret;
}
static void op_arm_stop(void)
{
mutex_lock(&op_arm_mutex);
if (op_arm_enabled)
op_perf_stop();
op_arm_enabled = 0;
mutex_unlock(&op_arm_mutex);
}
#ifdef CONFIG_PM
static int op_arm_suspend(struct platform_device *dev, pm_message_t state)
{
mutex_lock(&op_arm_mutex);
if (op_arm_enabled)
op_perf_stop();
mutex_unlock(&op_arm_mutex);
return 0;
}
static int op_arm_resume(struct platform_device *dev)
{
mutex_lock(&op_arm_mutex);
if (op_arm_enabled && op_perf_start())
op_arm_enabled = 0;
mutex_unlock(&op_arm_mutex);
return 0;
}
static struct platform_driver oprofile_driver = {
.driver = {
.name = "arm-oprofile",
},
.resume = op_arm_resume,
.suspend = op_arm_suspend,
};
static struct platform_device *oprofile_pdev;
static int __init init_driverfs(void)
{
int ret;
ret = platform_driver_register(&oprofile_driver);
if (ret)
goto out;
oprofile_pdev = platform_device_register_simple(
oprofile_driver.driver.name, 0, NULL, 0);
if (IS_ERR(oprofile_pdev)) {
ret = PTR_ERR(oprofile_pdev);
platform_driver_unregister(&oprofile_driver);
}
out:
return ret;
}
static void exit_driverfs(void)
{
platform_device_unregister(oprofile_pdev);
platform_driver_unregister(&oprofile_driver);
}
#else
static int __init init_driverfs(void) { return 0; }
#define exit_driverfs() do { } while (0)
#endif /* CONFIG_PM */
static int report_trace(struct stackframe *frame, void *d) static int report_trace(struct stackframe *frame, void *d)
{ {
unsigned int *depth = d; unsigned int *depth = d;
...@@ -350,74 +111,14 @@ static void arm_backtrace(struct pt_regs * const regs, unsigned int depth) ...@@ -350,74 +111,14 @@ static void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
int __init oprofile_arch_init(struct oprofile_operations *ops) int __init oprofile_arch_init(struct oprofile_operations *ops)
{ {
int cpu, ret = 0;
perf_num_counters = armpmu_get_max_events();
counter_config = kcalloc(perf_num_counters,
sizeof(struct op_counter_config), GFP_KERNEL);
if (!counter_config) {
pr_info("oprofile: failed to allocate %d "
"counters\n", perf_num_counters);
return -ENOMEM;
}
ret = init_driverfs();
if (ret) {
kfree(counter_config);
counter_config = NULL;
return ret;
}
for_each_possible_cpu(cpu) {
perf_events[cpu] = kcalloc(perf_num_counters,
sizeof(struct perf_event *), GFP_KERNEL);
if (!perf_events[cpu]) {
pr_info("oprofile: failed to allocate %d perf events "
"for cpu %d\n", perf_num_counters, cpu);
while (--cpu >= 0)
kfree(perf_events[cpu]);
return -ENOMEM;
}
}
ops->backtrace = arm_backtrace; ops->backtrace = arm_backtrace;
ops->create_files = op_arm_create_files;
ops->setup = op_arm_setup;
ops->start = op_arm_start;
ops->stop = op_arm_stop;
ops->shutdown = op_arm_stop;
ops->cpu_type = op_name_from_perf_id(armpmu_get_pmu_id());
if (!ops->cpu_type)
ret = -ENODEV;
else
pr_info("oprofile: using %s\n", ops->cpu_type);
return ret; return oprofile_perf_init(ops);
} }
void oprofile_arch_exit(void) void __exit oprofile_arch_exit(void)
{ {
int cpu, id; oprofile_perf_exit();
struct perf_event *event;
if (*perf_events) {
for_each_possible_cpu(cpu) {
for (id = 0; id < perf_num_counters; ++id) {
event = perf_events[cpu][id];
if (event != NULL)
perf_event_release_kernel(event);
}
kfree(perf_events[cpu]);
}
}
if (counter_config) {
kfree(counter_config);
exit_driverfs();
}
} }
#else #else
int __init oprofile_arch_init(struct oprofile_operations *ops) int __init oprofile_arch_init(struct oprofile_operations *ops)
...@@ -425,5 +126,5 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) ...@@ -425,5 +126,5 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
pr_info("oprofile: hardware counters not available\n"); pr_info("oprofile: hardware counters not available\n");
return -ENODEV; return -ENODEV;
} }
void oprofile_arch_exit(void) {} void __exit oprofile_arch_exit(void) {}
#endif /* CONFIG_HW_PERF_EVENTS */ #endif /* CONFIG_HW_PERF_EVENTS */
@@ -7,6 +7,7 @@ config FRV
 default y
 select HAVE_IDE
 select HAVE_ARCH_TRACEHOOK
+select HAVE_IRQ_WORK
 select HAVE_PERF_EVENTS

 config ZONE_DMA
......
@@ -5,4 +5,4 @@
 lib-y := \
 __ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \
 checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \
-outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_event.o
+outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o
-/* Performance event handling
- *
- * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-#include <linux/perf_event.h>
-
-/*
- * mark the performance event as pending
- */
-void set_perf_event_pending(void)
-{
-}
@@ -6,12 +6,6 @@
  * David Mosberger-Tang <davidm@hpl.hp.com>
  */

-#include <linux/threads.h>
-#include <linux/irq.h>
-
-#include <asm/processor.h>
-
 /*
  * No irq_cpustat_t for IA-64. The data is held in the per-CPU data structure.
  */

@@ -20,6 +14,11 @@
 #define local_softirq_pending() (local_cpu_data->softirq_pending)

+#include <linux/threads.h>
+#include <linux/irq.h>
+
+#include <asm/processor.h>
+
 extern void __iomem *ipi_base_addr;

 void ack_bad_irq(unsigned int irq);
......
@@ -16,6 +16,7 @@ config PARISC
 select RTC_DRV_GENERIC
 select INIT_ALL_POSSIBLE
 select BUG
+select HAVE_IRQ_WORK
 select HAVE_PERF_EVENTS
 select GENERIC_ATOMIC64 if !64BIT
 help
......
 #ifndef __ASM_PARISC_PERF_EVENT_H
 #define __ASM_PARISC_PERF_EVENT_H

-/* parisc only supports software events through this interface. */
-static inline void set_perf_event_pending(void) { }
+/* Empty, just to avoid compiling error */

 #endif /* __ASM_PARISC_PERF_EVENT_H */
@@ -138,6 +138,7 @@ config PPC
 select HAVE_OPROFILE
 select HAVE_SYSCALL_WRAPPERS if PPC64
 select GENERIC_ATOMIC64 if PPC32
+select HAVE_IRQ_WORK
 select HAVE_PERF_EVENTS
 select HAVE_REGS_AND_STACK_ACCESS_API
 select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
......
@@ -129,7 +129,7 @@ struct paca_struct {
 u8 soft_enabled;		/* irq soft-enable flag */
 u8 hard_enabled;		/* set if irqs are enabled in MSR */
 u8 io_sync;			/* writel() needs spin_unlock sync */
-u8 perf_event_pending;		/* PM interrupt while soft-disabled */
+u8 irq_work_pending;		/* IRQ_WORK interrupt while soft-disable */

 /* Stuff for accurate time accounting */
 u64 user_time;			/* accumulated usermode TB ticks */
......
...@@ -23,18 +23,6 @@ ...@@ -23,18 +23,6 @@
#include "ppc32.h" #include "ppc32.h"
#endif #endif
/*
* Store another value in a callchain_entry.
*/
static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
unsigned int nr = entry->nr;
if (nr < PERF_MAX_STACK_DEPTH) {
entry->ip[nr] = ip;
entry->nr = nr + 1;
}
}
/* /*
* Is sp valid as the address of the next kernel stack frame after prev_sp? * Is sp valid as the address of the next kernel stack frame after prev_sp?
...@@ -58,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) ...@@ -58,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
return 0; return 0;
} }
static void perf_callchain_kernel(struct pt_regs *regs, void
struct perf_callchain_entry *entry) perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{ {
unsigned long sp, next_sp; unsigned long sp, next_sp;
unsigned long next_ip; unsigned long next_ip;
...@@ -69,8 +57,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, ...@@ -69,8 +57,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
lr = regs->link; lr = regs->link;
sp = regs->gpr[1]; sp = regs->gpr[1];
callchain_store(entry, PERF_CONTEXT_KERNEL); perf_callchain_store(entry, regs->nip);
callchain_store(entry, regs->nip);
if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
return; return;
...@@ -89,7 +76,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, ...@@ -89,7 +76,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
next_ip = regs->nip; next_ip = regs->nip;
lr = regs->link; lr = regs->link;
level = 0; level = 0;
callchain_store(entry, PERF_CONTEXT_KERNEL); perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
} else { } else {
if (level == 0) if (level == 0)
...@@ -111,7 +98,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, ...@@ -111,7 +98,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
++level; ++level;
} }
callchain_store(entry, next_ip); perf_callchain_store(entry, next_ip);
if (!valid_next_sp(next_sp, sp)) if (!valid_next_sp(next_sp, sp))
return; return;
sp = next_sp; sp = next_sp;
...@@ -233,8 +220,8 @@ static int sane_signal_64_frame(unsigned long sp) ...@@ -233,8 +220,8 @@ static int sane_signal_64_frame(unsigned long sp)
puc == (unsigned long) &sf->uc; puc == (unsigned long) &sf->uc;
} }
static void perf_callchain_user_64(struct pt_regs *regs, static void perf_callchain_user_64(struct perf_callchain_entry *entry,
struct perf_callchain_entry *entry) struct pt_regs *regs)
{ {
unsigned long sp, next_sp; unsigned long sp, next_sp;
unsigned long next_ip; unsigned long next_ip;
...@@ -246,8 +233,7 @@ static void perf_callchain_user_64(struct pt_regs *regs, ...@@ -246,8 +233,7 @@ static void perf_callchain_user_64(struct pt_regs *regs,
next_ip = regs->nip; next_ip = regs->nip;
lr = regs->link; lr = regs->link;
sp = regs->gpr[1]; sp = regs->gpr[1];
callchain_store(entry, PERF_CONTEXT_USER); perf_callchain_store(entry, next_ip);
callchain_store(entry, next_ip);
for (;;) { for (;;) {
fp = (unsigned long __user *) sp; fp = (unsigned long __user *) sp;
...@@ -276,14 +262,14 @@ static void perf_callchain_user_64(struct pt_regs *regs, ...@@ -276,14 +262,14 @@ static void perf_callchain_user_64(struct pt_regs *regs,
read_user_stack_64(&uregs[PT_R1], &sp)) read_user_stack_64(&uregs[PT_R1], &sp))
return; return;
level = 0; level = 0;
callchain_store(entry, PERF_CONTEXT_USER); perf_callchain_store(entry, PERF_CONTEXT_USER);
callchain_store(entry, next_ip); perf_callchain_store(entry, next_ip);
continue; continue;
} }
if (level == 0) if (level == 0)
next_ip = lr; next_ip = lr;
callchain_store(entry, next_ip); perf_callchain_store(entry, next_ip);
++level; ++level;
sp = next_sp; sp = next_sp;
} }
...@@ -315,8 +301,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) ...@@ -315,8 +301,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
return __get_user_inatomic(*ret, ptr); return __get_user_inatomic(*ret, ptr);
} }
static inline void perf_callchain_user_64(struct pt_regs *regs, static inline void perf_callchain_user_64(struct perf_callchain_entry *entry,
struct perf_callchain_entry *entry) struct pt_regs *regs)
{ {
} }
...@@ -435,8 +421,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp, ...@@ -435,8 +421,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp,
return mctx->mc_gregs; return mctx->mc_gregs;
} }
static void perf_callchain_user_32(struct pt_regs *regs, static void perf_callchain_user_32(struct perf_callchain_entry *entry,
struct perf_callchain_entry *entry) struct pt_regs *regs)
{ {
unsigned int sp, next_sp; unsigned int sp, next_sp;
unsigned int next_ip; unsigned int next_ip;
...@@ -447,8 +433,7 @@ static void perf_callchain_user_32(struct pt_regs *regs, ...@@ -447,8 +433,7 @@ static void perf_callchain_user_32(struct pt_regs *regs,
next_ip = regs->nip; next_ip = regs->nip;
lr = regs->link; lr = regs->link;
sp = regs->gpr[1]; sp = regs->gpr[1];
callchain_store(entry, PERF_CONTEXT_USER); perf_callchain_store(entry, next_ip);
callchain_store(entry, next_ip);
while (entry->nr < PERF_MAX_STACK_DEPTH) { while (entry->nr < PERF_MAX_STACK_DEPTH) {
fp = (unsigned int __user *) (unsigned long) sp; fp = (unsigned int __user *) (unsigned long) sp;
...@@ -470,45 +455,24 @@ static void perf_callchain_user_32(struct pt_regs *regs, ...@@ -470,45 +455,24 @@ static void perf_callchain_user_32(struct pt_regs *regs,
read_user_stack_32(&uregs[PT_R1], &sp)) read_user_stack_32(&uregs[PT_R1], &sp))
return; return;
level = 0; level = 0;
callchain_store(entry, PERF_CONTEXT_USER); perf_callchain_store(entry, PERF_CONTEXT_USER);
callchain_store(entry, next_ip); perf_callchain_store(entry, next_ip);
continue; continue;
} }
if (level == 0) if (level == 0)
next_ip = lr; next_ip = lr;
callchain_store(entry, next_ip); perf_callchain_store(entry, next_ip);
++level; ++level;
sp = next_sp; sp = next_sp;
} }
} }
/* void
* Since we can't get PMU interrupts inside a PMU interrupt handler, perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
* we don't need separate irq and nmi entries here.
*/
static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain);
struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
{ {
struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain);
entry->nr = 0;
if (!user_mode(regs)) {
perf_callchain_kernel(regs, entry);
if (current->mm)
regs = task_pt_regs(current);
else
regs = NULL;
}
if (regs) {
if (current_is_64bit()) if (current_is_64bit())
perf_callchain_user_64(regs, entry); perf_callchain_user_64(entry, regs);
else else
perf_callchain_user_32(regs, entry); perf_callchain_user_32(entry, regs);
}
return entry;
} }
...@@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event) ...@@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event)
{ {
s64 val, delta, prev; s64 val, delta, prev;
if (event->hw.state & PERF_HES_STOPPED)
return;
/* /*
* Performance monitor interrupts come even when interrupts * Performance monitor interrupts come even when interrupts
* are soft-disabled, as long as interrupts are hard-enabled. * are soft-disabled, as long as interrupts are hard-enabled.
...@@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event) ...@@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event)
* Disable all events to prevent PMU interrupts and to allow * Disable all events to prevent PMU interrupts and to allow
* events to be added or removed. * events to be added or removed.
*/ */
void hw_perf_disable(void) static void fsl_emb_pmu_disable(struct pmu *pmu)
{ {
struct cpu_hw_events *cpuhw; struct cpu_hw_events *cpuhw;
unsigned long flags; unsigned long flags;
...@@ -216,7 +219,7 @@ void hw_perf_disable(void) ...@@ -216,7 +219,7 @@ void hw_perf_disable(void)
* If we were previously disabled and events were added, then * If we were previously disabled and events were added, then
* put the new config on the PMU. * put the new config on the PMU.
*/ */
void hw_perf_enable(void) static void fsl_emb_pmu_enable(struct pmu *pmu)
{ {
struct cpu_hw_events *cpuhw; struct cpu_hw_events *cpuhw;
unsigned long flags; unsigned long flags;
...@@ -262,8 +265,8 @@ static int collect_events(struct perf_event *group, int max_count, ...@@ -262,8 +265,8 @@ static int collect_events(struct perf_event *group, int max_count,
return n; return n;
} }
/* perf must be disabled, context locked on entry */ /* context locked on entry */
static int fsl_emb_pmu_enable(struct perf_event *event) static int fsl_emb_pmu_add(struct perf_event *event, int flags)
{ {
struct cpu_hw_events *cpuhw; struct cpu_hw_events *cpuhw;
int ret = -EAGAIN; int ret = -EAGAIN;
...@@ -271,6 +274,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) ...@@ -271,6 +274,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
u64 val; u64 val;
int i; int i;
perf_pmu_disable(event->pmu);
cpuhw = &get_cpu_var(cpu_hw_events); cpuhw = &get_cpu_var(cpu_hw_events);
if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) if (event->hw.config & FSL_EMB_EVENT_RESTRICTED)
...@@ -301,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event) ...@@ -301,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
val = 0x80000000L - left; val = 0x80000000L - left;
} }
local64_set(&event->hw.prev_count, val); local64_set(&event->hw.prev_count, val);
if (!(flags & PERF_EF_START)) {
event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
val = 0;
}
write_pmc(i, val); write_pmc(i, val);
perf_event_update_userpage(event); perf_event_update_userpage(event);
...@@ -310,15 +320,17 @@ static int fsl_emb_pmu_enable(struct perf_event *event) ...@@ -310,15 +320,17 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
ret = 0; ret = 0;
out: out:
put_cpu_var(cpu_hw_events); put_cpu_var(cpu_hw_events);
perf_pmu_enable(event->pmu);
return ret; return ret;
} }
/* perf must be disabled, context locked on entry */ /* context locked on entry */
static void fsl_emb_pmu_disable(struct perf_event *event) static void fsl_emb_pmu_del(struct perf_event *event, int flags)
{ {
struct cpu_hw_events *cpuhw; struct cpu_hw_events *cpuhw;
int i = event->hw.idx; int i = event->hw.idx;
perf_pmu_disable(event->pmu);
if (i < 0) if (i < 0)
goto out; goto out;
...@@ -346,44 +358,57 @@ static void fsl_emb_pmu_disable(struct perf_event *event) ...@@ -346,44 +358,57 @@ static void fsl_emb_pmu_disable(struct perf_event *event)
cpuhw->n_events--; cpuhw->n_events--;
out: out:
perf_pmu_enable(event->pmu);
put_cpu_var(cpu_hw_events); put_cpu_var(cpu_hw_events);
} }
/* static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
* Re-enable interrupts on a event after they were throttled
* because they were coming too fast.
*
* Context is locked on entry, but perf is not disabled.
*/
static void fsl_emb_pmu_unthrottle(struct perf_event *event)
{ {
s64 val, left;
unsigned long flags; unsigned long flags;
s64 left;
if (event->hw.idx < 0 || !event->hw.sample_period) if (event->hw.idx < 0 || !event->hw.sample_period)
return; return;
if (!(event->hw.state & PERF_HES_STOPPED))
return;
if (ef_flags & PERF_EF_RELOAD)
WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
local_irq_save(flags); local_irq_save(flags);
perf_disable(); perf_pmu_disable(event->pmu);
fsl_emb_pmu_read(event);
left = event->hw.sample_period; event->hw.state = 0;
event->hw.last_period = left; left = local64_read(&event->hw.period_left);
val = 0; write_pmc(event->hw.idx, left);
if (left < 0x80000000L)
val = 0x80000000L - left;
write_pmc(event->hw.idx, val);
local64_set(&event->hw.prev_count, val);
local64_set(&event->hw.period_left, left);
perf_event_update_userpage(event); perf_event_update_userpage(event);
perf_enable(); perf_pmu_enable(event->pmu);
local_irq_restore(flags); local_irq_restore(flags);
} }
static struct pmu fsl_emb_pmu = { static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags)
.enable = fsl_emb_pmu_enable, {
.disable = fsl_emb_pmu_disable, unsigned long flags;
.read = fsl_emb_pmu_read,
.unthrottle = fsl_emb_pmu_unthrottle, if (event->hw.idx < 0 || !event->hw.sample_period)
}; return;
if (event->hw.state & PERF_HES_STOPPED)
return;
local_irq_save(flags);
perf_pmu_disable(event->pmu);
fsl_emb_pmu_read(event);
event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
write_pmc(event->hw.idx, 0);
perf_event_update_userpage(event);
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
}
/* /*
* Release the PMU if this is the last perf_event. * Release the PMU if this is the last perf_event.
...@@ -428,7 +453,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) ...@@ -428,7 +453,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
return 0; return 0;
} }
const struct pmu *hw_perf_event_init(struct perf_event *event) static int fsl_emb_pmu_event_init(struct perf_event *event)
{ {
u64 ev; u64 ev;
struct perf_event *events[MAX_HWEVENTS]; struct perf_event *events[MAX_HWEVENTS];
...@@ -441,14 +466,14 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) ...@@ -441,14 +466,14 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
case PERF_TYPE_HARDWARE: case PERF_TYPE_HARDWARE:
ev = event->attr.config; ev = event->attr.config;
if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
return ERR_PTR(-EOPNOTSUPP); return -EOPNOTSUPP;
ev = ppmu->generic_events[ev]; ev = ppmu->generic_events[ev];
break; break;
case PERF_TYPE_HW_CACHE: case PERF_TYPE_HW_CACHE:
err = hw_perf_cache_event(event->attr.config, &ev); err = hw_perf_cache_event(event->attr.config, &ev);
if (err) if (err)
return ERR_PTR(err); return err;
break; break;
case PERF_TYPE_RAW: case PERF_TYPE_RAW:
...@@ -456,12 +481,12 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) ...@@ -456,12 +481,12 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
break; break;
default: default:
return ERR_PTR(-EINVAL); return -ENOENT;
} }
event->hw.config = ppmu->xlate_event(ev); event->hw.config = ppmu->xlate_event(ev);
if (!(event->hw.config & FSL_EMB_EVENT_VALID)) if (!(event->hw.config & FSL_EMB_EVENT_VALID))
return ERR_PTR(-EINVAL); return -EINVAL;
/* /*
* If this is in a group, check if it can go on with all the * If this is in a group, check if it can go on with all the
...@@ -473,7 +498,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) ...@@ -473,7 +498,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
n = collect_events(event->group_leader, n = collect_events(event->group_leader,
ppmu->n_counter - 1, events); ppmu->n_counter - 1, events);
if (n < 0) if (n < 0)
return ERR_PTR(-EINVAL); return -EINVAL;
} }
if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) {
...@@ -484,7 +509,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) ...@@ -484,7 +509,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
} }
if (num_restricted >= ppmu->n_restricted) if (num_restricted >= ppmu->n_restricted)
return ERR_PTR(-EINVAL); return -EINVAL;
} }
event->hw.idx = -1; event->hw.idx = -1;
...@@ -497,7 +522,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) ...@@ -497,7 +522,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
if (event->attr.exclude_kernel) if (event->attr.exclude_kernel)
event->hw.config_base |= PMLCA_FCS; event->hw.config_base |= PMLCA_FCS;
if (event->attr.exclude_idle) if (event->attr.exclude_idle)
return ERR_PTR(-ENOTSUPP); return -ENOTSUPP;
event->hw.last_period = event->hw.sample_period; event->hw.last_period = event->hw.sample_period;
local64_set(&event->hw.period_left, event->hw.last_period); local64_set(&event->hw.period_left, event->hw.last_period);
...@@ -523,11 +548,20 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) ...@@ -523,11 +548,20 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
} }
event->destroy = hw_perf_event_destroy; event->destroy = hw_perf_event_destroy;
if (err) return err;
return ERR_PTR(err);
return &fsl_emb_pmu;
} }
static struct pmu fsl_emb_pmu = {
.pmu_enable = fsl_emb_pmu_enable,
.pmu_disable = fsl_emb_pmu_disable,
.event_init = fsl_emb_pmu_event_init,
.add = fsl_emb_pmu_add,
.del = fsl_emb_pmu_del,
.start = fsl_emb_pmu_start,
.stop = fsl_emb_pmu_stop,
.read = fsl_emb_pmu_read,
};
/* /*
* A counter has overflowed; update its count and record * A counter has overflowed; update its count and record
* things if requested. Note that interrupts are hard-disabled * things if requested. Note that interrupts are hard-disabled
...@@ -540,6 +574,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, ...@@ -540,6 +574,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
s64 prev, delta, left; s64 prev, delta, left;
int record = 0; int record = 0;
if (event->hw.state & PERF_HES_STOPPED) {
write_pmc(event->hw.idx, 0);
return;
}
/* we don't have to worry about interrupts here */ /* we don't have to worry about interrupts here */
prev = local64_read(&event->hw.prev_count); prev = local64_read(&event->hw.prev_count);
delta = (val - prev) & 0xfffffffful; delta = (val - prev) & 0xfffffffful;
...@@ -562,6 +601,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, ...@@ -562,6 +601,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
val = 0x80000000LL - left; val = 0x80000000LL - left;
} }
write_pmc(event->hw.idx, val);
local64_set(&event->hw.prev_count, val);
local64_set(&event->hw.period_left, left);
perf_event_update_userpage(event);
/* /*
* Finally record data if requested. * Finally record data if requested.
*/ */
...@@ -571,23 +615,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, ...@@ -571,23 +615,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
perf_sample_data_init(&data, 0); perf_sample_data_init(&data, 0);
data.period = event->hw.last_period; data.period = event->hw.last_period;
if (perf_event_overflow(event, nmi, &data, regs)) { if (perf_event_overflow(event, nmi, &data, regs))
/* fsl_emb_pmu_stop(event, 0);
* Interrupts are coming too fast - throttle them
* by setting the event to 0, so it will be
* at least 2^30 cycles until the next interrupt
* (assuming each event counts at most 2 counts
* per cycle).
*/
val = 0;
left = ~0ULL >> 1;
}
} }
write_pmc(event->hw.idx, val);
local64_set(&event->hw.prev_count, val);
local64_set(&event->hw.period_left, left);
perf_event_update_userpage(event);
} }
static void perf_event_interrupt(struct pt_regs *regs) static void perf_event_interrupt(struct pt_regs *regs)
...@@ -651,5 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) ...@@ -651,5 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
pr_info("%s performance monitor hardware support registered\n", pr_info("%s performance monitor hardware support registered\n",
pmu->name); pmu->name);
perf_pmu_register(&fsl_emb_pmu);
return 0; return 0;
} }
...@@ -53,7 +53,7 @@ ...@@ -53,7 +53,7 @@
#include <linux/posix-timers.h> #include <linux/posix-timers.h>
#include <linux/irq.h> #include <linux/irq.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/perf_event.h> #include <linux/irq_work.h>
#include <asm/trace.h> #include <asm/trace.h>
#include <asm/io.h> #include <asm/io.h>
...@@ -493,60 +493,60 @@ void __init iSeries_time_init_early(void) ...@@ -493,60 +493,60 @@ void __init iSeries_time_init_early(void)
} }
#endif /* CONFIG_PPC_ISERIES */ #endif /* CONFIG_PPC_ISERIES */
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_IRQ_WORK
/* /*
* 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
*/ */
#ifdef CONFIG_PPC64 #ifdef CONFIG_PPC64
static inline unsigned long test_perf_event_pending(void) static inline unsigned long test_irq_work_pending(void)
{ {
unsigned long x; unsigned long x;
asm volatile("lbz %0,%1(13)" asm volatile("lbz %0,%1(13)"
: "=r" (x) : "=r" (x)
: "i" (offsetof(struct paca_struct, perf_event_pending))); : "i" (offsetof(struct paca_struct, irq_work_pending)));
return x; return x;
} }
static inline void set_perf_event_pending_flag(void) static inline void set_irq_work_pending_flag(void)
{ {
asm volatile("stb %0,%1(13)" : : asm volatile("stb %0,%1(13)" : :
"r" (1), "r" (1),
"i" (offsetof(struct paca_struct, perf_event_pending))); "i" (offsetof(struct paca_struct, irq_work_pending)));
} }
static inline void clear_perf_event_pending(void) static inline void clear_irq_work_pending(void)
{ {
asm volatile("stb %0,%1(13)" : : asm volatile("stb %0,%1(13)" : :
"r" (0), "r" (0),
"i" (offsetof(struct paca_struct, perf_event_pending))); "i" (offsetof(struct paca_struct, irq_work_pending)));
} }
#else /* 32-bit */ #else /* 32-bit */
DEFINE_PER_CPU(u8, perf_event_pending); DEFINE_PER_CPU(u8, irq_work_pending);
#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 #define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1
#define test_perf_event_pending() __get_cpu_var(perf_event_pending) #define test_irq_work_pending() __get_cpu_var(irq_work_pending)
#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 #define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0
#endif /* 32 vs 64 bit */ #endif /* 32 vs 64 bit */
void set_perf_event_pending(void) void set_irq_work_pending(void)
{ {
preempt_disable(); preempt_disable();
set_perf_event_pending_flag(); set_irq_work_pending_flag();
set_dec(1); set_dec(1);
preempt_enable(); preempt_enable();
} }
#else /* CONFIG_PERF_EVENTS */ #else /* CONFIG_IRQ_WORK */
#define test_perf_event_pending() 0 #define test_irq_work_pending() 0
#define clear_perf_event_pending() #define clear_irq_work_pending()
#endif /* CONFIG_PERF_EVENTS */ #endif /* CONFIG_IRQ_WORK */
/* /*
* For iSeries shared processors, we have to let the hypervisor * For iSeries shared processors, we have to let the hypervisor
...@@ -587,9 +587,9 @@ void timer_interrupt(struct pt_regs * regs) ...@@ -587,9 +587,9 @@ void timer_interrupt(struct pt_regs * regs)
calculate_steal_time(); calculate_steal_time();
if (test_perf_event_pending()) { if (test_irq_work_pending()) {
clear_perf_event_pending(); clear_irq_work_pending();
perf_event_do_pending(); irq_work_run();
} }
#ifdef CONFIG_PPC_ISERIES #ifdef CONFIG_PPC_ISERIES
......
@@ -95,6 +95,7 @@ config S390
 select HAVE_KVM if 64BIT
 select HAVE_ARCH_TRACEHOOK
 select INIT_ALL_POSSIBLE
+select HAVE_IRQ_WORK
 select HAVE_PERF_EVENTS
 select HAVE_KERNEL_GZIP
 select HAVE_KERNEL_BZIP2
......
@@ -12,10 +12,6 @@
 #ifndef __ASM_HARDIRQ_H
 #define __ASM_HARDIRQ_H

-#include <linux/threads.h>
-#include <linux/sched.h>
-#include <linux/cache.h>
-#include <linux/interrupt.h>
 #include <asm/lowcore.h>

 #define local_softirq_pending() (S390_lowcore.softirq_pending)
......
@@ -4,7 +4,6 @@
  * Copyright 2009 Martin Schwidefsky, IBM Corporation.
  */

-static inline void set_perf_event_pending(void) {}
-static inline void clear_perf_event_pending(void) {}
+/* Empty, just to avoid compiling error */

 #define PERF_EVENT_INDEX_OFFSET 0
@@ -16,6 +16,7 @@ config SUPERH
 select HAVE_ARCH_TRACEHOOK
 select HAVE_DMA_API_DEBUG
 select HAVE_DMA_ATTRS
+select HAVE_IRQ_WORK
 select HAVE_PERF_EVENTS
 select PERF_USE_VMALLOC
 select HAVE_KERNEL_GZIP

@@ -249,6 +250,11 @@ config ARCH_SHMOBILE
 select PM
 select PM_RUNTIME

+config CPU_HAS_PMU
+	depends on CPU_SH4 || CPU_SH4A
+	default y
+	bool
+
 if SUPERH32

 choice

@@ -738,6 +744,14 @@ config GUSA_RB
 LLSC, this should be more efficient than the other alternative of
 disabling interrupts around the atomic sequence.

+config HW_PERF_EVENTS
+	bool "Enable hardware performance counter support for perf events"
+	depends on PERF_EVENTS && CPU_HAS_PMU
+	default y
+	help
+	  Enable hardware performance counter support for perf events. If
+	  disabled, perf events will use software events only.
+
 source "drivers/sh/Kconfig"

 endmenu
......
@@ -26,11 +26,4 @@ extern int register_sh_pmu(struct sh_pmu *);
 extern int reserve_pmc_hardware(void);
 extern void release_pmc_hardware(void);

-static inline void set_perf_event_pending(void)
-{
-	/* Nothing to see here, move along. */
-}
-
-#define PERF_EVENT_INDEX_OFFSET 0
-
 #endif /* __ASM_SH_PERF_EVENT_H */
...@@ -14,11 +14,6 @@ ...@@ -14,11 +14,6 @@
#include <asm/unwinder.h> #include <asm/unwinder.h>
#include <asm/ptrace.h> #include <asm/ptrace.h>
static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
if (entry->nr < PERF_MAX_STACK_DEPTH)
entry->ip[entry->nr++] = ip;
}
static void callchain_warning(void *data, char *msg) static void callchain_warning(void *data, char *msg)
{ {
...@@ -39,7 +34,7 @@ static void callchain_address(void *data, unsigned long addr, int reliable) ...@@ -39,7 +34,7 @@ static void callchain_address(void *data, unsigned long addr, int reliable)
struct perf_callchain_entry *entry = data; struct perf_callchain_entry *entry = data;
if (reliable) if (reliable)
callchain_store(entry, addr); perf_callchain_store(entry, addr);
} }
static const struct stacktrace_ops callchain_ops = { static const struct stacktrace_ops callchain_ops = {
...@@ -49,47 +44,10 @@ static const struct stacktrace_ops callchain_ops = { ...@@ -49,47 +44,10 @@ static const struct stacktrace_ops callchain_ops = {
.address = callchain_address, .address = callchain_address,
}; };
static void void
perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{ {
callchain_store(entry, PERF_CONTEXT_KERNEL); perf_callchain_store(entry, regs->pc);
callchain_store(entry, regs->pc);
unwind_stack(NULL, regs, NULL, &callchain_ops, entry); unwind_stack(NULL, regs, NULL, &callchain_ops, entry);
} }
static void
perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
int is_user;
if (!regs)
return;
is_user = user_mode(regs);
if (is_user && current->state != TASK_RUNNING)
return;
/*
* Only the kernel side is implemented for now.
*/
if (!is_user)
perf_callchain_kernel(regs, entry);
}
/*
* No need for separate IRQ and NMI entries.
*/
static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
{
struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
entry->nr = 0;
perf_do_callchain(regs, entry);
return entry;
}
...@@ -59,6 +59,24 @@ static inline int sh_pmu_initialized(void) ...@@ -59,6 +59,24 @@ static inline int sh_pmu_initialized(void)
return !!sh_pmu; return !!sh_pmu;
} }
const char *perf_pmu_name(void)
{
if (!sh_pmu)
return NULL;
return sh_pmu->name;
}
EXPORT_SYMBOL_GPL(perf_pmu_name);
int perf_num_counters(void)
{
if (!sh_pmu)
return 0;
return sh_pmu->num_events;
}
EXPORT_SYMBOL_GPL(perf_num_counters);
/* /*
* Release the PMU if this is the last perf_event. * Release the PMU if this is the last perf_event.
*/ */
...@@ -206,50 +224,80 @@ static void sh_perf_event_update(struct perf_event *event, ...@@ -206,50 +224,80 @@ static void sh_perf_event_update(struct perf_event *event,
local64_add(delta, &event->count); local64_add(delta, &event->count);
} }
static void sh_pmu_disable(struct perf_event *event) static void sh_pmu_stop(struct perf_event *event, int flags)
{ {
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw; struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx; int idx = hwc->idx;
clear_bit(idx, cpuc->active_mask); if (!(event->hw.state & PERF_HES_STOPPED)) {
sh_pmu->disable(hwc, idx); sh_pmu->disable(hwc, idx);
cpuc->events[idx] = NULL;
event->hw.state |= PERF_HES_STOPPED;
}
barrier(); if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
sh_perf_event_update(event, &event->hw, idx); sh_perf_event_update(event, &event->hw, idx);
event->hw.state |= PERF_HES_UPTODATE;
}
}
cpuc->events[idx] = NULL; static void sh_pmu_start(struct perf_event *event, int flags)
clear_bit(idx, cpuc->used_mask); {
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
if (WARN_ON_ONCE(idx == -1))
return;
if (flags & PERF_EF_RELOAD)
WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
cpuc->events[idx] = event;
event->hw.state = 0;
sh_pmu->enable(hwc, idx);
}
static void sh_pmu_del(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
sh_pmu_stop(event, PERF_EF_UPDATE);
__clear_bit(event->hw.idx, cpuc->used_mask);
perf_event_update_userpage(event); perf_event_update_userpage(event);
} }
static int sh_pmu_enable(struct perf_event *event) static int sh_pmu_add(struct perf_event *event, int flags)
{ {
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw; struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx; int idx = hwc->idx;
int ret = -EAGAIN;
perf_pmu_disable(event->pmu);
if (test_and_set_bit(idx, cpuc->used_mask)) { if (__test_and_set_bit(idx, cpuc->used_mask)) {
idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events);
if (idx == sh_pmu->num_events) if (idx == sh_pmu->num_events)
return -EAGAIN; goto out;
set_bit(idx, cpuc->used_mask); __set_bit(idx, cpuc->used_mask);
hwc->idx = idx; hwc->idx = idx;
} }
sh_pmu->disable(hwc, idx); sh_pmu->disable(hwc, idx);
cpuc->events[idx] = event; event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
set_bit(idx, cpuc->active_mask); if (flags & PERF_EF_START)
sh_pmu_start(event, PERF_EF_RELOAD);
sh_pmu->enable(hwc, idx);
perf_event_update_userpage(event); perf_event_update_userpage(event);
ret = 0;
return 0; out:
perf_pmu_enable(event->pmu);
return ret;
} }
static void sh_pmu_read(struct perf_event *event) static void sh_pmu_read(struct perf_event *event)
...@@ -257,24 +305,56 @@ static void sh_pmu_read(struct perf_event *event) ...@@ -257,24 +305,56 @@ static void sh_pmu_read(struct perf_event *event)
sh_perf_event_update(event, &event->hw, event->hw.idx); sh_perf_event_update(event, &event->hw, event->hw.idx);
} }
static const struct pmu pmu = { static int sh_pmu_event_init(struct perf_event *event)
.enable = sh_pmu_enable,
.disable = sh_pmu_disable,
.read = sh_pmu_read,
};
const struct pmu *hw_perf_event_init(struct perf_event *event)
{ {
int err = __hw_perf_event_init(event); int err;
switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HW_CACHE:
case PERF_TYPE_HARDWARE:
err = __hw_perf_event_init(event);
break;
default:
return -ENOENT;
}
if (unlikely(err)) { if (unlikely(err)) {
if (event->destroy) if (event->destroy)
event->destroy(event); event->destroy(event);
return ERR_PTR(err);
} }
return &pmu; return err;
}
static void sh_pmu_enable(struct pmu *pmu)
{
if (!sh_pmu_initialized())
return;
sh_pmu->enable_all();
}
static void sh_pmu_disable(struct pmu *pmu)
{
if (!sh_pmu_initialized())
return;
sh_pmu->disable_all();
} }
static struct pmu pmu = {
.pmu_enable = sh_pmu_enable,
.pmu_disable = sh_pmu_disable,
.event_init = sh_pmu_event_init,
.add = sh_pmu_add,
.del = sh_pmu_del,
.start = sh_pmu_start,
.stop = sh_pmu_stop,
.read = sh_pmu_read,
};
static void sh_pmu_setup(int cpu) static void sh_pmu_setup(int cpu)
{ {
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
...@@ -299,32 +379,17 @@ sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) ...@@ -299,32 +379,17 @@ sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
return NOTIFY_OK; return NOTIFY_OK;
} }
void hw_perf_enable(void) int __cpuinit register_sh_pmu(struct sh_pmu *_pmu)
{
if (!sh_pmu_initialized())
return;
sh_pmu->enable_all();
}
void hw_perf_disable(void)
{
if (!sh_pmu_initialized())
return;
sh_pmu->disable_all();
}
int __cpuinit register_sh_pmu(struct sh_pmu *pmu)
{ {
if (sh_pmu) if (sh_pmu)
return -EBUSY; return -EBUSY;
sh_pmu = pmu; sh_pmu = _pmu;
pr_info("Performance Events: %s support registered\n", pmu->name); pr_info("Performance Events: %s support registered\n", _pmu->name);
WARN_ON(pmu->num_events > MAX_HWEVENTS); WARN_ON(_pmu->num_events > MAX_HWEVENTS);
perf_pmu_register(&pmu);
perf_cpu_notifier(sh_pmu_notifier); perf_cpu_notifier(sh_pmu_notifier);
return 0; return 0;
} }
...@@ -6,4 +6,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ ...@@ -6,4 +6,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
oprofilefs.o oprofile_stats.o \ oprofilefs.o oprofile_stats.o \
timer_int.o ) timer_int.o )
ifeq ($(CONFIG_HW_PERF_EVENTS),y)
DRIVER_OBJS += $(addprefix ../../../drivers/oprofile/, oprofile_perf.o)
endif
oprofile-y := $(DRIVER_OBJS) common.o backtrace.o oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
...@@ -17,114 +17,45 @@ ...@@ -17,114 +17,45 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <linux/perf_event.h>
#include <asm/processor.h> #include <asm/processor.h>
#include "op_impl.h"
static struct op_sh_model *model;
static struct op_counter_config ctr[20];
#ifdef CONFIG_HW_PERF_EVENTS
extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth); extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth);
static int op_sh_setup(void) char *op_name_from_perf_id(void)
{
/* Pre-compute the values to stuff in the hardware registers. */
model->reg_setup(ctr);
/* Configure the registers on all cpus. */
on_each_cpu(model->cpu_setup, NULL, 1);
return 0;
}
static int op_sh_create_files(struct super_block *sb, struct dentry *root)
{ {
int i, ret = 0; const char *pmu;
char buf[20];
for (i = 0; i < model->num_counters; i++) { int size;
struct dentry *dir;
char buf[4];
snprintf(buf, sizeof(buf), "%d", i); pmu = perf_pmu_name();
dir = oprofilefs_mkdir(sb, root, buf); if (!pmu)
return NULL;
ret |= oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled); size = snprintf(buf, sizeof(buf), "sh/%s", pmu);
ret |= oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event); if (size > -1 && size < sizeof(buf))
ret |= oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel); return buf;
ret |= oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user);
if (model->create_files) return NULL;
ret |= model->create_files(sb, dir);
else
ret |= oprofilefs_create_ulong(sb, dir, "count", &ctr[i].count);
/* Dummy entries */
ret |= oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask);
}
return ret;
} }
static int op_sh_start(void) int __init oprofile_arch_init(struct oprofile_operations *ops)
{ {
/* Enable performance monitoring for all counters. */ ops->backtrace = sh_backtrace;
on_each_cpu(model->cpu_start, NULL, 1);
return 0; return oprofile_perf_init(ops);
} }
static void op_sh_stop(void) void __exit oprofile_arch_exit(void)
{ {
/* Disable performance monitoring for all counters. */ oprofile_perf_exit();
on_each_cpu(model->cpu_stop, NULL, 1);
} }
#else
int __init oprofile_arch_init(struct oprofile_operations *ops) int __init oprofile_arch_init(struct oprofile_operations *ops)
{ {
struct op_sh_model *lmodel = NULL; pr_info("oprofile: hardware counters not available\n");
int ret;
/*
* Always assign the backtrace op. If the counter initialization
* fails, we fall back to the timer which will still make use of
* this.
*/
ops->backtrace = sh_backtrace;
/*
* XXX
*
* All of the SH7750/SH-4A counters have been converted to perf,
* this infrastructure hook is left for other users until they've
* had a chance to convert over, at which point all of this
* will be deleted.
*/
if (!lmodel)
return -ENODEV;
if (!(current_cpu_data.flags & CPU_HAS_PERF_COUNTER))
return -ENODEV; return -ENODEV;
ret = lmodel->init();
if (unlikely(ret != 0))
return ret;
model = lmodel;
ops->setup = op_sh_setup;
ops->create_files = op_sh_create_files;
ops->start = op_sh_start;
ops->stop = op_sh_stop;
ops->cpu_type = lmodel->cpu_type;
printk(KERN_INFO "oprofile: using %s performance monitoring.\n",
lmodel->cpu_type);
return 0;
}
void oprofile_arch_exit(void)
{
if (model && model->exit)
model->exit();
} }
void __exit oprofile_arch_exit(void) {}
#endif /* CONFIG_HW_PERF_EVENTS */
#ifndef __OP_IMPL_H
#define __OP_IMPL_H
/* Per-counter configuration as set via oprofilefs. */
struct op_counter_config {
unsigned long enabled;
unsigned long event;
unsigned long count;
/* Dummy values for userspace tool compliance */
unsigned long kernel;
unsigned long user;
unsigned long unit_mask;
};
/* Per-architecture configury and hooks. */
struct op_sh_model {
void (*reg_setup)(struct op_counter_config *);
int (*create_files)(struct super_block *sb, struct dentry *dir);
void (*cpu_setup)(void *dummy);
int (*init)(void);
void (*exit)(void);
void (*cpu_start)(void *args);
void (*cpu_stop)(void *args);
char *cpu_type;
unsigned char num_counters;
};
/* arch/sh/oprofile/common.c */
extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth);
#endif /* __OP_IMPL_H */
...@@ -26,10 +26,12 @@ config SPARC ...@@ -26,10 +26,12 @@ config SPARC
select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_OPTIONAL_GPIOLIB
select RTC_CLASS select RTC_CLASS
select RTC_DRV_M48T59 select RTC_DRV_M48T59
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC select PERF_USE_VMALLOC
select HAVE_DMA_ATTRS select HAVE_DMA_ATTRS
select HAVE_DMA_API_DEBUG select HAVE_DMA_API_DEBUG
select HAVE_ARCH_JUMP_LABEL
config SPARC32 config SPARC32
def_bool !64BIT def_bool !64BIT
...@@ -53,6 +55,7 @@ config SPARC64 ...@@ -53,6 +55,7 @@ config SPARC64
select RTC_DRV_BQ4802 select RTC_DRV_BQ4802
select RTC_DRV_SUN4V select RTC_DRV_SUN4V
select RTC_DRV_STARFIRE select RTC_DRV_STARFIRE
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC select PERF_USE_VMALLOC
......
#ifndef _ASM_SPARC_JUMP_LABEL_H
#define _ASM_SPARC_JUMP_LABEL_H
#ifdef __KERNEL__
#include <linux/types.h>
#include <asm/system.h>
#define JUMP_LABEL_NOP_SIZE 4
#define JUMP_LABEL(key, label) \
do { \
asm goto("1:\n\t" \
"nop\n\t" \
"nop\n\t" \
".pushsection __jump_table, \"a\"\n\t"\
".word 1b, %l[" #label "], %c0\n\t" \
".popsection \n\t" \
: : "i" (key) : : label);\
} while (0)
#endif /* __KERNEL__ */
typedef u32 jump_label_t;
struct jump_entry {
jump_label_t code;
jump_label_t target;
jump_label_t key;
};
#endif
#ifndef __ASM_SPARC_PERF_EVENT_H #ifndef __ASM_SPARC_PERF_EVENT_H
#define __ASM_SPARC_PERF_EVENT_H #define __ASM_SPARC_PERF_EVENT_H
extern void set_perf_event_pending(void);
#define PERF_EVENT_INDEX_OFFSET 0
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
#include <asm/ptrace.h> #include <asm/ptrace.h>
......
...@@ -119,3 +119,5 @@ obj-$(CONFIG_COMPAT) += $(audit--y) ...@@ -119,3 +119,5 @@ obj-$(CONFIG_COMPAT) += $(audit--y)
pc--$(CONFIG_PERF_EVENTS) := perf_event.o pc--$(CONFIG_PERF_EVENTS) := perf_event.o
obj-$(CONFIG_SPARC64) += $(pc--y) obj-$(CONFIG_SPARC64) += $(pc--y)
obj-$(CONFIG_SPARC64) += jump_label.o
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/jump_label.h>
#include <linux/memory.h>
#ifdef HAVE_JUMP_LABEL
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
u32 val;
u32 *insn = (u32 *) (unsigned long) entry->code;
if (type == JUMP_LABEL_ENABLE) {
s32 off = (s32)entry->target - (s32)entry->code;
#ifdef CONFIG_SPARC64
/* ba,pt %xcc, . + (off << 2) */
val = 0x10680000 | ((u32) off >> 2);
#else
/* ba . + (off << 2) */
val = 0x10800000 | ((u32) off >> 2);
#endif
} else {
val = 0x01000000;
}
get_online_cpus();
mutex_lock(&text_mutex);
*insn = val;
flushi(insn);
mutex_unlock(&text_mutex);
put_online_cpus();
}
void arch_jump_label_text_poke_early(jump_label_t addr)
{
u32 *insn_p = (u32 *) (unsigned long) addr;
*insn_p = 0x01000000;
flushi(insn_p);
}
#endif
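As a rough illustration of the arithmetic in arch_jump_label_transform() above (the addresses below are made up for the example), the patched word is just the branch opcode with the byte offset converted to a 19-bit word displacement:

	/* Illustration only: mirror the sparc64 "ba,pt %xcc, . + off" encoding. */
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t code   = 0x1000;	/* hypothetical jump-site address */
		uint32_t target = 0x1010;	/* hypothetical branch target */
		int32_t  off    = (int32_t)target - (int32_t)code;	/* 16 bytes */

		/* disp19 counts 4-byte instruction words, hence the >> 2 */
		uint32_t val = 0x10680000 | ((uint32_t)off >> 2);

		printf("patched insn word: 0x%08x\n", val);	/* prints 0x10680004 */
		return 0;
	}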
...@@ -18,6 +18,9 @@ ...@@ -18,6 +18,9 @@
#include <asm/spitfire.h> #include <asm/spitfire.h>
#ifdef CONFIG_SPARC64 #ifdef CONFIG_SPARC64
#include <linux/jump_label.h>
static void *module_map(unsigned long size) static void *module_map(unsigned long size)
{ {
struct vm_struct *area; struct vm_struct *area;
...@@ -227,6 +230,9 @@ int module_finalize(const Elf_Ehdr *hdr, ...@@ -227,6 +230,9 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs, const Elf_Shdr *sechdrs,
struct module *me) struct module *me)
{ {
/* make jump label nops */
jump_label_apply_nops(me);
/* Cheetah's I-cache is fully coherent. */ /* Cheetah's I-cache is fully coherent. */
if (tlb_type == spitfire) { if (tlb_type == spitfire) {
unsigned long va; unsigned long va;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/irq.h> #include <linux/irq.h>
#include <linux/perf_event.h> #include <linux/irq_work.h>
#include <linux/ftrace.h> #include <linux/ftrace.h>
#include <asm/pil.h> #include <asm/pil.h>
...@@ -43,14 +43,14 @@ void __irq_entry deferred_pcr_work_irq(int irq, struct pt_regs *regs) ...@@ -43,14 +43,14 @@ void __irq_entry deferred_pcr_work_irq(int irq, struct pt_regs *regs)
old_regs = set_irq_regs(regs); old_regs = set_irq_regs(regs);
irq_enter(); irq_enter();
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_IRQ_WORK
perf_event_do_pending(); irq_work_run();
#endif #endif
irq_exit(); irq_exit();
set_irq_regs(old_regs); set_irq_regs(old_regs);
} }
void set_perf_event_pending(void) void arch_irq_work_raise(void)
{ {
set_softint(1 << PIL_DEFERRED_PCR_WORK); set_softint(1 << PIL_DEFERRED_PCR_WORK);
} }
......
...@@ -25,6 +25,7 @@ config X86 ...@@ -25,6 +25,7 @@ config X86
select HAVE_IDE select HAVE_IDE
select HAVE_OPROFILE select HAVE_OPROFILE
select HAVE_PERF_EVENTS if (!M386 && !M486) select HAVE_PERF_EVENTS if (!M386 && !M486)
select HAVE_IRQ_WORK
select HAVE_IOREMAP_PROT select HAVE_IOREMAP_PROT
select HAVE_KPROBES select HAVE_KPROBES
select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_OPTIONAL_GPIOLIB
...@@ -33,6 +34,7 @@ config X86 ...@@ -33,6 +34,7 @@ config X86
select HAVE_KRETPROBES select HAVE_KRETPROBES
select HAVE_OPTPROBES select HAVE_OPTPROBES
select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_C_RECORDMCOUNT
select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_TRACER
...@@ -59,6 +61,8 @@ config X86 ...@@ -59,6 +61,8 @@ config X86
select ANON_INODES select ANON_INODES
select HAVE_ARCH_KMEMCHECK select HAVE_ARCH_KMEMCHECK
select HAVE_USER_RETURN_NOTIFIER select HAVE_USER_RETURN_NOTIFIER
select HAVE_ARCH_JUMP_LABEL
select HAVE_TEXT_POKE_SMP
config INSTRUCTION_DECODER config INSTRUCTION_DECODER
def_bool (KPROBES || PERF_EVENTS) def_bool (KPROBES || PERF_EVENTS)
...@@ -2125,6 +2129,10 @@ config HAVE_ATOMIC_IOMAP ...@@ -2125,6 +2129,10 @@ config HAVE_ATOMIC_IOMAP
def_bool y def_bool y
depends on X86_32 depends on X86_32
config HAVE_TEXT_POKE_SMP
bool
select STOP_MACHINE if SMP
source "net/Kconfig" source "net/Kconfig"
source "drivers/Kconfig" source "drivers/Kconfig"
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <linux/types.h> #include <linux/types.h>
#include <linux/stddef.h> #include <linux/stddef.h>
#include <linux/stringify.h> #include <linux/stringify.h>
#include <linux/jump_label.h>
#include <asm/asm.h> #include <asm/asm.h>
/* /*
...@@ -160,6 +161,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, ...@@ -160,6 +161,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
#define __parainstructions_end NULL #define __parainstructions_end NULL
#endif #endif
extern void *text_poke_early(void *addr, const void *opcode, size_t len);
/* /*
* Clear and restore the kernel write-protection flag on the local CPU. * Clear and restore the kernel write-protection flag on the local CPU.
* Allows the kernel to edit read-only pages. * Allows the kernel to edit read-only pages.
...@@ -180,4 +183,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, ...@@ -180,4 +183,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void *text_poke_smp(void *addr, const void *opcode, size_t len); extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
#define IDEAL_NOP_SIZE_5 5
extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
extern void arch_init_ideal_nop5(void);
#else
static inline void arch_init_ideal_nop5(void) {}
#endif
#endif /* _ASM_X86_ALTERNATIVE_H */ #endif /* _ASM_X86_ALTERNATIVE_H */
...@@ -49,8 +49,8 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) ...@@ -49,8 +49,8 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_IRQ_WORK
BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR)
#endif #endif
#ifdef CONFIG_X86_THERMAL_VECTOR #ifdef CONFIG_X86_THERMAL_VECTOR
......
...@@ -14,7 +14,7 @@ typedef struct { ...@@ -14,7 +14,7 @@ typedef struct {
#endif #endif
unsigned int x86_platform_ipis; /* arch dependent */ unsigned int x86_platform_ipis; /* arch dependent */
unsigned int apic_perf_irqs; unsigned int apic_perf_irqs;
unsigned int apic_pending_irqs; unsigned int apic_irq_work_irqs;
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
unsigned int irq_resched_count; unsigned int irq_resched_count;
unsigned int irq_call_count; unsigned int irq_call_count;
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
extern void apic_timer_interrupt(void); extern void apic_timer_interrupt(void);
extern void x86_platform_ipi(void); extern void x86_platform_ipi(void);
extern void error_interrupt(void); extern void error_interrupt(void);
extern void perf_pending_interrupt(void); extern void irq_work_interrupt(void);
extern void spurious_interrupt(void); extern void spurious_interrupt(void);
extern void thermal_interrupt(void); extern void thermal_interrupt(void);
......
...@@ -114,9 +114,9 @@ ...@@ -114,9 +114,9 @@
#define X86_PLATFORM_IPI_VECTOR 0xed #define X86_PLATFORM_IPI_VECTOR 0xed
/* /*
* Performance monitoring pending work vector: * IRQ work vector:
*/ */
#define LOCAL_PENDING_VECTOR 0xec #define IRQ_WORK_VECTOR 0xec
#define UV_BAU_MESSAGE 0xea #define UV_BAU_MESSAGE 0xea
......
#ifndef _ASM_X86_JUMP_LABEL_H
#define _ASM_X86_JUMP_LABEL_H
#ifdef __KERNEL__
#include <linux/types.h>
#include <asm/nops.h>
#define JUMP_LABEL_NOP_SIZE 5
# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
# define JUMP_LABEL(key, label) \
do { \
asm goto("1:" \
JUMP_LABEL_INITIAL_NOP \
".pushsection __jump_table, \"a\" \n\t"\
_ASM_PTR "1b, %l[" #label "], %c0 \n\t" \
".popsection \n\t" \
: : "i" (key) : : label); \
} while (0)
#endif /* __KERNEL__ */
#ifdef CONFIG_X86_64
typedef u64 jump_label_t;
#else
typedef u32 jump_label_t;
#endif
struct jump_entry {
jump_label_t code;
jump_label_t target;
jump_label_t key;
};
#endif
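For orientation, a minimal usage sketch of the JUMP_LABEL() macro defined above; the key, helper and call site are invented for illustration and are not part of the patch. The macro emits a single NOP on the fast path, and enabling the key later patches that NOP into a jump to the label:

	#include <linux/jump_label.h>	/* generic wrapper around the asm goto macro */

	static int my_feature_key;	/* hypothetical key, enabled elsewhere */

	static void slow_path_work(void)	/* hypothetical slow-path body */
	{
		/* ... */
	}

	static void maybe_do_slow_path(void)
	{
		JUMP_LABEL(&my_feature_key, do_slow);
		return;			/* fast path: only the NOP above runs */
	do_slow:
		slow_path_work();
	}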
...@@ -36,19 +36,6 @@ ...@@ -36,19 +36,6 @@
#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) #define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT)
#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) #define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT)
/* Non HT mask */
#define P4_ESCR_MASK \
(P4_ESCR_EVENT_MASK | \
P4_ESCR_EVENTMASK_MASK | \
P4_ESCR_TAG_MASK | \
P4_ESCR_TAG_ENABLE | \
P4_ESCR_T0_OS | \
P4_ESCR_T0_USR)
/* HT mask */
#define P4_ESCR_MASK_HT \
(P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR)
#define P4_CCCR_OVF 0x80000000U #define P4_CCCR_OVF 0x80000000U
#define P4_CCCR_CASCADE 0x40000000U #define P4_CCCR_CASCADE 0x40000000U
#define P4_CCCR_OVF_PMI_T0 0x04000000U #define P4_CCCR_OVF_PMI_T0 0x04000000U
...@@ -70,23 +57,6 @@ ...@@ -70,23 +57,6 @@
#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT)
#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) #define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
/* Non HT mask */
#define P4_CCCR_MASK \
(P4_CCCR_OVF | \
P4_CCCR_CASCADE | \
P4_CCCR_OVF_PMI_T0 | \
P4_CCCR_FORCE_OVF | \
P4_CCCR_EDGE | \
P4_CCCR_THRESHOLD_MASK | \
P4_CCCR_COMPLEMENT | \
P4_CCCR_COMPARE | \
P4_CCCR_ESCR_SELECT_MASK | \
P4_CCCR_ENABLE)
/* HT mask */
#define P4_CCCR_MASK_HT \
(P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY)
#define P4_GEN_ESCR_EMASK(class, name, bit) \ #define P4_GEN_ESCR_EMASK(class, name, bit) \
class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
#define P4_ESCR_EMASK_BIT(class, name) class##__##name #define P4_ESCR_EMASK_BIT(class, name) class##__##name
...@@ -127,6 +97,28 @@ ...@@ -127,6 +97,28 @@
#define P4_CONFIG_HT_SHIFT 63 #define P4_CONFIG_HT_SHIFT 63
#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
/*
* The bits we allow to pass for RAW events
*/
#define P4_CONFIG_MASK_ESCR \
P4_ESCR_EVENT_MASK | \
P4_ESCR_EVENTMASK_MASK | \
P4_ESCR_TAG_MASK | \
P4_ESCR_TAG_ENABLE
#define P4_CONFIG_MASK_CCCR \
P4_CCCR_EDGE | \
P4_CCCR_THRESHOLD_MASK | \
P4_CCCR_COMPLEMENT | \
P4_CCCR_COMPARE | \
P4_CCCR_THREAD_ANY | \
P4_CCCR_RESERVED
/* some dangerous bits are reserved for kernel internals */
#define P4_CONFIG_MASK \
(p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \
(p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))
static inline bool p4_is_event_cascaded(u64 config) static inline bool p4_is_event_cascaded(u64 config)
{ {
u32 cccr = p4_config_unpack_cccr(config); u32 cccr = p4_config_unpack_cccr(config);
......
...@@ -34,7 +34,8 @@ GCOV_PROFILE_paravirt.o := n ...@@ -34,7 +34,8 @@ GCOV_PROFILE_paravirt.o := n
obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o ldt.o dumpstack.o obj-y += time.o ioport.o ldt.o dumpstack.o
obj-y += setup.o x86_init.o i8259.o irqinit.o obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o
obj-$(CONFIG_X86_32) += probe_roms_32.o obj-$(CONFIG_X86_32) += probe_roms_32.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
......
...@@ -195,7 +195,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) ...@@ -195,7 +195,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[]; extern s32 __smp_locks[], __smp_locks_end[];
static void *text_poke_early(void *addr, const void *opcode, size_t len); void *text_poke_early(void *addr, const void *opcode, size_t len);
/* Replace instructions with better alternatives for this CPU type. /* Replace instructions with better alternatives for this CPU type.
This runs before SMP is initialized to avoid SMP problems with This runs before SMP is initialized to avoid SMP problems with
...@@ -522,7 +522,7 @@ void __init alternative_instructions(void) ...@@ -522,7 +522,7 @@ void __init alternative_instructions(void)
* instructions. And on the local CPU you need to be protected again NMI or MCE * instructions. And on the local CPU you need to be protected again NMI or MCE
* handlers seeing an inconsistent instruction while you patch. * handlers seeing an inconsistent instruction while you patch.
*/ */
static void *__init_or_module text_poke_early(void *addr, const void *opcode, void *__init_or_module text_poke_early(void *addr, const void *opcode,
size_t len) size_t len)
{ {
unsigned long flags; unsigned long flags;
...@@ -637,7 +637,72 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) ...@@ -637,7 +637,72 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
tpp.len = len; tpp.len = len;
atomic_set(&stop_machine_first, 1); atomic_set(&stop_machine_first, 1);
wrote_text = 0; wrote_text = 0;
stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); /* Use __stop_machine() because the caller already got online_cpus. */
__stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
return addr; return addr;
} }
#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
void __init arch_init_ideal_nop5(void)
{
extern const unsigned char ftrace_test_p6nop[];
extern const unsigned char ftrace_test_nop5[];
extern const unsigned char ftrace_test_jmp[];
int faulted = 0;
/*
* There is no good nop for all x86 archs.
* We will default to using the P6_NOP5, but first we
* will test to make sure that the nop will actually
* work on this CPU. If it faults, we will then
* go to a lesser efficient 5 byte nop. If that fails
* we then just use a jmp as our nop. This isn't the most
* efficient nop, but we can not use a multi part nop
* since we would then risk being preempted in the middle
* of that nop, and if we enabled tracing then, it might
* cause a system crash.
*
* TODO: check the cpuid to determine the best nop.
*/
asm volatile (
"ftrace_test_jmp:"
"jmp ftrace_test_p6nop\n"
"nop\n"
"nop\n"
"nop\n" /* 2 byte jmp + 3 bytes */
"ftrace_test_p6nop:"
P6_NOP5
"jmp 1f\n"
"ftrace_test_nop5:"
".byte 0x66,0x66,0x66,0x66,0x90\n"
"1:"
".section .fixup, \"ax\"\n"
"2: movl $1, %0\n"
" jmp ftrace_test_nop5\n"
"3: movl $2, %0\n"
" jmp 1b\n"
".previous\n"
_ASM_EXTABLE(ftrace_test_p6nop, 2b)
_ASM_EXTABLE(ftrace_test_nop5, 3b)
: "=r"(faulted) : "0" (faulted));
switch (faulted) {
case 0:
pr_info("converting mcount calls to 0f 1f 44 00 00\n");
memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
break;
case 1:
pr_info("converting mcount calls to 66 66 66 66 90\n");
memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
break;
case 2:
pr_info("converting mcount calls to jmp . + 5\n");
memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
break;
}
}
#endif
...@@ -52,7 +52,7 @@ static __initconst const u64 amd_hw_cache_event_ids ...@@ -52,7 +52,7 @@ static __initconst const u64 amd_hw_cache_event_ids
[ C(DTLB) ] = { [ C(DTLB) ] = {
[ C(OP_READ) ] = { [ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
[ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */
}, },
[ C(OP_WRITE) ] = { [ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0, [ C(RESULT_ACCESS) ] = 0,
...@@ -66,7 +66,7 @@ static __initconst const u64 amd_hw_cache_event_ids ...@@ -66,7 +66,7 @@ static __initconst const u64 amd_hw_cache_event_ids
[ C(ITLB) ] = { [ C(ITLB) ] = {
[ C(OP_READ) ] = { [ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */
[ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
}, },
[ C(OP_WRITE) ] = { [ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1, [ C(RESULT_ACCESS) ] = -1,
......
...@@ -713,18 +713,18 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) ...@@ -713,18 +713,18 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
struct cpu_hw_events *cpuc; struct cpu_hw_events *cpuc;
int bit, loops; int bit, loops;
u64 status; u64 status;
int handled = 0; int handled;
perf_sample_data_init(&data, 0); perf_sample_data_init(&data, 0);
cpuc = &__get_cpu_var(cpu_hw_events); cpuc = &__get_cpu_var(cpu_hw_events);
intel_pmu_disable_all(); intel_pmu_disable_all();
intel_pmu_drain_bts_buffer(); handled = intel_pmu_drain_bts_buffer();
status = intel_pmu_get_status(); status = intel_pmu_get_status();
if (!status) { if (!status) {
intel_pmu_enable_all(0); intel_pmu_enable_all(0);
return 0; return handled;
} }
loops = 0; loops = 0;
...@@ -763,7 +763,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) ...@@ -763,7 +763,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
data.period = event->hw.last_period; data.period = event->hw.last_period;
if (perf_event_overflow(event, 1, &data, regs)) if (perf_event_overflow(event, 1, &data, regs))
x86_pmu_stop(event); x86_pmu_stop(event, 0);
} }
/* /*
......
...@@ -214,7 +214,7 @@ static void intel_pmu_disable_bts(void) ...@@ -214,7 +214,7 @@ static void intel_pmu_disable_bts(void)
update_debugctlmsr(debugctlmsr); update_debugctlmsr(debugctlmsr);
} }
static void intel_pmu_drain_bts_buffer(void) static int intel_pmu_drain_bts_buffer(void)
{ {
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct debug_store *ds = cpuc->ds; struct debug_store *ds = cpuc->ds;
...@@ -231,16 +231,16 @@ static void intel_pmu_drain_bts_buffer(void) ...@@ -231,16 +231,16 @@ static void intel_pmu_drain_bts_buffer(void)
struct pt_regs regs; struct pt_regs regs;
if (!event) if (!event)
return; return 0;
if (!ds) if (!ds)
return; return 0;
at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
top = (struct bts_record *)(unsigned long)ds->bts_index; top = (struct bts_record *)(unsigned long)ds->bts_index;
if (top <= at) if (top <= at)
return; return 0;
ds->bts_index = ds->bts_buffer_base; ds->bts_index = ds->bts_buffer_base;
...@@ -256,7 +256,7 @@ static void intel_pmu_drain_bts_buffer(void) ...@@ -256,7 +256,7 @@ static void intel_pmu_drain_bts_buffer(void)
perf_prepare_sample(&header, &data, event, &regs); perf_prepare_sample(&header, &data, event, &regs);
if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
return; return 1;
for (; at < top; at++) { for (; at < top; at++) {
data.ip = at->from; data.ip = at->from;
...@@ -270,6 +270,7 @@ static void intel_pmu_drain_bts_buffer(void) ...@@ -270,6 +270,7 @@ static void intel_pmu_drain_bts_buffer(void)
/* There's new data available. */ /* There's new data available. */
event->hw.interrupts++; event->hw.interrupts++;
event->pending_kill = POLL_IN; event->pending_kill = POLL_IN;
return 1;
} }
/* /*
...@@ -491,7 +492,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, ...@@ -491,7 +492,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
regs.flags &= ~PERF_EFLAGS_EXACT; regs.flags &= ~PERF_EFLAGS_EXACT;
if (perf_event_overflow(event, 1, &data, &regs)) if (perf_event_overflow(event, 1, &data, &regs))
x86_pmu_stop(event); x86_pmu_stop(event, 0);
} }
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
......
...@@ -1023,9 +1023,9 @@ apicinterrupt ERROR_APIC_VECTOR \ ...@@ -1023,9 +1023,9 @@ apicinterrupt ERROR_APIC_VECTOR \
apicinterrupt SPURIOUS_APIC_VECTOR \ apicinterrupt SPURIOUS_APIC_VECTOR \
spurious_interrupt smp_spurious_interrupt spurious_interrupt smp_spurious_interrupt
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_IRQ_WORK
apicinterrupt LOCAL_PENDING_VECTOR \ apicinterrupt IRQ_WORK_VECTOR \
perf_pending_interrupt smp_perf_pending_interrupt irq_work_interrupt smp_irq_work_interrupt
#endif #endif
/* /*
......
...@@ -257,14 +257,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code) ...@@ -257,14 +257,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
return mod_code_status; return mod_code_status;
} }
static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
static unsigned char *ftrace_nop_replace(void) static unsigned char *ftrace_nop_replace(void)
{ {
return ftrace_nop; return ideal_nop5;
} }
static int static int
...@@ -338,62 +333,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ...@@ -338,62 +333,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
int __init ftrace_dyn_arch_init(void *data) int __init ftrace_dyn_arch_init(void *data)
{ {
extern const unsigned char ftrace_test_p6nop[];
extern const unsigned char ftrace_test_nop5[];
extern const unsigned char ftrace_test_jmp[];
int faulted = 0;
/*
* There is no good nop for all x86 archs.
* We will default to using the P6_NOP5, but first we
* will test to make sure that the nop will actually
* work on this CPU. If it faults, we will then
* go to a lesser efficient 5 byte nop. If that fails
* we then just use a jmp as our nop. This isn't the most
* efficient nop, but we can not use a multi part nop
* since we would then risk being preempted in the middle
* of that nop, and if we enabled tracing then, it might
* cause a system crash.
*
* TODO: check the cpuid to determine the best nop.
*/
asm volatile (
"ftrace_test_jmp:"
"jmp ftrace_test_p6nop\n"
"nop\n"
"nop\n"
"nop\n" /* 2 byte jmp + 3 bytes */
"ftrace_test_p6nop:"
P6_NOP5
"jmp 1f\n"
"ftrace_test_nop5:"
".byte 0x66,0x66,0x66,0x66,0x90\n"
"1:"
".section .fixup, \"ax\"\n"
"2: movl $1, %0\n"
" jmp ftrace_test_nop5\n"
"3: movl $2, %0\n"
" jmp 1b\n"
".previous\n"
_ASM_EXTABLE(ftrace_test_p6nop, 2b)
_ASM_EXTABLE(ftrace_test_nop5, 3b)
: "=r"(faulted) : "0" (faulted));
switch (faulted) {
case 0:
pr_info("converting mcount calls to 0f 1f 44 00 00\n");
memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
break;
case 1:
pr_info("converting mcount calls to 66 66 66 66 90\n");
memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
break;
case 2:
pr_info("converting mcount calls to jmp . + 5\n");
memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
break;
}
/* The return code is retured via data */ /* The return code is retured via data */
*(unsigned long *)data = 0; *(unsigned long *)data = 0;
......
...@@ -67,10 +67,10 @@ static int show_other_interrupts(struct seq_file *p, int prec) ...@@ -67,10 +67,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
for_each_online_cpu(j) for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
seq_printf(p, " Performance monitoring interrupts\n"); seq_printf(p, " Performance monitoring interrupts\n");
seq_printf(p, "%*s: ", prec, "PND"); seq_printf(p, "%*s: ", prec, "IWI");
for_each_online_cpu(j) for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
seq_printf(p, " Performance pending work\n"); seq_printf(p, " IRQ work interrupts\n");
#endif #endif
if (x86_platform_ipi_callback) { if (x86_platform_ipi_callback) {
seq_printf(p, "%*s: ", prec, "PLT"); seq_printf(p, "%*s: ", prec, "PLT");
...@@ -185,7 +185,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) ...@@ -185,7 +185,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->apic_timer_irqs; sum += irq_stats(cpu)->apic_timer_irqs;
sum += irq_stats(cpu)->irq_spurious_count; sum += irq_stats(cpu)->irq_spurious_count;
sum += irq_stats(cpu)->apic_perf_irqs; sum += irq_stats(cpu)->apic_perf_irqs;
sum += irq_stats(cpu)->apic_pending_irqs; sum += irq_stats(cpu)->apic_irq_work_irqs;
#endif #endif
if (x86_platform_ipi_callback) if (x86_platform_ipi_callback)
sum += irq_stats(cpu)->x86_platform_ipis; sum += irq_stats(cpu)->x86_platform_ipis;
......
/*
* x86 specific code for irq_work
*
* Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
*/
#include <linux/kernel.h>
#include <linux/irq_work.h>
#include <linux/hardirq.h>
#include <asm/apic.h>
void smp_irq_work_interrupt(struct pt_regs *regs)
{
irq_enter();
ack_APIC_irq();
inc_irq_stat(apic_irq_work_irqs);
irq_work_run();
irq_exit();
}
void arch_irq_work_raise(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
if (!cpu_has_apic)
return;
apic->send_IPI_self(IRQ_WORK_VECTOR);
apic_wait_icr_idle();
#endif
}
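A hedged usage sketch of the generic side this hooks into (the work item, callback and call site are invented, and the init_irq_work() initializer is assumed from the generic irq_work API): queueing from NMI context ends up calling arch_irq_work_raise() above, and the callback later runs from smp_irq_work_interrupt() in hardirq context.

	#include <linux/kernel.h>
	#include <linux/irq_work.h>

	/* Hypothetical example: defer a printk that is unsafe in NMI context. */
	static void my_irq_work_func(struct irq_work *work)
	{
		pr_info("deferred work running in hardirq context\n");
	}

	static struct irq_work my_work;

	static void raise_from_nmi(void)
	{
		init_irq_work(&my_work, my_irq_work_func);	/* assumed initializer */
		irq_work_queue(&my_work);	/* raises the self-IPI via arch_irq_work_raise() */
	}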
...@@ -224,9 +224,9 @@ static void __init apic_intr_init(void) ...@@ -224,9 +224,9 @@ static void __init apic_intr_init(void)
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
/* Performance monitoring interrupts: */ /* IRQ work interrupts: */
# ifdef CONFIG_PERF_EVENTS # ifdef CONFIG_IRQ_WORK
alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt);
# endif # endif
#endif #endif
......
/*
* jump label x86 support
*
* Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
*
*/
#include <linux/jump_label.h>
#include <linux/memory.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/cpu.h>
#include <asm/kprobes.h>
#include <asm/alternative.h>
#ifdef HAVE_JUMP_LABEL
union jump_code_union {
char code[JUMP_LABEL_NOP_SIZE];
struct {
char jump;
int offset;
} __attribute__((packed));
};
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
union jump_code_union code;
if (type == JUMP_LABEL_ENABLE) {
code.jump = 0xe9;
code.offset = entry->target -
(entry->code + JUMP_LABEL_NOP_SIZE);
} else
memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE);
get_online_cpus();
mutex_lock(&text_mutex);
text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
mutex_unlock(&text_mutex);
put_online_cpus();
}
void arch_jump_label_text_poke_early(jump_label_t addr)
{
text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE);
}
#endif
...@@ -239,6 +239,9 @@ int module_finalize(const Elf_Ehdr *hdr, ...@@ -239,6 +239,9 @@ int module_finalize(const Elf_Ehdr *hdr,
apply_paravirt(pseg, pseg + para->sh_size); apply_paravirt(pseg, pseg + para->sh_size);
} }
/* make jump label nops */
jump_label_apply_nops(me);
return 0; return 0;
} }
......
...@@ -112,6 +112,7 @@ ...@@ -112,6 +112,7 @@
#include <asm/numa_64.h> #include <asm/numa_64.h>
#endif #endif
#include <asm/mce.h> #include <asm/mce.h>
#include <asm/alternative.h>
/* /*
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
...@@ -726,6 +727,7 @@ void __init setup_arch(char **cmdline_p) ...@@ -726,6 +727,7 @@ void __init setup_arch(char **cmdline_p)
{ {
int acpi = 0; int acpi = 0;
int k8 = 0; int k8 = 0;
unsigned long flags;
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
...@@ -1071,6 +1073,10 @@ void __init setup_arch(char **cmdline_p) ...@@ -1071,6 +1073,10 @@ void __init setup_arch(char **cmdline_p)
x86_init.oem.banner(); x86_init.oem.banner();
mcheck_init(); mcheck_init();
local_irq_save(flags);
arch_init_ideal_nop5();
local_irq_restore(flags);
} }
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
......
...@@ -631,6 +631,8 @@ bool kmemcheck_fault(struct pt_regs *regs, unsigned long address, ...@@ -631,6 +631,8 @@ bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
if (!pte) if (!pte)
return false; return false;
WARN_ON_ONCE(in_nmi());
if (error_code & 2) if (error_code & 2)
kmemcheck_access(regs, address, KMEMCHECK_WRITE); kmemcheck_access(regs, address, KMEMCHECK_WRITE);
else else
......