Commit a6a0683b authored by Viresh Kumar's avatar Viresh Kumar

arch: x86: Remove CONFIG_OPROFILE support

The "oprofile" user-space tools don't use the kernel OPROFILE support
any more, and haven't in a long time. User-space has been converted to
the perf interfaces.

Remove the old oprofile's architecture specific support.
Suggested-by: default avatarChristoph Hellwig <hch@infradead.org>
Suggested-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: default avatarViresh Kumar <viresh.kumar@linaro.org>
Acked-by: default avatarRobert Richter <rric@kernel.org>
Acked-by: default avatarWilliam Cohen <wcohen@redhat.com>
Acked-by: default avatarAl Viro <viro@zeniv.linux.org.uk>
Acked-by: default avatarThomas Gleixner <tglx@linutronix.de>
parent 2083fecd
...@@ -206,7 +206,6 @@ config X86 ...@@ -206,7 +206,6 @@ config X86
select HAVE_MOVE_PMD select HAVE_MOVE_PMD
select HAVE_MOVE_PUD select HAVE_MOVE_PUD
select HAVE_NMI select HAVE_NMI
select HAVE_OPROFILE
select HAVE_OPTPROBES select HAVE_OPTPROBES
select HAVE_PCSPKR_PLATFORM select HAVE_PCSPKR_PLATFORM
select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS
......
...@@ -229,9 +229,6 @@ core-y += arch/x86/ ...@@ -229,9 +229,6 @@ core-y += arch/x86/
drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/
drivers-$(CONFIG_PCI) += arch/x86/pci/ drivers-$(CONFIG_PCI) += arch/x86/pci/
# must be linked after kernel/
drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
# suspend and hibernation support # suspend and hibernation support
drivers-$(CONFIG_PM) += arch/x86/power/ drivers-$(CONFIG_PM) += arch/x86/power/
......
...@@ -9,7 +9,6 @@ ...@@ -9,7 +9,6 @@
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
extern int reserve_perfctr_nmi(unsigned int); extern int reserve_perfctr_nmi(unsigned int);
extern void release_perfctr_nmi(unsigned int); extern void release_perfctr_nmi(unsigned int);
extern int reserve_evntsel_nmi(unsigned int); extern int reserve_evntsel_nmi(unsigned int);
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* local apic based NMI watchdog for various CPUs. * local apic based NMI watchdog for various CPUs.
* *
* This file also handles reservation of performance counters for coordination * This file also handles reservation of performance counters for coordination
* with other users (like oprofile). * with other users.
* *
* Note that these events normally don't tick when the CPU idles. This means * Note that these events normally don't tick when the CPU idles. This means
* the frequency varies with CPU load. * the frequency varies with CPU load.
...@@ -105,15 +105,6 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) ...@@ -105,15 +105,6 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
} }
/* checks for a bit availability (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
return !test_bit(counter, perfctr_nmi_owner);
}
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
int reserve_perfctr_nmi(unsigned int msr) int reserve_perfctr_nmi(unsigned int msr)
{ {
unsigned int counter; unsigned int counter;
......
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_OPROFILE) += oprofile.o
DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
oprof.o cpu_buffer.o buffer_sync.o \
event_buffer.o oprofile_files.o \
oprofilefs.o oprofile_stats.o \
timer_int.o nmi_timer_int.o )
oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \
op_model_ppro.o op_model_p4.o
/**
* @file backtrace.c
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon
* @author David Smith
*/
#include <linux/oprofile.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/compat.h>
#include <linux/uaccess.h>
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
#ifdef CONFIG_COMPAT
static struct stack_frame_ia32 *
dump_user_backtrace_32(struct stack_frame_ia32 *head)
{
/* Also check accessibility of one struct frame_head beyond: */
struct stack_frame_ia32 bufhead[2];
struct stack_frame_ia32 *fp;
unsigned long bytes;
bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
if (bytes != 0)
return NULL;
fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame);
oprofile_add_trace(bufhead[0].return_address);
/* frame pointers should strictly progress back up the stack
* (towards higher addresses) */
if (head >= fp)
return NULL;
return fp;
}
static inline int
x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
{
struct stack_frame_ia32 *head;
/* User process is IA32 */
if (!current || user_64bit_mode(regs))
return 0;
head = (struct stack_frame_ia32 *) regs->bp;
while (depth-- && head)
head = dump_user_backtrace_32(head);
return 1;
}
#else
static inline int
x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
{
return 0;
}
#endif /* CONFIG_COMPAT */
static struct stack_frame *dump_user_backtrace(struct stack_frame *head)
{
/* Also check accessibility of one struct frame_head beyond: */
struct stack_frame bufhead[2];
unsigned long bytes;
bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
if (bytes != 0)
return NULL;
oprofile_add_trace(bufhead[0].return_address);
/* frame pointers should strictly progress back up the stack
* (towards higher addresses) */
if (head >= bufhead[0].next_frame)
return NULL;
return bufhead[0].next_frame;
}
void
x86_backtrace(struct pt_regs * const regs, unsigned int depth)
{
struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
if (!user_mode(regs)) {
struct unwind_state state;
unsigned long addr;
if (!depth)
return;
oprofile_add_trace(regs->ip);
if (!--depth)
return;
for (unwind_start(&state, current, regs, NULL);
!unwind_done(&state); unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
if (!addr)
break;
oprofile_add_trace(addr);
if (!--depth)
break;
}
return;
}
if (x86_backtrace_32(regs, depth))
return;
while (depth-- && head)
head = dump_user_backtrace(head);
}
/**
* @file init.c
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
*/
#include <linux/oprofile.h>
#include <linux/init.h>
#include <linux/errno.h>
/*
* We support CPUs that have performance counters like the Pentium Pro
* with the NMI mode driver.
*/
#ifdef CONFIG_X86_LOCAL_APIC
extern int op_nmi_init(struct oprofile_operations *ops);
extern void op_nmi_exit(void);
#else
static int op_nmi_init(struct oprofile_operations *ops) { return -ENODEV; }
static void op_nmi_exit(void) { }
#endif
extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
int __init oprofile_arch_init(struct oprofile_operations *ops)
{
ops->backtrace = x86_backtrace;
return op_nmi_init(ops);
}
void oprofile_arch_exit(void)
{
op_nmi_exit();
}
/**
* @file nmi_int.c
*
* @remark Copyright 2002-2009 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
* @author Robert Richter <robert.richter@amd.com>
* @author Barry Kasindorf <barry.kasindorf@amd.com>
* @author Jason Yeh <jason.yeh@amd.com>
* @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
*/
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/syscore_ops.h>
#include <linux/slab.h>
#include <linux/moduleparam.h>
#include <linux/kdebug.h>
#include <linux/cpu.h>
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>
#include "op_counter.h"
#include "op_x86_model.h"
static struct op_x86_model_spec *model;
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
/* must be protected with get_online_cpus()/put_online_cpus(): */
static int nmi_enabled;
static int ctr_running;
struct op_counter_config counter_config[OP_MAX_COUNTER];
/* common functions */
u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
struct op_counter_config *counter_config)
{
u64 val = 0;
u16 event = (u16)counter_config->event;
val |= ARCH_PERFMON_EVENTSEL_INT;
val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0;
val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0;
val |= (counter_config->unit_mask & 0xFF) << 8;
counter_config->extra &= (ARCH_PERFMON_EVENTSEL_INV |
ARCH_PERFMON_EVENTSEL_EDGE |
ARCH_PERFMON_EVENTSEL_CMASK);
val |= counter_config->extra;
event &= model->event_mask ? model->event_mask : 0xFF;
val |= event & 0xFF;
val |= (u64)(event & 0x0F00) << 24;
return val;
}
static int profile_exceptions_notify(unsigned int val, struct pt_regs *regs)
{
if (ctr_running)
model->check_ctrs(regs, this_cpu_ptr(&cpu_msrs));
else if (!nmi_enabled)
return NMI_DONE;
else
model->stop(this_cpu_ptr(&cpu_msrs));
return NMI_HANDLED;
}
static void nmi_cpu_save_registers(struct op_msrs *msrs)
{
struct op_msr *counters = msrs->counters;
struct op_msr *controls = msrs->controls;
unsigned int i;
for (i = 0; i < model->num_counters; ++i) {
if (counters[i].addr)
rdmsrl(counters[i].addr, counters[i].saved);
}
for (i = 0; i < model->num_controls; ++i) {
if (controls[i].addr)
rdmsrl(controls[i].addr, controls[i].saved);
}
}
static void nmi_cpu_start(void *dummy)
{
struct op_msrs const *msrs = this_cpu_ptr(&cpu_msrs);
if (!msrs->controls)
WARN_ON_ONCE(1);
else
model->start(msrs);
}
static int nmi_start(void)
{
get_online_cpus();
ctr_running = 1;
/* make ctr_running visible to the nmi handler: */
smp_mb();
on_each_cpu(nmi_cpu_start, NULL, 1);
put_online_cpus();
return 0;
}
static void nmi_cpu_stop(void *dummy)
{
struct op_msrs const *msrs = this_cpu_ptr(&cpu_msrs);
if (!msrs->controls)
WARN_ON_ONCE(1);
else
model->stop(msrs);
}
static void nmi_stop(void)
{
get_online_cpus();
on_each_cpu(nmi_cpu_stop, NULL, 1);
ctr_running = 0;
put_online_cpus();
}
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
static DEFINE_PER_CPU(int, switch_index);
static inline int has_mux(void)
{
return !!model->switch_ctrl;
}
inline int op_x86_phys_to_virt(int phys)
{
return __this_cpu_read(switch_index) + phys;
}
inline int op_x86_virt_to_phys(int virt)
{
return virt % model->num_counters;
}
static void nmi_shutdown_mux(void)
{
int i;
if (!has_mux())
return;
for_each_possible_cpu(i) {
kfree(per_cpu(cpu_msrs, i).multiplex);
per_cpu(cpu_msrs, i).multiplex = NULL;
per_cpu(switch_index, i) = 0;
}
}
static int nmi_setup_mux(void)
{
size_t multiplex_size =
sizeof(struct op_msr) * model->num_virt_counters;
int i;
if (!has_mux())
return 1;
for_each_possible_cpu(i) {
per_cpu(cpu_msrs, i).multiplex =
kzalloc(multiplex_size, GFP_KERNEL);
if (!per_cpu(cpu_msrs, i).multiplex)
return 0;
}
return 1;
}
static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
{
int i;
struct op_msr *multiplex = msrs->multiplex;
if (!has_mux())
return;
for (i = 0; i < model->num_virt_counters; ++i) {
if (counter_config[i].enabled) {
multiplex[i].saved = -(u64)counter_config[i].count;
} else {
multiplex[i].saved = 0;
}
}
per_cpu(switch_index, cpu) = 0;
}
static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs)
{
struct op_msr *counters = msrs->counters;
struct op_msr *multiplex = msrs->multiplex;
int i;
for (i = 0; i < model->num_counters; ++i) {
int virt = op_x86_phys_to_virt(i);
if (counters[i].addr)
rdmsrl(counters[i].addr, multiplex[virt].saved);
}
}
static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs)
{
struct op_msr *counters = msrs->counters;
struct op_msr *multiplex = msrs->multiplex;
int i;
for (i = 0; i < model->num_counters; ++i) {
int virt = op_x86_phys_to_virt(i);
if (counters[i].addr)
wrmsrl(counters[i].addr, multiplex[virt].saved);
}
}
static void nmi_cpu_switch(void *dummy)
{
int cpu = smp_processor_id();
int si = per_cpu(switch_index, cpu);
struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
nmi_cpu_stop(NULL);
nmi_cpu_save_mpx_registers(msrs);
/* move to next set */
si += model->num_counters;
if ((si >= model->num_virt_counters) || (counter_config[si].count == 0))
per_cpu(switch_index, cpu) = 0;
else
per_cpu(switch_index, cpu) = si;
model->switch_ctrl(model, msrs);
nmi_cpu_restore_mpx_registers(msrs);
nmi_cpu_start(NULL);
}
/*
* Quick check to see if multiplexing is necessary.
* The check should be sufficient since counters are used
* in ordre.
*/
static int nmi_multiplex_on(void)
{
return counter_config[model->num_counters].count ? 0 : -EINVAL;
}
static int nmi_switch_event(void)
{
if (!has_mux())
return -ENOSYS; /* not implemented */
if (nmi_multiplex_on() < 0)
return -EINVAL; /* not necessary */
get_online_cpus();
if (ctr_running)
on_each_cpu(nmi_cpu_switch, NULL, 1);
put_online_cpus();
return 0;
}
static inline void mux_init(struct oprofile_operations *ops)
{
if (has_mux())
ops->switch_events = nmi_switch_event;
}
static void mux_clone(int cpu)
{
if (!has_mux())
return;
memcpy(per_cpu(cpu_msrs, cpu).multiplex,
per_cpu(cpu_msrs, 0).multiplex,
sizeof(struct op_msr) * model->num_virt_counters);
}
#else
inline int op_x86_phys_to_virt(int phys) { return phys; }
inline int op_x86_virt_to_phys(int virt) { return virt; }
static inline void nmi_shutdown_mux(void) { }
static inline int nmi_setup_mux(void) { return 1; }
static inline void
nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { }
static inline void mux_init(struct oprofile_operations *ops) { }
static void mux_clone(int cpu) { }
#endif
static void free_msrs(void)
{
int i;
for_each_possible_cpu(i) {
kfree(per_cpu(cpu_msrs, i).counters);
per_cpu(cpu_msrs, i).counters = NULL;
kfree(per_cpu(cpu_msrs, i).controls);
per_cpu(cpu_msrs, i).controls = NULL;
}
nmi_shutdown_mux();
}
static int allocate_msrs(void)
{
size_t controls_size = sizeof(struct op_msr) * model->num_controls;
size_t counters_size = sizeof(struct op_msr) * model->num_counters;
int i;
for_each_possible_cpu(i) {
per_cpu(cpu_msrs, i).counters = kzalloc(counters_size,
GFP_KERNEL);
if (!per_cpu(cpu_msrs, i).counters)
goto fail;
per_cpu(cpu_msrs, i).controls = kzalloc(controls_size,
GFP_KERNEL);
if (!per_cpu(cpu_msrs, i).controls)
goto fail;
}
if (!nmi_setup_mux())
goto fail;
return 1;
fail:
free_msrs();
return 0;
}
static void nmi_cpu_setup(void)
{
int cpu = smp_processor_id();
struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
nmi_cpu_save_registers(msrs);
raw_spin_lock(&oprofilefs_lock);
model->setup_ctrs(model, msrs);
nmi_cpu_setup_mux(cpu, msrs);
raw_spin_unlock(&oprofilefs_lock);
per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
static void nmi_cpu_restore_registers(struct op_msrs *msrs)
{
struct op_msr *counters = msrs->counters;
struct op_msr *controls = msrs->controls;
unsigned int i;
for (i = 0; i < model->num_controls; ++i) {
if (controls[i].addr)
wrmsrl(controls[i].addr, controls[i].saved);
}
for (i = 0; i < model->num_counters; ++i) {
if (counters[i].addr)
wrmsrl(counters[i].addr, counters[i].saved);
}
}
static void nmi_cpu_shutdown(void)
{
unsigned int v;
int cpu = smp_processor_id();
struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
/* restoring APIC_LVTPC can trigger an apic error because the delivery
* mode and vector nr combination can be illegal. That's by design: on
* power on apic lvt contain a zero vector nr which are legal only for
* NMI delivery mode. So inhibit apic err before restoring lvtpc
*/
v = apic_read(APIC_LVTERR);
apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
apic_write(APIC_LVTERR, v);
nmi_cpu_restore_registers(msrs);
}
static int nmi_cpu_online(unsigned int cpu)
{
local_irq_disable();
if (nmi_enabled)
nmi_cpu_setup();
if (ctr_running)
nmi_cpu_start(NULL);
local_irq_enable();
return 0;
}
static int nmi_cpu_down_prep(unsigned int cpu)
{
local_irq_disable();
if (ctr_running)
nmi_cpu_stop(NULL);
if (nmi_enabled)
nmi_cpu_shutdown();
local_irq_enable();
return 0;
}
static int nmi_create_files(struct dentry *root)
{
unsigned int i;
for (i = 0; i < model->num_virt_counters; ++i) {
struct dentry *dir;
char buf[4];
/* quick little hack to _not_ expose a counter if it is not
* available for use. This should protect userspace app.
* NOTE: assumes 1:1 mapping here (that counters are organized
* sequentially in their struct assignment).
*/
if (!avail_to_resrv_perfctr_nmi_bit(op_x86_virt_to_phys(i)))
continue;
snprintf(buf, sizeof(buf), "%d", i);
dir = oprofilefs_mkdir(root, buf);
oprofilefs_create_ulong(dir, "enabled", &counter_config[i].enabled);
oprofilefs_create_ulong(dir, "event", &counter_config[i].event);
oprofilefs_create_ulong(dir, "count", &counter_config[i].count);
oprofilefs_create_ulong(dir, "unit_mask", &counter_config[i].unit_mask);
oprofilefs_create_ulong(dir, "kernel", &counter_config[i].kernel);
oprofilefs_create_ulong(dir, "user", &counter_config[i].user);
oprofilefs_create_ulong(dir, "extra", &counter_config[i].extra);
}
return 0;
}
static enum cpuhp_state cpuhp_nmi_online;
static int nmi_setup(void)
{
int err = 0;
int cpu;
if (!allocate_msrs())
return -ENOMEM;
/* We need to serialize save and setup for HT because the subset
* of msrs are distinct for save and setup operations
*/
/* Assume saved/restored counters are the same on all CPUs */
err = model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
if (err)
goto fail;
for_each_possible_cpu(cpu) {
if (!IS_ENABLED(CONFIG_SMP) || !cpu)
continue;
memcpy(per_cpu(cpu_msrs, cpu).counters,
per_cpu(cpu_msrs, 0).counters,
sizeof(struct op_msr) * model->num_counters);
memcpy(per_cpu(cpu_msrs, cpu).controls,
per_cpu(cpu_msrs, 0).controls,
sizeof(struct op_msr) * model->num_controls);
mux_clone(cpu);
}
nmi_enabled = 0;
ctr_running = 0;
/* make variables visible to the nmi handler: */
smp_mb();
err = register_nmi_handler(NMI_LOCAL, profile_exceptions_notify,
0, "oprofile");
if (err)
goto fail;
nmi_enabled = 1;
/* make nmi_enabled visible to the nmi handler: */
smp_mb();
err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/oprofile:online",
nmi_cpu_online, nmi_cpu_down_prep);
if (err < 0)
goto fail_nmi;
cpuhp_nmi_online = err;
return 0;
fail_nmi:
unregister_nmi_handler(NMI_LOCAL, "oprofile");
fail:
free_msrs();
return err;
}
static void nmi_shutdown(void)
{
struct op_msrs *msrs;
cpuhp_remove_state(cpuhp_nmi_online);
nmi_enabled = 0;
ctr_running = 0;
/* make variables visible to the nmi handler: */
smp_mb();
unregister_nmi_handler(NMI_LOCAL, "oprofile");
msrs = &get_cpu_var(cpu_msrs);
model->shutdown(msrs);
free_msrs();
put_cpu_var(cpu_msrs);
}
#ifdef CONFIG_PM
static int nmi_suspend(void)
{
/* Only one CPU left, just stop that one */
if (nmi_enabled == 1)
nmi_cpu_stop(NULL);
return 0;
}
static void nmi_resume(void)
{
if (nmi_enabled == 1)
nmi_cpu_start(NULL);
}
static struct syscore_ops oprofile_syscore_ops = {
.resume = nmi_resume,
.suspend = nmi_suspend,
};
static void __init init_suspend_resume(void)
{
register_syscore_ops(&oprofile_syscore_ops);
}
static void exit_suspend_resume(void)
{
unregister_syscore_ops(&oprofile_syscore_ops);
}
#else
static inline void init_suspend_resume(void) { }
static inline void exit_suspend_resume(void) { }
#endif /* CONFIG_PM */
static int __init p4_init(char **cpu_type)
{
__u8 cpu_model = boot_cpu_data.x86_model;
if (cpu_model > 6 || cpu_model == 5)
return 0;
#ifndef CONFIG_SMP
*cpu_type = "i386/p4";
model = &op_p4_spec;
return 1;
#else
switch (smp_num_siblings) {
case 1:
*cpu_type = "i386/p4";
model = &op_p4_spec;
return 1;
case 2:
*cpu_type = "i386/p4-ht";
model = &op_p4_ht2_spec;
return 1;
}
#endif
printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
return 0;
}
enum __force_cpu_type {
reserved = 0, /* do not force */
timer,
arch_perfmon,
};
static int force_cpu_type;
static int set_cpu_type(const char *str, const struct kernel_param *kp)
{
if (!strcmp(str, "timer")) {
force_cpu_type = timer;
printk(KERN_INFO "oprofile: forcing NMI timer mode\n");
} else if (!strcmp(str, "arch_perfmon")) {
force_cpu_type = arch_perfmon;
printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
} else {
force_cpu_type = 0;
}
return 0;
}
module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
static int __init ppro_init(char **cpu_type)
{
__u8 cpu_model = boot_cpu_data.x86_model;
struct op_x86_model_spec *spec = &op_ppro_spec; /* default */
if (force_cpu_type == arch_perfmon && boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
return 0;
/*
* Documentation on identifying Intel processors by CPU family
* and model can be found in the Intel Software Developer's
* Manuals (SDM):
*
* http://www.intel.com/products/processor/manuals/
*
* As of May 2010 the documentation for this was in the:
* "Intel 64 and IA-32 Architectures Software Developer's
* Manual Volume 3B: System Programming Guide", "Table B-1
* CPUID Signature Values of DisplayFamily_DisplayModel".
*/
switch (cpu_model) {
case 0 ... 2:
*cpu_type = "i386/ppro";
break;
case 3 ... 5:
*cpu_type = "i386/pii";
break;
case 6 ... 8:
case 10 ... 11:
*cpu_type = "i386/piii";
break;
case 9:
case 13:
*cpu_type = "i386/p6_mobile";
break;
case 14:
*cpu_type = "i386/core";
break;
case 0x0f:
case 0x16:
case 0x17:
case 0x1d:
*cpu_type = "i386/core_2";
break;
case 0x1a:
case 0x1e:
case 0x2e:
spec = &op_arch_perfmon_spec;
*cpu_type = "i386/core_i7";
break;
case 0x1c:
*cpu_type = "i386/atom";
break;
default:
/* Unknown */
return 0;
}
model = spec;
return 1;
}
int __init op_nmi_init(struct oprofile_operations *ops)
{
__u8 vendor = boot_cpu_data.x86_vendor;
__u8 family = boot_cpu_data.x86;
char *cpu_type = NULL;
int ret = 0;
if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
if (force_cpu_type == timer)
return -ENODEV;
switch (vendor) {
case X86_VENDOR_AMD:
/* Needs to be at least an Athlon (or hammer in 32bit mode) */
switch (family) {
case 6:
cpu_type = "i386/athlon";
break;
case 0xf:
/*
* Actually it could be i386/hammer too, but
* give user space an consistent name.
*/
cpu_type = "x86-64/hammer";
break;
case 0x10:
cpu_type = "x86-64/family10";
break;
case 0x11:
cpu_type = "x86-64/family11h";
break;
case 0x12:
cpu_type = "x86-64/family12h";
break;
case 0x14:
cpu_type = "x86-64/family14h";
break;
case 0x15:
cpu_type = "x86-64/family15h";
break;
default:
return -ENODEV;
}
model = &op_amd_spec;
break;
case X86_VENDOR_INTEL:
switch (family) {
/* Pentium IV */
case 0xf:
p4_init(&cpu_type);
break;
/* A P6-class processor */
case 6:
ppro_init(&cpu_type);
break;
default:
break;
}
if (cpu_type)
break;
if (!boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
return -ENODEV;
/* use arch perfmon as fallback */
cpu_type = "i386/arch_perfmon";
model = &op_arch_perfmon_spec;
break;
default:
return -ENODEV;
}
/* default values, can be overwritten by model */
ops->create_files = nmi_create_files;
ops->setup = nmi_setup;
ops->shutdown = nmi_shutdown;
ops->start = nmi_start;
ops->stop = nmi_stop;
ops->cpu_type = cpu_type;
if (model->init)
ret = model->init(ops);
if (ret)
return ret;
if (!model->num_virt_counters)
model->num_virt_counters = model->num_counters;
mux_init(ops);
init_suspend_resume();
printk(KERN_INFO "oprofile: using NMI interrupt.\n");
return 0;
}
void op_nmi_exit(void)
{
exit_suspend_resume();
}
/**
* @file op_counter.h
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon
*/
#ifndef OP_COUNTER_H
#define OP_COUNTER_H
#define OP_MAX_COUNTER 32
/* Per-perfctr configuration as set via
* oprofilefs.
*/
struct op_counter_config {
unsigned long count;
unsigned long enabled;
unsigned long event;
unsigned long kernel;
unsigned long user;
unsigned long unit_mask;
unsigned long extra;
};
extern struct op_counter_config counter_config[];
#endif /* OP_COUNTER_H */
/*
* @file op_model_amd.c
* athlon / K7 / K8 / Family 10h model-specific MSR operations
*
* @remark Copyright 2002-2009 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon
* @author Philippe Elie
* @author Graydon Hoare
* @author Robert Richter <robert.richter@amd.com>
* @author Barry Kasindorf <barry.kasindorf@amd.com>
* @author Jason Yeh <jason.yeh@amd.com>
* @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
*/
#include <linux/oprofile.h>
#include <linux/device.h>
#include <linux/pci.h>
#include <linux/percpu.h>
#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/nmi.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include "op_x86_model.h"
#include "op_counter.h"
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
#define NUM_VIRT_COUNTERS 32
#else
#define NUM_VIRT_COUNTERS 0
#endif
#define OP_EVENT_MASK 0x0FFF
#define OP_CTR_OVERFLOW (1ULL<<31)
#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21))
static int num_counters;
static unsigned long reset_value[OP_MAX_COUNTER];
#define IBS_FETCH_SIZE 6
#define IBS_OP_SIZE 12
static u32 ibs_caps;
struct ibs_config {
unsigned long op_enabled;
unsigned long fetch_enabled;
unsigned long max_cnt_fetch;
unsigned long max_cnt_op;
unsigned long rand_en;
unsigned long dispatched_ops;
unsigned long branch_target;
};
struct ibs_state {
u64 ibs_op_ctl;
int branch_target;
unsigned long sample_size;
};
static struct ibs_config ibs_config;
static struct ibs_state ibs_state;
/*
* IBS randomization macros
*/
#define IBS_RANDOM_BITS 12
#define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1)
#define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5))
/*
* 16-bit Linear Feedback Shift Register (LFSR)
*
* 16 14 13 11
* Feedback polynomial = X + X + X + X + 1
*/
static unsigned int lfsr_random(void)
{
static unsigned int lfsr_value = 0xF00D;
unsigned int bit;
/* Compute next bit to shift in */
bit = ((lfsr_value >> 0) ^
(lfsr_value >> 2) ^
(lfsr_value >> 3) ^
(lfsr_value >> 5)) & 0x0001;
/* Advance to next register value */
lfsr_value = (lfsr_value >> 1) | (bit << 15);
return lfsr_value;
}
/*
* IBS software randomization
*
* The IBS periodic op counter is randomized in software. The lower 12
* bits of the 20 bit counter are randomized. IbsOpCurCnt is
* initialized with a 12 bit random value.
*/
static inline u64 op_amd_randomize_ibs_op(u64 val)
{
unsigned int random = lfsr_random();
if (!(ibs_caps & IBS_CAPS_RDWROPCNT))
/*
* Work around if the hw can not write to IbsOpCurCnt
*
* Randomize the lower 8 bits of the 16 bit
* IbsOpMaxCnt [15:0] value in the range of -128 to
* +127 by adding/subtracting an offset to the
* maximum count (IbsOpMaxCnt).
*
* To avoid over or underflows and protect upper bits
* starting at bit 16, the initial value for
* IbsOpMaxCnt must fit in the range from 0x0081 to
* 0xff80.
*/
val += (s8)(random >> 4);
else
val |= (u64)(random & IBS_RANDOM_MASK) << 32;
return val;
}
static inline void
op_amd_handle_ibs(struct pt_regs * const regs,
struct op_msrs const * const msrs)
{
u64 val, ctl;
struct op_entry entry;
if (!ibs_caps)
return;
if (ibs_config.fetch_enabled) {
rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
if (ctl & IBS_FETCH_VAL) {
rdmsrl(MSR_AMD64_IBSFETCHLINAD, val);
oprofile_write_reserve(&entry, regs, val,
IBS_FETCH_CODE, IBS_FETCH_SIZE);
oprofile_add_data64(&entry, val);
oprofile_add_data64(&entry, ctl);
rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val);
oprofile_add_data64(&entry, val);
oprofile_write_commit(&entry);
/* reenable the IRQ */
ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT);
ctl |= IBS_FETCH_ENABLE;
wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
}
}
if (ibs_config.op_enabled) {
rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
if (ctl & IBS_OP_VAL) {
rdmsrl(MSR_AMD64_IBSOPRIP, val);
oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE,
ibs_state.sample_size);
oprofile_add_data64(&entry, val);
rdmsrl(MSR_AMD64_IBSOPDATA, val);
oprofile_add_data64(&entry, val);
rdmsrl(MSR_AMD64_IBSOPDATA2, val);
oprofile_add_data64(&entry, val);
rdmsrl(MSR_AMD64_IBSOPDATA3, val);
oprofile_add_data64(&entry, val);
rdmsrl(MSR_AMD64_IBSDCLINAD, val);
oprofile_add_data64(&entry, val);
rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
oprofile_add_data64(&entry, val);
if (ibs_state.branch_target) {
rdmsrl(MSR_AMD64_IBSBRTARGET, val);
oprofile_add_data(&entry, (unsigned long)val);
}
oprofile_write_commit(&entry);
/* reenable the IRQ */
ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
}
}
}
static inline void op_amd_start_ibs(void)
{
u64 val;
if (!ibs_caps)
return;
memset(&ibs_state, 0, sizeof(ibs_state));
/*
* Note: Since the max count settings may out of range we
* write back the actual used values so that userland can read
* it.
*/
if (ibs_config.fetch_enabled) {
val = ibs_config.max_cnt_fetch >> 4;
val = min(val, IBS_FETCH_MAX_CNT);
ibs_config.max_cnt_fetch = val << 4;
val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
val |= IBS_FETCH_ENABLE;
wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
}
if (ibs_config.op_enabled) {
val = ibs_config.max_cnt_op >> 4;
if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
/*
* IbsOpCurCnt not supported. See
* op_amd_randomize_ibs_op() for details.
*/
val = clamp(val, 0x0081ULL, 0xFF80ULL);
ibs_config.max_cnt_op = val << 4;
} else {
/*
* The start value is randomized with a
* positive offset, we need to compensate it
* with the half of the randomized range. Also
* avoid underflows.
*/
val += IBS_RANDOM_MAXCNT_OFFSET;
if (ibs_caps & IBS_CAPS_OPCNTEXT)
val = min(val, IBS_OP_MAX_CNT_EXT);
else
val = min(val, IBS_OP_MAX_CNT);
ibs_config.max_cnt_op =
(val - IBS_RANDOM_MAXCNT_OFFSET) << 4;
}
val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);
val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
val |= IBS_OP_ENABLE;
ibs_state.ibs_op_ctl = val;
ibs_state.sample_size = IBS_OP_SIZE;
if (ibs_config.branch_target) {
ibs_state.branch_target = 1;
ibs_state.sample_size++;
}
val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
wrmsrl(MSR_AMD64_IBSOPCTL, val);
}
}
static void op_amd_stop_ibs(void)
{
if (!ibs_caps)
return;
if (ibs_config.fetch_enabled)
/* clear max count and enable */
wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);
if (ibs_config.op_enabled)
/* clear max count and enable */
wrmsrl(MSR_AMD64_IBSOPCTL, 0);
}
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
struct op_msrs const * const msrs)
{
u64 val;
int i;
/* enable active counters */
for (i = 0; i < num_counters; ++i) {
int virt = op_x86_phys_to_virt(i);
if (!reset_value[virt])
continue;
rdmsrl(msrs->controls[i].addr, val);
val &= model->reserved;
val |= op_x86_get_ctrl(model, &counter_config[virt]);
wrmsrl(msrs->controls[i].addr, val);
}
}
#endif
/* functions for op_amd_spec */
static void op_amd_shutdown(struct op_msrs const * const msrs)
{
int i;
for (i = 0; i < num_counters; ++i) {
if (!msrs->counters[i].addr)
continue;
release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
}
}
static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
{
int i;
for (i = 0; i < num_counters; i++) {
if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
goto fail;
if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) {
release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
goto fail;
}
/* both registers must be reserved */
if (num_counters == AMD64_NUM_COUNTERS_CORE) {
msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
} else {
msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
}
continue;
fail:
if (!counter_config[i].enabled)
continue;
op_x86_warn_reserved(i);
op_amd_shutdown(msrs);
return -EBUSY;
}
return 0;
}
static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
struct op_msrs const * const msrs)
{
u64 val;
int i;
/* setup reset_value */
for (i = 0; i < OP_MAX_COUNTER; ++i) {
if (counter_config[i].enabled
&& msrs->counters[op_x86_virt_to_phys(i)].addr)
reset_value[i] = counter_config[i].count;
else
reset_value[i] = 0;
}
/* clear all counters */
for (i = 0; i < num_counters; ++i) {
if (!msrs->controls[i].addr)
continue;
rdmsrl(msrs->controls[i].addr, val);
if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
op_x86_warn_in_use(i);
val &= model->reserved;
wrmsrl(msrs->controls[i].addr, val);
/*
* avoid a false detection of ctr overflows in NMI
* handler
*/
wrmsrl(msrs->counters[i].addr, -1LL);
}
/* enable active counters */
for (i = 0; i < num_counters; ++i) {
int virt = op_x86_phys_to_virt(i);
if (!reset_value[virt])
continue;
/* setup counter registers */
wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);
/* setup control registers */
rdmsrl(msrs->controls[i].addr, val);
val &= model->reserved;
val |= op_x86_get_ctrl(model, &counter_config[virt]);
wrmsrl(msrs->controls[i].addr, val);
}
}
static int op_amd_check_ctrs(struct pt_regs * const regs,
struct op_msrs const * const msrs)
{
u64 val;
int i;
for (i = 0; i < num_counters; ++i) {
int virt = op_x86_phys_to_virt(i);
if (!reset_value[virt])
continue;
rdmsrl(msrs->counters[i].addr, val);
/* bit is clear if overflowed: */
if (val & OP_CTR_OVERFLOW)
continue;
oprofile_add_sample(regs, virt);
wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);
}
op_amd_handle_ibs(regs, msrs);
/* See op_model_ppro.c */
return 1;
}
static void op_amd_start(struct op_msrs const * const msrs)
{
u64 val;
int i;
for (i = 0; i < num_counters; ++i) {
if (!reset_value[op_x86_phys_to_virt(i)])
continue;
rdmsrl(msrs->controls[i].addr, val);
val |= ARCH_PERFMON_EVENTSEL_ENABLE;
wrmsrl(msrs->controls[i].addr, val);
}
op_amd_start_ibs();
}
static void op_amd_stop(struct op_msrs const * const msrs)
{
u64 val;
int i;
/*
* Subtle: stop on all counters to avoid race with setting our
* pm callback
*/
for (i = 0; i < num_counters; ++i) {
if (!reset_value[op_x86_phys_to_virt(i)])
continue;
rdmsrl(msrs->controls[i].addr, val);
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
wrmsrl(msrs->controls[i].addr, val);
}
op_amd_stop_ibs();
}
/*
* check and reserve APIC extended interrupt LVT offset for IBS if
* available
*/
static void init_ibs(void)
{
ibs_caps = get_ibs_caps();
if (!ibs_caps)
return;
printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
}
static int (*create_arch_files)(struct dentry *root);
static int setup_ibs_files(struct dentry *root)
{
struct dentry *dir;
int ret = 0;
/* architecture specific files */
if (create_arch_files)
ret = create_arch_files(root);
if (ret)
return ret;
if (!ibs_caps)
return ret;
/* model specific files */
/* setup some reasonable defaults */
memset(&ibs_config, 0, sizeof(ibs_config));
ibs_config.max_cnt_fetch = 250000;
ibs_config.max_cnt_op = 250000;
if (ibs_caps & IBS_CAPS_FETCHSAM) {
dir = oprofilefs_mkdir(root, "ibs_fetch");
oprofilefs_create_ulong(dir, "enable",
&ibs_config.fetch_enabled);
oprofilefs_create_ulong(dir, "max_count",
&ibs_config.max_cnt_fetch);
oprofilefs_create_ulong(dir, "rand_enable",
&ibs_config.rand_en);
}
if (ibs_caps & IBS_CAPS_OPSAM) {
dir = oprofilefs_mkdir(root, "ibs_op");
oprofilefs_create_ulong(dir, "enable",
&ibs_config.op_enabled);
oprofilefs_create_ulong(dir, "max_count",
&ibs_config.max_cnt_op);
if (ibs_caps & IBS_CAPS_OPCNT)
oprofilefs_create_ulong(dir, "dispatched_ops",
&ibs_config.dispatched_ops);
if (ibs_caps & IBS_CAPS_BRNTRGT)
oprofilefs_create_ulong(dir, "branch_target",
&ibs_config.branch_target);
}
return 0;
}
struct op_x86_model_spec op_amd_spec;
static int op_amd_init(struct oprofile_operations *ops)
{
init_ibs();
create_arch_files = ops->create_files;
ops->create_files = setup_ibs_files;
if (boot_cpu_data.x86 == 0x15) {
num_counters = AMD64_NUM_COUNTERS_CORE;
} else {
num_counters = AMD64_NUM_COUNTERS;
}
op_amd_spec.num_counters = num_counters;
op_amd_spec.num_controls = num_counters;
op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);
return 0;
}
struct op_x86_model_spec op_amd_spec = {
/* num_counters/num_controls filled in at runtime */
.reserved = MSR_AMD_EVENTSEL_RESERVED,
.event_mask = OP_EVENT_MASK,
.init = op_amd_init,
.fill_in_addresses = &op_amd_fill_in_addresses,
.setup_ctrs = &op_amd_setup_ctrs,
.check_ctrs = &op_amd_check_ctrs,
.start = &op_amd_start,
.stop = &op_amd_stop,
.shutdown = &op_amd_shutdown,
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
.switch_ctrl = &op_mux_switch_ctrl,
#endif
};
/**
* @file op_model_p4.c
* P4 model-specific MSR operations
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author Graydon Hoare
*/
#include <linux/oprofile.h>
#include <linux/smp.h>
#include <linux/ptrace.h>
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/fixmap.h>
#include <asm/apic.h>
#include "op_x86_model.h"
#include "op_counter.h"
#define NUM_EVENTS 39
#define NUM_COUNTERS_NON_HT 8
#define NUM_ESCRS_NON_HT 45
#define NUM_CCCRS_NON_HT 18
#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)
#define NUM_COUNTERS_HT2 4
#define NUM_ESCRS_HT2 23
#define NUM_CCCRS_HT2 9
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
#define OP_CTR_OVERFLOW (1ULL<<31)
static unsigned int num_counters = NUM_COUNTERS_NON_HT;
static unsigned int num_controls = NUM_CONTROLS_NON_HT;
/* this has to be checked dynamically since the
hyper-threadedness of a chip is discovered at
kernel boot-time. */
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
if (smp_num_siblings == 2) {
num_counters = NUM_COUNTERS_HT2;
num_controls = NUM_CONTROLS_HT2;
}
#endif
}
static inline int addr_increment(void)
{
#ifdef CONFIG_SMP
return smp_num_siblings == 2 ? 2 : 1;
#else
return 1;
#endif
}
/* tables to simulate simplified hardware view of p4 registers */
struct p4_counter_binding {
int virt_counter;
int counter_address;
int cccr_address;
};
struct p4_event_binding {
int escr_select; /* value to put in CCCR */
int event_select; /* value to put in ESCR */
struct {
int virt_counter; /* for this counter... */
int escr_address; /* use this ESCR */
} bindings[2];
};
/* nb: these CTR_* defines are a duplicate of defines in
event/i386.p4*events. */
#define CTR_BPU_0 (1 << 0)
#define CTR_MS_0 (1 << 1)
#define CTR_FLAME_0 (1 << 2)
#define CTR_IQ_4 (1 << 3)
#define CTR_BPU_2 (1 << 4)
#define CTR_MS_2 (1 << 5)
#define CTR_FLAME_2 (1 << 6)
#define CTR_IQ_5 (1 << 7)
static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
{ CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 },
{ CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 },
{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
{ CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 },
{ CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 },
{ CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 },
{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
{ CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 }
};
#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
/* p4 event codes in libop/op_event.h are indices into this table. */
static struct p4_event_binding p4_events[NUM_EVENTS] = {
{ /* BRANCH_RETIRED */
0x05, 0x06,
{ {CTR_IQ_4, MSR_P4_CRU_ESCR2},
{CTR_IQ_5, MSR_P4_CRU_ESCR3} }
},
{ /* MISPRED_BRANCH_RETIRED */
0x04, 0x03,
{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
{ CTR_IQ_5, MSR_P4_CRU_ESCR1} }
},
{ /* TC_DELIVER_MODE */
0x01, 0x01,
{ { CTR_MS_0, MSR_P4_TC_ESCR0},
{ CTR_MS_2, MSR_P4_TC_ESCR1} }
},
{ /* BPU_FETCH_REQUEST */
0x00, 0x03,
{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
{ CTR_BPU_2, MSR_P4_BPU_ESCR1} }
},
{ /* ITLB_REFERENCE */
0x03, 0x18,
{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
{ CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
},
{ /* MEMORY_CANCEL */
0x05, 0x02,
{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
{ CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
},
{ /* MEMORY_COMPLETE */
0x02, 0x08,
{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
{ CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
},
{ /* LOAD_PORT_REPLAY */
0x02, 0x04,
{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
{ CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
},
{ /* STORE_PORT_REPLAY */
0x02, 0x05,
{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
{ CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
},
{ /* MOB_LOAD_REPLAY */
0x02, 0x03,
{ { CTR_BPU_0, MSR_P4_MOB_ESCR0},
{ CTR_BPU_2, MSR_P4_MOB_ESCR1} }
},
{ /* PAGE_WALK_TYPE */
0x04, 0x01,
{ { CTR_BPU_0, MSR_P4_PMH_ESCR0},
{ CTR_BPU_2, MSR_P4_PMH_ESCR1} }
},
{ /* BSQ_CACHE_REFERENCE */
0x07, 0x0c,
{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
{ CTR_BPU_2, MSR_P4_BSU_ESCR1} }
},
{ /* IOQ_ALLOCATION */
0x06, 0x03,
{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
{ 0, 0 } }
},
{ /* IOQ_ACTIVE_ENTRIES */
0x06, 0x1a,
{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
{ 0, 0 } }
},
{ /* FSB_DATA_ACTIVITY */
0x06, 0x17,
{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
{ CTR_BPU_2, MSR_P4_FSB_ESCR1} }
},
{ /* BSQ_ALLOCATION */
0x07, 0x05,
{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
{ 0, 0 } }
},
{ /* BSQ_ACTIVE_ENTRIES */
0x07, 0x06,
{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
{ 0, 0 } }
},
{ /* X87_ASSIST */
0x05, 0x03,
{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
{ CTR_IQ_5, MSR_P4_CRU_ESCR3} }
},
{ /* SSE_INPUT_ASSIST */
0x01, 0x34,
{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
{ CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
},
{ /* PACKED_SP_UOP */
0x01, 0x08,
{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
{ CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
},
{ /* PACKED_DP_UOP */
0x01, 0x0c,
{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
{ CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
},
{ /* SCALAR_SP_UOP */
0x01, 0x0a,
{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
{ CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
},
{ /* SCALAR_DP_UOP */
0x01, 0x0e,
{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
{ CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
},
{ /* 64BIT_MMX_UOP */
0x01, 0x02,
{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
{ CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
},
{ /* 128BIT_MMX_UOP */
0x01, 0x1a,
{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
{ CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
},
{ /* X87_FP_UOP */
0x01, 0x04,
{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
{ CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
},
{ /* X87_SIMD_MOVES_UOP */
0x01, 0x2e,
{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
{ CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
},
{ /* MACHINE_CLEAR */
0x05, 0x02,
{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
{ CTR_IQ_5, MSR_P4_CRU_ESCR3} }
},
{ /* GLOBAL_POWER_EVENTS */
0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
{ CTR_BPU_2, MSR_P4_FSB_ESCR1} }
},
{ /* TC_MS_XFER */
0x00, 0x05,
{ { CTR_MS_0, MSR_P4_MS_ESCR0},
{ CTR_MS_2, MSR_P4_MS_ESCR1} }
},
{ /* UOP_QUEUE_WRITES */
0x00, 0x09,
{ { CTR_MS_0, MSR_P4_MS_ESCR0},
{ CTR_MS_2, MSR_P4_MS_ESCR1} }
},
{ /* FRONT_END_EVENT */
0x05, 0x08,
{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
{ CTR_IQ_5, MSR_P4_CRU_ESCR3} }
},
{ /* EXECUTION_EVENT */
0x05, 0x0c,
{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
{ CTR_IQ_5, MSR_P4_CRU_ESCR3} }
},
{ /* REPLAY_EVENT */
0x05, 0x09,
{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
{ CTR_IQ_5, MSR_P4_CRU_ESCR3} }
},
{ /* INSTR_RETIRED */
0x04, 0x02,
{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
{ CTR_IQ_5, MSR_P4_CRU_ESCR1} }
},
{ /* UOPS_RETIRED */
0x04, 0x01,
{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
{ CTR_IQ_5, MSR_P4_CRU_ESCR1} }
},
{ /* UOP_TYPE */
0x02, 0x02,
{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
{ CTR_IQ_5, MSR_P4_RAT_ESCR1} }
},
{ /* RETIRED_MISPRED_BRANCH_TYPE */
0x02, 0x05,
{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
{ CTR_MS_2, MSR_P4_TBPU_ESCR1} }
},
{ /* RETIRED_BRANCH_TYPE */
0x02, 0x04,
{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
{ CTR_MS_2, MSR_P4_TBPU_ESCR1} }
}
};
#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)
#define ESCR_RESERVED_BITS 0x80000003
#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
#define CCCR_RESERVED_BITS 0x38030FFF
#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
/* this assigns a "stagger" to the current CPU, which is used throughout
the code in this module as an extra array offset, to select the "even"
or "odd" part of all the divided resources. */
static unsigned int get_stagger(void)
{
#ifdef CONFIG_SMP
int cpu = smp_processor_id();
return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map));
#endif
return 0;
}
/* finally, mediate access to a real hardware counter
by passing a "virtual" counter numer to this macro,
along with your stagger setting. */
#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
static unsigned long reset_value[NUM_COUNTERS_NON_HT];
static void p4_shutdown(struct op_msrs const * const msrs)
{
int i;
for (i = 0; i < num_counters; ++i) {
if (msrs->counters[i].addr)
release_perfctr_nmi(msrs->counters[i].addr);
}
/*
* some of the control registers are specially reserved in
* conjunction with the counter registers (hence the starting offset).
* This saves a few bits.
*/
for (i = num_counters; i < num_controls; ++i) {
if (msrs->controls[i].addr)
release_evntsel_nmi(msrs->controls[i].addr);
}
}
static int p4_fill_in_addresses(struct op_msrs * const msrs)
{
unsigned int i;
unsigned int addr, cccraddr, stag;
setup_num_counters();
stag = get_stagger();
/* the counter & cccr registers we pay attention to */
for (i = 0; i < num_counters; ++i) {
addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
if (reserve_perfctr_nmi(addr)) {
msrs->counters[i].addr = addr;
msrs->controls[i].addr = cccraddr;
}
}
/* 43 ESCR registers in three or four discontiguous group */
for (addr = MSR_P4_BSU_ESCR0 + stag;
addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
if (reserve_evntsel_nmi(addr))
msrs->controls[i].addr = addr;
}
/* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
* to avoid special case in nmi_{save|restore}_registers() */
if (boot_cpu_data.x86_model >= 0x3) {
for (addr = MSR_P4_BSU_ESCR0 + stag;
addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
if (reserve_evntsel_nmi(addr))
msrs->controls[i].addr = addr;
}
} else {
for (addr = MSR_P4_IQ_ESCR0 + stag;
addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
if (reserve_evntsel_nmi(addr))
msrs->controls[i].addr = addr;
}
}
for (addr = MSR_P4_RAT_ESCR0 + stag;
addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
if (reserve_evntsel_nmi(addr))
msrs->controls[i].addr = addr;
}
for (addr = MSR_P4_MS_ESCR0 + stag;
addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
if (reserve_evntsel_nmi(addr))
msrs->controls[i].addr = addr;
}
for (addr = MSR_P4_IX_ESCR0 + stag;
addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
if (reserve_evntsel_nmi(addr))
msrs->controls[i].addr = addr;
}
/* there are 2 remaining non-contiguously located ESCRs */
if (num_counters == NUM_COUNTERS_NON_HT) {
/* standard non-HT CPUs handle both remaining ESCRs*/
if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
} else if (stag == 0) {
/* HT CPUs give the first remainder to the even thread, as
the 32nd control register */
if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
} else {
/* and two copies of the second to the odd thread,
for the 22st and 23nd control registers */
if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
}
}
for (i = 0; i < num_counters; ++i) {
if (!counter_config[i].enabled)
continue;
if (msrs->controls[i].addr)
continue;
op_x86_warn_reserved(i);
p4_shutdown(msrs);
return -EBUSY;
}
return 0;
}
static void pmc_setup_one_p4_counter(unsigned int ctr)
{
int i;
int const maxbind = 2;
unsigned int cccr = 0;
unsigned int escr = 0;
unsigned int high = 0;
unsigned int counter_bit;
struct p4_event_binding *ev = NULL;
unsigned int stag;
stag = get_stagger();
/* convert from counter *number* to counter *bit* */
counter_bit = 1 << VIRT_CTR(stag, ctr);
/* find our event binding structure. */
if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
printk(KERN_ERR
"oprofile: P4 event code 0x%lx out of range\n",
counter_config[ctr].event);
return;
}
ev = &(p4_events[counter_config[ctr].event - 1]);
for (i = 0; i < maxbind; i++) {
if (ev->bindings[i].virt_counter & counter_bit) {
/* modify ESCR */
rdmsr(ev->bindings[i].escr_address, escr, high);
ESCR_CLEAR(escr);
if (stag == 0) {
ESCR_SET_USR_0(escr, counter_config[ctr].user);
ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
} else {
ESCR_SET_USR_1(escr, counter_config[ctr].user);
ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
}
ESCR_SET_EVENT_SELECT(escr, ev->event_select);
ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
wrmsr(ev->bindings[i].escr_address, escr, high);
/* modify CCCR */
rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
cccr, high);
CCCR_CLEAR(cccr);
CCCR_SET_REQUIRED_BITS(cccr);
CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
if (stag == 0)
CCCR_SET_PMI_OVF_0(cccr);
else
CCCR_SET_PMI_OVF_1(cccr);
wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
cccr, high);
return;
}
}
printk(KERN_ERR
"oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
counter_config[ctr].event, stag, ctr);
}
static void p4_setup_ctrs(struct op_x86_model_spec const *model,
struct op_msrs const * const msrs)
{
unsigned int i;
unsigned int low, high;
unsigned int stag;
stag = get_stagger();
rdmsr(MSR_IA32_MISC_ENABLE, low, high);
if (!MISC_PMC_ENABLED_P(low)) {
printk(KERN_ERR "oprofile: P4 PMC not available\n");
return;
}
/* clear the cccrs we will use */
for (i = 0; i < num_counters; i++) {
if (unlikely(!msrs->controls[i].addr))
continue;
rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
CCCR_CLEAR(low);
CCCR_SET_REQUIRED_BITS(low);
wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
}
/* clear all escrs (including those outside our concern) */
for (i = num_counters; i < num_controls; i++) {
if (unlikely(!msrs->controls[i].addr))
continue;
wrmsr(msrs->controls[i].addr, 0, 0);
}
/* setup all counters */
for (i = 0; i < num_counters; ++i) {
if (counter_config[i].enabled && msrs->controls[i].addr) {
reset_value[i] = counter_config[i].count;
pmc_setup_one_p4_counter(i);
wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address,
-(u64)counter_config[i].count);
} else {
reset_value[i] = 0;
}
}
}
static int p4_check_ctrs(struct pt_regs * const regs,
struct op_msrs const * const msrs)
{
unsigned long ctr, low, high, stag, real;
int i;
stag = get_stagger();
for (i = 0; i < num_counters; ++i) {
if (!reset_value[i])
continue;
/*
* there is some eccentricity in the hardware which
* requires that we perform 2 extra corrections:
*
* - check both the CCCR:OVF flag for overflow and the
* counter high bit for un-flagged overflows.
*
* - write the counter back twice to ensure it gets
* updated properly.
*
* the former seems to be related to extra NMIs happening
* during the current NMI; the latter is reported as errata
* N15 in intel doc 249199-029, pentium 4 specification
* update, though their suggested work-around does not
* appear to solve the problem.
*/
real = VIRT_CTR(stag, i);
rdmsr(p4_counters[real].cccr_address, low, high);
rdmsr(p4_counters[real].counter_address, ctr, high);
if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) {
oprofile_add_sample(regs, i);
wrmsrl(p4_counters[real].counter_address,
-(u64)reset_value[i]);
CCCR_CLEAR_OVF(low);
wrmsr(p4_counters[real].cccr_address, low, high);
wrmsrl(p4_counters[real].counter_address,
-(u64)reset_value[i]);
}
}
/* P4 quirk: you have to re-unmask the apic vector */
apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
/* See op_model_ppro.c */
return 1;
}
static void p4_start(struct op_msrs const * const msrs)
{
unsigned int low, high, stag;
int i;
stag = get_stagger();
for (i = 0; i < num_counters; ++i) {
if (!reset_value[i])
continue;
rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
CCCR_SET_ENABLE(low);
wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
}
}
static void p4_stop(struct op_msrs const * const msrs)
{
unsigned int low, high, stag;
int i;
stag = get_stagger();
for (i = 0; i < num_counters; ++i) {
if (!reset_value[i])
continue;
rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
CCCR_SET_DISABLE(low);
wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
}
}
#ifdef CONFIG_SMP
struct op_x86_model_spec op_p4_ht2_spec = {
.num_counters = NUM_COUNTERS_HT2,
.num_controls = NUM_CONTROLS_HT2,
.fill_in_addresses = &p4_fill_in_addresses,
.setup_ctrs = &p4_setup_ctrs,
.check_ctrs = &p4_check_ctrs,
.start = &p4_start,
.stop = &p4_stop,
.shutdown = &p4_shutdown
};
#endif
struct op_x86_model_spec op_p4_spec = {
.num_counters = NUM_COUNTERS_NON_HT,
.num_controls = NUM_CONTROLS_NON_HT,
.fill_in_addresses = &p4_fill_in_addresses,
.setup_ctrs = &p4_setup_ctrs,
.check_ctrs = &p4_check_ctrs,
.start = &p4_start,
.stop = &p4_stop,
.shutdown = &p4_shutdown
};
/*
* @file op_model_ppro.h
* Family 6 perfmon and architectural perfmon MSR operations
*
* @remark Copyright 2002 OProfile authors
* @remark Copyright 2008 Intel Corporation
* @remark Read the file COPYING
*
* @author John Levon
* @author Philippe Elie
* @author Graydon Hoare
* @author Andi Kleen
* @author Robert Richter <robert.richter@amd.com>
*/
#include <linux/oprofile.h>
#include <linux/slab.h>
#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/nmi.h>
#include "op_x86_model.h"
#include "op_counter.h"
static int num_counters = 2;
static int counter_width = 32;
#define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21))
static u64 reset_value[OP_MAX_COUNTER];
static void ppro_shutdown(struct op_msrs const * const msrs)
{
int i;
for (i = 0; i < num_counters; ++i) {
if (!msrs->counters[i].addr)
continue;
release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
}
}
static int ppro_fill_in_addresses(struct op_msrs * const msrs)
{
int i;
for (i = 0; i < num_counters; i++) {
if (!reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
goto fail;
if (!reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) {
release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
goto fail;
}
/* both registers must be reserved */
msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
continue;
fail:
if (!counter_config[i].enabled)
continue;
op_x86_warn_reserved(i);
ppro_shutdown(msrs);
return -EBUSY;
}
return 0;
}
static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
struct op_msrs const * const msrs)
{
u64 val;
int i;
if (boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
union cpuid10_eax eax;
eax.full = cpuid_eax(0xa);
/*
* For Core2 (family 6, model 15), don't reset the
* counter width:
*/
if (!(eax.split.version_id == 0 &&
__this_cpu_read(cpu_info.x86) == 6 &&
__this_cpu_read(cpu_info.x86_model) == 15)) {
if (counter_width < eax.split.bit_width)
counter_width = eax.split.bit_width;
}
}
/* clear all counters */
for (i = 0; i < num_counters; ++i) {
if (!msrs->controls[i].addr)
continue;
rdmsrl(msrs->controls[i].addr, val);
if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
op_x86_warn_in_use(i);
val &= model->reserved;
wrmsrl(msrs->controls[i].addr, val);
/*
* avoid a false detection of ctr overflows in NMI *
* handler
*/
wrmsrl(msrs->counters[i].addr, -1LL);
}
/* enable active counters */
for (i = 0; i < num_counters; ++i) {
if (counter_config[i].enabled && msrs->counters[i].addr) {
reset_value[i] = counter_config[i].count;
wrmsrl(msrs->counters[i].addr, -reset_value[i]);
rdmsrl(msrs->controls[i].addr, val);
val &= model->reserved;
val |= op_x86_get_ctrl(model, &counter_config[i]);
wrmsrl(msrs->controls[i].addr, val);
} else {
reset_value[i] = 0;
}
}
}
static int ppro_check_ctrs(struct pt_regs * const regs,
struct op_msrs const * const msrs)
{
u64 val;
int i;
for (i = 0; i < num_counters; ++i) {
if (!reset_value[i])
continue;
rdmsrl(msrs->counters[i].addr, val);
if (val & (1ULL << (counter_width - 1)))
continue;
oprofile_add_sample(regs, i);
wrmsrl(msrs->counters[i].addr, -reset_value[i]);
}
/* Only P6 based Pentium M need to re-unmask the apic vector but it
* doesn't hurt other P6 variant */
apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
/* We can't work out if we really handled an interrupt. We
* might have caught a *second* counter just after overflowing
* the interrupt for this counter then arrives
* and we don't find a counter that's overflowed, so we
* would return 0 and get dazed + confused. Instead we always
* assume we found an overflow. This sucks.
*/
return 1;
}
static void ppro_start(struct op_msrs const * const msrs)
{
u64 val;
int i;
for (i = 0; i < num_counters; ++i) {
if (reset_value[i]) {
rdmsrl(msrs->controls[i].addr, val);
val |= ARCH_PERFMON_EVENTSEL_ENABLE;
wrmsrl(msrs->controls[i].addr, val);
}
}
}
static void ppro_stop(struct op_msrs const * const msrs)
{
u64 val;
int i;
for (i = 0; i < num_counters; ++i) {
if (!reset_value[i])
continue;
rdmsrl(msrs->controls[i].addr, val);
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
wrmsrl(msrs->controls[i].addr, val);
}
}
struct op_x86_model_spec op_ppro_spec = {
.num_counters = 2,
.num_controls = 2,
.reserved = MSR_PPRO_EVENTSEL_RESERVED,
.fill_in_addresses = &ppro_fill_in_addresses,
.setup_ctrs = &ppro_setup_ctrs,
.check_ctrs = &ppro_check_ctrs,
.start = &ppro_start,
.stop = &ppro_stop,
.shutdown = &ppro_shutdown
};
/*
* Architectural performance monitoring.
*
* Newer Intel CPUs (Core1+) have support for architectural
* events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details.
* The advantage of this is that it can be done without knowing about
* the specific CPU.
*/
static void arch_perfmon_setup_counters(void)
{
union cpuid10_eax eax;
eax.full = cpuid_eax(0xa);
/* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
if (eax.split.version_id == 0 && boot_cpu_data.x86 == 6 &&
boot_cpu_data.x86_model == 15) {
eax.split.version_id = 2;
eax.split.num_counters = 2;
eax.split.bit_width = 40;
}
num_counters = min((int)eax.split.num_counters, OP_MAX_COUNTER);
op_arch_perfmon_spec.num_counters = num_counters;
op_arch_perfmon_spec.num_controls = num_counters;
}
static int arch_perfmon_init(struct oprofile_operations *ignore)
{
arch_perfmon_setup_counters();
return 0;
}
struct op_x86_model_spec op_arch_perfmon_spec = {
.reserved = MSR_PPRO_EVENTSEL_RESERVED,
.init = &arch_perfmon_init,
/* num_counters/num_controls filled in at runtime */
.fill_in_addresses = &ppro_fill_in_addresses,
/* user space does the cpuid check for available events */
.setup_ctrs = &ppro_setup_ctrs,
.check_ctrs = &ppro_check_ctrs,
.start = &ppro_start,
.stop = &ppro_stop,
.shutdown = &ppro_shutdown
};
/**
* @file op_x86_model.h
* interface to x86 model-specific MSR operations
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author Graydon Hoare
* @author Robert Richter <robert.richter@amd.com>
*/
#ifndef OP_X86_MODEL_H
#define OP_X86_MODEL_H
#include <asm/types.h>
#include <asm/perf_event.h>
struct op_msr {
unsigned long addr;
u64 saved;
};
struct op_msrs {
struct op_msr *counters;
struct op_msr *controls;
struct op_msr *multiplex;
};
struct pt_regs;
struct oprofile_operations;
/* The model vtable abstracts the differences between
* various x86 CPU models' perfctr support.
*/
struct op_x86_model_spec {
unsigned int num_counters;
unsigned int num_controls;
unsigned int num_virt_counters;
u64 reserved;
u16 event_mask;
int (*init)(struct oprofile_operations *ops);
int (*fill_in_addresses)(struct op_msrs * const msrs);
void (*setup_ctrs)(struct op_x86_model_spec const *model,
struct op_msrs const * const msrs);
int (*check_ctrs)(struct pt_regs * const regs,
struct op_msrs const * const msrs);
void (*start)(struct op_msrs const * const msrs);
void (*stop)(struct op_msrs const * const msrs);
void (*shutdown)(struct op_msrs const * const msrs);
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
void (*switch_ctrl)(struct op_x86_model_spec const *model,
struct op_msrs const * const msrs);
#endif
};
struct op_counter_config;
static inline void op_x86_warn_in_use(int counter)
{
/*
* The warning indicates an already running counter. If
* oprofile doesn't collect data, then try using a different
* performance counter on your platform to monitor the desired
* event. Delete counter #%d from the desired event by editing
* the /usr/share/oprofile/%s/<cpu>/events file. If the event
* cannot be monitored by any other counter, contact your
* hardware or BIOS vendor.
*/
pr_warn("oprofile: counter #%d on cpu #%d may already be used\n",
counter, smp_processor_id());
}
static inline void op_x86_warn_reserved(int counter)
{
pr_warn("oprofile: counter #%d is already reserved\n", counter);
}
extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
struct op_counter_config *counter_config);
extern int op_x86_phys_to_virt(int phys);
extern int op_x86_virt_to_phys(int virt);
extern struct op_x86_model_spec op_ppro_spec;
extern struct op_x86_model_spec op_p4_spec;
extern struct op_x86_model_spec op_p4_ht2_spec;
extern struct op_x86_model_spec op_amd_spec;
extern struct op_x86_model_spec op_arch_perfmon_spec;
#endif /* OP_X86_MODEL_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment