Commit a072738e authored by Cyrill Gorcunov, committed by Ingo Molnar

perf, x86: Implement initial P4 PMU driver

The Netburst PMU is quite different from the "architectural
performance monitoring" specification that current CPUs use.
P4 uses a tuple of ESCR+CCCR+COUNTER MSR registers to handle
performance monitoring events.
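
For illustration, arming one such counter boils down to two MSR
writes: the event selection goes into the ESCR, and the CCCR binds
the counter to that ESCR and enables it. A rough sketch, with
hypothetical names for the values unpacked from hw_perf_event::config:

	/*
	 * Sketch only: escr_addr/escr_val/cccr_addr/cccr_val are
	 * placeholders, and the enable bit is named illustratively.
	 */
	wrmsrl(escr_addr, escr_val);		/* select event, set USR/OS mask */
	wrmsrl(cccr_addr, cccr_val | CCCR_ENABLE);	/* bind to ESCR, enable */
	/* the COUNTER MSR itself accumulates and is read back via rdmsrl() */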

A few implementation details:

1) We need a separate x86_pmu::hw_config helper in struct
   x86_pmu since the register bit-fields are quite different from
   the P6, Core and later CPU series.

2) For the same reason an x86_pmu::schedule_events helper is
   introduced.

3) hw_perf_event::config consists of packed ESCR+CCCR values.
   This is possible since in reality both registers only use half
   of their size. Of course, before making a real write into a
   particular MSR we need to unpack the value and extend it to
   the proper size (see the first sketch after this list).

4) The tuple of packed ESCR+CCCR in hw_perf_event::config
   doesn't describe the memory address of the ESCR MSR register,
   so we need to keep a mapping between the tuples in use and the
   available ESCRs (various P4 events may use the same ESCR, but
   not simultaneously). For this sake every active event has a
   per-cpu map of hw_perf_event::idx <--> ESCR addresses (see the
   second sketch after this list).

5) Since hw_perf_event::idx is an offset into the counter/control
   registers we need to lift X86_PMC_MAX_GENERIC up, otherwise the
   kernel strips it down to 8 registers and an armed event may
   never be turned off (ie the bit in active_mask is set but the
   loop never reaches this index to check it). Thanks to
   Peter Zijlstra.
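
As a minimal sketch of the packing in (3) -- the macro names here
are illustrative, not necessarily the ones used by the driver:

	/* ESCR bits live in the upper half, CCCR bits in the lower half */
	#define p4_config_pack_escr(v)		(((u64)(v)) << 32)
	#define p4_config_pack_cccr(v)		(((u64)(v)) & 0xffffffffULL)
	#define p4_config_unpack_escr(v)	(((u64)(v)) >> 32)
	#define p4_config_unpack_cccr(v)	(((u64)(v)) & 0xffffffffULL)

	/* hw_perf_event::config then carries both halves at once */
	hwc->config = p4_config_pack_escr(escr) | p4_config_pack_cccr(cccr);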

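And a sketch of the per-cpu bookkeeping in (4), again with
illustrative names only:

	/* one slot per counter index; 0 means the slot holds no ESCR */
	static DEFINE_PER_CPU(unsigned int, p4_escr_map[X86_PMC_MAX_GENERIC]);

	static void p4_claim_escr(int cpu, int idx, unsigned int escr_addr)
	{
		per_cpu(p4_escr_map, cpu)[idx] = escr_addr;
	}

On event disable the slot is cleared again, so another event that
maps to the same ESCR can claim it later.
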
Restrictions:

 - No cascaded counters support (do we ever need them?)
 - No dependent events support (so PERF_COUNT_HW_INSTRUCTIONS
   doesn't work for now)
 - There are events which share counters and thus can't run
   simultaneously (we need to use intersecting ones, due to
   broken counter 1)
 - No PERF_COUNT_HW_CACHE_ events yet

Todo:

 - Implement dependent events
 - Need proper hashing for event opcodes (no linear search; fine for
   the debugging stage but not for real loads)
 - Some events are counted per clock cycle -- we need to set a
   threshold for them and count every clock cycle just to get summary
   statistics (ie to behave the same way as other PMUs do)
 - Need to switch to using event_constraints
 - To support RAW events we need to encode a global list of P4 events
   into p4_templates
 - Cache events need to be added

Event support status matrix:

 Event			status
 -----------------------------
 cycles			works
 cache-references	works
 cache-misses		works
 branch-misses		works
 bus-cycles		partially (does not work on 64bit cpu with HT enabled)
 instructions		doesn't work (needs dependent event [mop tagging])
 branches		doesn't work

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <20100311165439.GB5129@lenovo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 9b33fa6b
arch/x86/include/asm/perf_event.h
@@ -5,7 +5,7 @@
  * Performance event hw details:
  */
 
-#define X86_PMC_MAX_GENERIC	 8
+#define X86_PMC_MAX_GENERIC	32
 #define X86_PMC_MAX_FIXED	 3
 
 #define X86_PMC_IDX_GENERIC	 0
arch/x86/include/asm/perf_event_p4.h (new file; this diff is collapsed)
arch/x86/kernel/cpu/perf_event.c
@@ -190,6 +190,8 @@ struct x86_pmu {
 	void		(*enable_all)(void);
 	void		(*enable)(struct perf_event *);
 	void		(*disable)(struct perf_event *);
+	int		(*hw_config)(struct perf_event_attr *attr, struct hw_perf_event *hwc);
+	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned	eventsel;
 	unsigned	perfctr;
 	u64		(*event_map)(int);
@@ -415,6 +417,25 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
 	return 0;
 }
 
+static int x86_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
+{
+	/*
+	 * Generate PMC IRQs:
+	 * (keep 'enabled' bit clear for now)
+	 */
+	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
+
+	/*
+	 * Count user and OS events unless requested not to
+	 */
+	if (!attr->exclude_user)
+		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
+	if (!attr->exclude_kernel)
+		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
+
+	return 0;
+}
+
 /*
  * Setup the hardware configuration for a given attr_type
  */
@@ -446,23 +467,13 @@ static int __hw_perf_event_init(struct perf_event *event)
 
 	event->destroy = hw_perf_event_destroy;
 
-	/*
-	 * Generate PMC IRQs:
-	 * (keep 'enabled' bit clear for now)
-	 */
-	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
-
 	hwc->idx = -1;
 	hwc->last_cpu = -1;
 	hwc->last_tag = ~0ULL;
 
-	/*
-	 * Count user and OS events unless requested not to.
-	 */
-	if (!attr->exclude_user)
-		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
-	if (!attr->exclude_kernel)
-		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
+	/* Processor specifics */
+	if (x86_pmu.hw_config(attr, hwc))
+		return -EOPNOTSUPP;
 
 	if (!hwc->sample_period) {
 		hwc->sample_period = x86_pmu.max_period;
@@ -517,7 +528,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 			return -EOPNOTSUPP;
 
 		/* BTS is currently only allowed for user-mode. */
-		if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+		if (!attr->exclude_kernel)
 			return -EOPNOTSUPP;
 	}
@@ -931,7 +942,7 @@ static int x86_pmu_enable(struct perf_event *event)
 	if (n < 0)
 		return n;
 
-	ret = x86_schedule_events(cpuc, n, assign);
+	ret = x86_pmu.schedule_events(cpuc, n, assign);
 	if (ret)
 		return ret;
 	/*
@@ -1263,7 +1274,7 @@ int hw_perf_group_sched_in(struct perf_event *leader,
 	if (n0 < 0)
 		return n0;
 
-	ret = x86_schedule_events(cpuc, n0, assign);
+	ret = x86_pmu.schedule_events(cpuc, n0, assign);
 	if (ret)
 		return ret;
@@ -1313,6 +1324,7 @@ int hw_perf_group_sched_in(struct perf_event *leader,
 
 #include "perf_event_amd.c"
 #include "perf_event_p6.c"
+#include "perf_event_p4.c"
 #include "perf_event_intel_lbr.c"
 #include "perf_event_intel_ds.c"
 #include "perf_event_intel.c"
@@ -1515,7 +1527,7 @@ static int validate_group(struct perf_event *event)
 	fake_cpuc->n_events = n;
 
-	ret = x86_schedule_events(fake_cpuc, n, NULL);
+	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
 
 out_free:
 	kfree(fake_cpuc);
arch/x86/kernel/cpu/perf_event_amd.c
@@ -363,6 +363,8 @@ static __initconst struct x86_pmu amd_pmu = {
 	.enable_all	= x86_pmu_enable_all,
 	.enable		= x86_pmu_enable_event,
 	.disable	= x86_pmu_disable_event,
+	.hw_config	= x86_hw_config,
+	.schedule_events	= x86_schedule_events,
 	.eventsel	= MSR_K7_EVNTSEL0,
 	.perfctr	= MSR_K7_PERFCTR0,
 	.event_map	= amd_pmu_event_map,
arch/x86/kernel/cpu/perf_event_intel.c
@@ -749,6 +749,8 @@ static __initconst struct x86_pmu core_pmu = {
 	.enable_all	= x86_pmu_enable_all,
 	.enable		= x86_pmu_enable_event,
 	.disable	= x86_pmu_disable_event,
+	.hw_config	= x86_hw_config,
+	.schedule_events	= x86_schedule_events,
 	.eventsel	= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr	= MSR_ARCH_PERFMON_PERFCTR0,
 	.event_map	= intel_pmu_event_map,
@@ -786,6 +788,8 @@ static __initconst struct x86_pmu intel_pmu = {
 	.enable_all	= intel_pmu_enable_all,
 	.enable		= intel_pmu_enable_event,
 	.disable	= intel_pmu_disable_event,
+	.hw_config	= x86_hw_config,
+	.schedule_events	= x86_schedule_events,
 	.eventsel	= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr	= MSR_ARCH_PERFMON_PERFCTR0,
 	.event_map	= intel_pmu_event_map,
@@ -839,12 +843,13 @@ static __init int intel_pmu_init(void)
 	int version;
 
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
-		/* check for P6 processor family */
-		if (boot_cpu_data.x86 == 6) {
+		switch (boot_cpu_data.x86) {
+		case 0x6:
 			return p6_pmu_init();
-		} else {
-			return -ENODEV;
+		case 0xf:
+			return p4_pmu_init();
 		}
+		return -ENODEV;
 	}
 
 	/*
arch/x86/kernel/cpu/perf_event_p4.c (new file; this diff is collapsed)
arch/x86/kernel/cpu/perf_event_p6.c
@@ -109,6 +109,8 @@ static __initconst struct x86_pmu p6_pmu = {
 	.enable_all	= p6_pmu_enable_all,
 	.enable		= p6_pmu_enable_event,
 	.disable	= p6_pmu_disable_event,
+	.hw_config	= x86_hw_config,
+	.schedule_events	= x86_schedule_events,
 	.eventsel	= MSR_P6_EVNTSEL0,
 	.perfctr	= MSR_P6_PERFCTR0,
 	.event_map	= p6_pmu_event_map,