Commit a072738e authored by Cyrill Gorcunov, committed by Ingo Molnar

perf, x86: Implement initial P4 PMU driver

The Netburst PMU is quite different from the "architectural
performance monitoring" specification that current CPUs use.
P4 uses a tuple of ESCR+CCCR+COUNTER MSR registers to handle
performance monitoring events.

A few implementation details:

1) We need a separate x86_pmu::hw_config helper in struct
   x86_pmu since the register bit-fields are quite different from
   the P6, Core and later CPU series.

2) For the same reason an x86_pmu::schedule_events helper is
   introduced.

3) hw_perf_event::config consists of packed ESCR+CCCR values.
   This is possible because in reality both registers only use
   half of their bits. Of course, before doing a real write into a
   particular MSR we need to unpack the value and extend it to
   the proper size (see the illustrative sketch after this list).

4) The tuple of packed ESCR+CCCR in hw_perf_event::config
   doesn't describe the memory address of the ESCR MSR register,
   so we need to keep a mapping between the tuples in use and the
   available ESCRs (various P4 events may use the same ESCR, but
   not simultaneously). For this sake every active event has a
   per-cpu map of hw_perf_event::idx <--> ESCR addresses.

5) Since hw_perf_event::idx is an offset into the counter/control
   registers we need to lift X86_PMC_MAX_GENERIC up, otherwise the
   kernel strips it down to 8 registers and an armed event may never
   be turned off (ie the bit in active_mask is set but the loop never
   reaches this index to check it), thanks to Peter Zijlstra.
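
To illustrate points 3) and 4), here is a minimal sketch; the helper
names, the struct and the write-out path below are made up for
illustration only and do not claim to match the actual perf_event_p4.c
code:

  /*
   * Illustration only: pack the useful 32 bits of an ESCR and of a
   * CCCR into the single 64-bit hw_perf_event::config value, and
   * keep a per-cpu idx -> ESCR MSR address map so we know which
   * ESCR an armed event actually owns.
   */
  #include <linux/types.h>
  #include <linux/percpu.h>
  #include <linux/smp.h>
  #include <asm/msr.h>
  #include <asm/perf_event.h>

  #define P4_CONFIG_PACK(escr, cccr)  (((u64)(escr) << 32) | (u64)(u32)(cccr))
  #define P4_CONFIG_ESCR(config)      ((u32)((config) >> 32))
  #define P4_CONFIG_CCCR(config)      ((u32)(config))

  /* one slot per generic counter (hence X86_PMC_MAX_GENERIC = 32) */
  struct p4_escr_map {
          unsigned int escr_msr[X86_PMC_MAX_GENERIC];
  };

  static DEFINE_PER_CPU(struct p4_escr_map, p4_escr_map);

  /* hypothetical write-out path: unpack and extend before the MSR writes */
  static void p4_write_event(int idx, u64 config)
  {
          unsigned int escr = per_cpu(p4_escr_map, smp_processor_id()).escr_msr[idx];

          wrmsrl(escr, (u64)P4_CONFIG_ESCR(config));
          wrmsrl(MSR_P4_BPU_CCCR0 + idx, (u64)P4_CONFIG_CCCR(config));
  }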

Restrictions:

 - No cascaded counters support (do we ever need them?)
 - No dependent events support (so PERF_COUNT_HW_INSTRUCTIONS
   doesn't work for now)
 - There are events which use the same counters and thus can't run
   simultaneously (we have to use the intersecting ones due to broken
   counter 1)
 - No PERF_COUNT_HW_CACHE_ events yet

Todo:

 - Implement dependent events
 - Need proper hashing for event opcodes (linear search is fine for
   the debugging stage but not under real load)
 - Some events are counted per clock cycle -- we need to set a threshold
   for them and count every clock cycle just to get summary statistics
   (ie to behave the same way as other PMUs do)
 - Need to switch to using event_constraints
 - To support RAW events we need to encode a global list of P4 events
   into p4_templates
 - Cache events need to be added

Event support status matrix:

 Event			status
 -----------------------------
 cycles			works
 cache-references	works
 cache-misses		works
 branch-misses		works
 bus-cycles		partially (does not work on 64-bit CPUs with HT enabled)
 instructions		doesn't work (needs dependent event [mop tagging])
 branches		doesn't work
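
For reference, a minimal userspace sketch (not part of this patch)
showing how the working events above are requested through the
perf_event_open() syscall; on Netburst CPUs it is this driver that
ends up servicing such generic events:

  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/types.h>
  #include <sys/ioctl.h>
  #include <sys/syscall.h>
  #include <linux/perf_event.h>

  static long sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
                                  int cpu, int group_fd, unsigned long flags)
  {
          return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
  }

  int main(void)
  {
          struct perf_event_attr attr;
          unsigned long long count;
          int fd;

          memset(&attr, 0, sizeof(attr));
          attr.size     = sizeof(attr);
          attr.type     = PERF_TYPE_HARDWARE;
          attr.config   = PERF_COUNT_HW_CPU_CYCLES;  /* the "cycles" row above */
          attr.disabled = 1;

          /* count for the current task, on any cpu */
          fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
          if (fd < 0)
                  return 1;

          ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
          /* ... the workload being measured goes here ... */
          ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

          if (read(fd, &count, sizeof(count)) == sizeof(count))
                  printf("cycles: %llu\n", count);

          close(fd);
          return 0;
  }
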
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <20100311165439.GB5129@lenovo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 9b33fa6b
@@ -5,7 +5,7 @@
  * Performance event hw details:
  */
 
-#define X86_PMC_MAX_GENERIC	 8
+#define X86_PMC_MAX_GENERIC	32
 #define X86_PMC_MAX_FIXED	 3
 #define X86_PMC_IDX_GENERIC	 0
......
This diff is collapsed.
@@ -190,6 +190,8 @@ struct x86_pmu {
 	void (*enable_all)(void);
 	void (*enable)(struct perf_event *);
 	void (*disable)(struct perf_event *);
+	int (*hw_config)(struct perf_event_attr *attr, struct hw_perf_event *hwc);
+	int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned eventsel;
 	unsigned perfctr;
 	u64 (*event_map)(int);
@@ -415,6 +417,25 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
 	return 0;
 }
 
+static int x86_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
+{
+	/*
+	 * Generate PMC IRQs:
+	 * (keep 'enabled' bit clear for now)
+	 */
+	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
+
+	/*
+	 * Count user and OS events unless requested not to
+	 */
+	if (!attr->exclude_user)
+		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
+	if (!attr->exclude_kernel)
+		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
+
+	return 0;
+}
+
 /*
  * Setup the hardware configuration for a given attr_type
  */
@@ -446,23 +467,13 @@ static int __hw_perf_event_init(struct perf_event *event)
 
 	event->destroy = hw_perf_event_destroy;
 
-	/*
-	 * Generate PMC IRQs:
-	 * (keep 'enabled' bit clear for now)
-	 */
-	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
-
 	hwc->idx = -1;
 	hwc->last_cpu = -1;
 	hwc->last_tag = ~0ULL;
 
-	/*
-	 * Count user and OS events unless requested not to.
-	 */
-	if (!attr->exclude_user)
-		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
-	if (!attr->exclude_kernel)
-		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
+	/* Processor specifics */
+	if (x86_pmu.hw_config(attr, hwc))
+		return -EOPNOTSUPP;
 
 	if (!hwc->sample_period) {
 		hwc->sample_period = x86_pmu.max_period;
@@ -517,7 +528,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 			return -EOPNOTSUPP;
 
 		/* BTS is currently only allowed for user-mode. */
-		if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+		if (!attr->exclude_kernel)
 			return -EOPNOTSUPP;
 	}
@@ -931,7 +942,7 @@ static int x86_pmu_enable(struct perf_event *event)
 	if (n < 0)
 		return n;
 
-	ret = x86_schedule_events(cpuc, n, assign);
+	ret = x86_pmu.schedule_events(cpuc, n, assign);
 	if (ret)
 		return ret;
 	/*
@@ -1263,7 +1274,7 @@ int hw_perf_group_sched_in(struct perf_event *leader,
 	if (n0 < 0)
 		return n0;
 
-	ret = x86_schedule_events(cpuc, n0, assign);
+	ret = x86_pmu.schedule_events(cpuc, n0, assign);
 	if (ret)
 		return ret;
@@ -1313,6 +1324,7 @@ int hw_perf_group_sched_in(struct perf_event *leader,
 
 #include "perf_event_amd.c"
 #include "perf_event_p6.c"
+#include "perf_event_p4.c"
 #include "perf_event_intel_lbr.c"
 #include "perf_event_intel_ds.c"
 #include "perf_event_intel.c"
@@ -1515,7 +1527,7 @@ static int validate_group(struct perf_event *event)
 
 	fake_cpuc->n_events = n;
 
-	ret = x86_schedule_events(fake_cpuc, n, NULL);
+	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
 
 out_free:
 	kfree(fake_cpuc);
......
@@ -363,6 +363,8 @@ static __initconst struct x86_pmu amd_pmu = {
 	.enable_all = x86_pmu_enable_all,
 	.enable = x86_pmu_enable_event,
 	.disable = x86_pmu_disable_event,
+	.hw_config = x86_hw_config,
+	.schedule_events = x86_schedule_events,
 	.eventsel = MSR_K7_EVNTSEL0,
 	.perfctr = MSR_K7_PERFCTR0,
 	.event_map = amd_pmu_event_map,
......
@@ -749,6 +749,8 @@ static __initconst struct x86_pmu core_pmu = {
 	.enable_all = x86_pmu_enable_all,
 	.enable = x86_pmu_enable_event,
 	.disable = x86_pmu_disable_event,
+	.hw_config = x86_hw_config,
+	.schedule_events = x86_schedule_events,
 	.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
 	.event_map = intel_pmu_event_map,
@@ -786,6 +788,8 @@ static __initconst struct x86_pmu intel_pmu = {
 	.enable_all = intel_pmu_enable_all,
 	.enable = intel_pmu_enable_event,
 	.disable = intel_pmu_disable_event,
+	.hw_config = x86_hw_config,
+	.schedule_events = x86_schedule_events,
 	.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
 	.event_map = intel_pmu_event_map,
@@ -839,12 +843,13 @@ static __init int intel_pmu_init(void)
 	int version;
 
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
-		/* check for P6 processor family */
-		if (boot_cpu_data.x86 == 6) {
-			return p6_pmu_init();
-		} else {
+		switch (boot_cpu_data.x86) {
+		case 0x6:
+			return p6_pmu_init();
+		case 0xf:
+			return p4_pmu_init();
+		}
 		return -ENODEV;
-		}
 	}
 
 	/*
......
This diff is collapsed.
@@ -109,6 +109,8 @@ static __initconst struct x86_pmu p6_pmu = {
 	.enable_all = p6_pmu_enable_all,
 	.enable = p6_pmu_enable_event,
 	.disable = p6_pmu_disable_event,
+	.hw_config = x86_hw_config,
+	.schedule_events = x86_schedule_events,
 	.eventsel = MSR_P6_EVNTSEL0,
 	.perfctr = MSR_P6_PERFCTR0,
 	.event_map = p6_pmu_event_map,
......