Commit 6ae5fa61 authored by Andi Kleen's avatar Andi Kleen Committed by Ingo Molnar

perf/x86: Fix data source decoding for Skylake

Skylake changed the encoding of the PEBS data source field.
Some combinations are not available anymore, but some new cases
e.g. for L4 cache hit are added.

Fix up the conversion table for Skylake, similar as had been done
for Nehalem.

On Skylake server the encoding for L4 actually means persistent
memory. Handle this case too.

To properly describe it in the abstracted perf format I had to add
some new fields. Since a hit can have only one level add a new
field that is an enumeration, not a bit field to describe
the level. It can describe any level. Some numbers are also
used to describe PMEM and LFB.

Also add a new generic remote flag that can be combined with
the generic level to signify a remote cache.

And there is an extension field for the snoop indication to handle
the Forward state.

I didn't add a generic flag for hops because it's not needed
for Skylake.

I changed the existing encodings for older CPUs to also fill in the
new level and remote fields.
Signed-off-by: default avatarAndi Kleen <ak@linux.intel.com>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@kernel.org
Cc: jolsa@kernel.org
Link: http://lkml.kernel.org/r/20170816222156.19953-3-andi@firstfloor.orgSigned-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent 95298355
......@@ -4208,6 +4208,8 @@ __init int intel_pmu_init(void)
skl_format_attr);
WARN_ON(!x86_pmu.format_attrs);
x86_pmu.cpu_events = hsw_events_attrs;
intel_pmu_pebs_data_source_skl(
boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
pr_cont("Skylake events, ");
break;
......
......@@ -49,34 +49,47 @@ union intel_x86_pebs_dse {
*/
#define P(a, b) PERF_MEM_S(a, b)
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
#define LEVEL(x) P(LVLNUM, x)
#define REM P(REMOTE, REMOTE)
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
/* Version for Sandy Bridge and later */
static u64 pebs_data_source[] = {
P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */
OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
OP_LH | P(LVL, L2) | P(SNOOP, NONE), /* 0x03: L2 hit */
OP_LH | P(LVL, L3) | P(SNOOP, NONE), /* 0x04: L3 hit */
OP_LH | P(LVL, L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */
OP_LH | P(LVL, L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */
OP_LH | P(LVL, L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */
OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */
OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
OP_LH | P(LVL, LOC_RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */
OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */
OP_LH | P(LVL, LOC_RAM) | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
OP_LH | P(LVL, IO) | P(SNOOP, NONE), /* 0x0e: I/O */
OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 local */
OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x03: L2 hit */
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* 0x04: L3 hit */
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */
OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */
OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */
OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */
OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* 0x0c: L3 miss, excl */
OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */
OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
};
/* Patch up minor differences in the bits */
void __init intel_pmu_pebs_data_source_nhm(void)
{
pebs_data_source[0x05] = OP_LH | P(LVL, L3) | P(SNOOP, HIT);
pebs_data_source[0x06] = OP_LH | P(LVL, L3) | P(SNOOP, HITM);
pebs_data_source[0x07] = OP_LH | P(LVL, L3) | P(SNOOP, HITM);
pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
}
void __init intel_pmu_pebs_data_source_skl(bool pmem)
{
u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);
pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
}
static u64 precise_store_data(u64 status)
......
......@@ -948,6 +948,8 @@ void intel_pmu_lbr_init_knl(void);
void intel_pmu_pebs_data_source_nhm(void);
void intel_pmu_pebs_data_source_skl(bool pmem);
int intel_pmu_setup_lbr_filter(struct perf_event *event);
void intel_pt_interrupt(void);
......
......@@ -954,14 +954,20 @@ union perf_mem_data_src {
mem_snoop:5, /* snoop mode */
mem_lock:2, /* lock instr */
mem_dtlb:7, /* tlb access */
mem_rsvd:31;
mem_lvl_num:4, /* memory hierarchy level number */
mem_remote:1, /* remote */
mem_snoopx:2, /* snoop mode, ext */
mem_rsvd:24;
};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
__u64 mem_rsvd:31,
__u64 mem_rsvd:24,
mem_snoopx:2, /* snoop mode, ext */
mem_remote:1, /* remote */
mem_lvl_num:4, /* memory hierarchy level number */
mem_dtlb:7, /* tlb access */
mem_lock:2, /* lock instr */
mem_snoop:5, /* snoop mode */
......@@ -998,6 +1004,22 @@ union perf_mem_data_src {
#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
#define PERF_MEM_LVL_SHIFT 5
#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */
#define PERF_MEM_REMOTE_SHIFT 37
#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */
#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
/* 5-0xa available */
#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */
#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */
#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */
#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */
#define PERF_MEM_LVLNUM_SHIFT 33
/* snoop mode */
#define PERF_MEM_SNOOP_NA 0x01 /* not available */
#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
......@@ -1006,6 +1028,10 @@ union perf_mem_data_src {
#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
#define PERF_MEM_SNOOP_SHIFT 19
#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
/* 1 free */
#define PERF_MEM_SNOOPX_SHIFT 37
/* locked instruction */
#define PERF_MEM_LOCK_NA 0x01 /* not available */
#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment