Commit 4e6430cb authored by Ali Saidi, committed by Arnaldo Carvalho de Melo

perf arm-spe: Use SPE data source for neoverse cores

When synthesizing data from SPE, augment the type with source information
for Arm Neoverse cores. The field is IMPLDEF but the Neoverse cores all use
the same encoding. I can't find encoding information for any other SPE
implementations to unify their choices with Arm's, so that is left for
future work.

This change populates the mem_lvl_num for Neoverse cores as well as the
deprecated mem_lvl namespace.
Reviewed-by: German Gomez <german.gomez@arm.com>
Reviewed-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Ali Saidi <alisaidi@amazon.com>
Tested-by: Leo Yan <leo.yan@linaro.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Like Xu <likexu@tencent.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Timothy Hayes <timothy.hayes@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/r/20220811062451.435810-4-leo.yan@linaro.org
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent f78d6250
...@@ -220,6 +220,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) ...@@ -220,6 +220,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
break; break;
case ARM_SPE_DATA_SOURCE: case ARM_SPE_DATA_SOURCE:
decoder->record.source = payload;
break; break;
case ARM_SPE_BAD: case ARM_SPE_BAD:
break; break;
......
...@@ -29,6 +29,17 @@ enum arm_spe_op_type { ...@@ -29,6 +29,17 @@ enum arm_spe_op_type {
ARM_SPE_ST = 1 << 1, ARM_SPE_ST = 1 << 1,
}; };
/*
 * Data source encodings delivered in the SPE data source packet on Arm
 * Neoverse cores. The field is implementation defined; these are the values
 * that arm_spe__synth_data_source_neoverse() switches on when it fills in
 * the perf_mem_data_src fields for a load. The per-value notes describe how
 * that function reports each source.
 */
enum arm_spe_neoverse_data_source {
ARM_SPE_NV_L1D = 0x0, /* reported as an L1 hit, no snoop */
ARM_SPE_NV_L2 = 0x8, /* reported as an L2 hit, no snoop */
ARM_SPE_NV_PEER_CORE = 0x9, /* reported as an L2 hit with a peer snoop */
ARM_SPE_NV_LOCAL_CLUSTER = 0xa, /* reported as an L3 hit with a peer snoop */
ARM_SPE_NV_SYS_CACHE = 0xb, /* system cache, assumed to be L3 */
ARM_SPE_NV_PEER_CLUSTER = 0xc, /* reported as an L3 hit with a peer snoop */
ARM_SPE_NV_REMOTE = 0xd, /* remote (other socket), cache level unknown */
ARM_SPE_NV_DRAM = 0xe, /* reported as a local RAM hit */
};
struct arm_spe_record { struct arm_spe_record {
enum arm_spe_sample_type type; enum arm_spe_sample_type type;
int err; int err;
...@@ -40,6 +51,7 @@ struct arm_spe_record { ...@@ -40,6 +51,7 @@ struct arm_spe_record {
u64 virt_addr; u64 virt_addr;
u64 phys_addr; u64 phys_addr;
u64 context_id; u64 context_id;
u16 source;
}; };
struct arm_spe_insn; struct arm_spe_insn;
......
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#include "arm-spe-decoder/arm-spe-decoder.h" #include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h" #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
#include "../../arch/arm64/include/asm/cputype.h"
#define MAX_TIMESTAMP (~0ULL) #define MAX_TIMESTAMP (~0ULL)
struct arm_spe { struct arm_spe {
...@@ -45,6 +46,7 @@ struct arm_spe { ...@@ -45,6 +46,7 @@ struct arm_spe {
struct perf_session *session; struct perf_session *session;
struct machine *machine; struct machine *machine;
u32 pmu_type; u32 pmu_type;
u64 midr;
struct perf_tsc_conversion tc; struct perf_tsc_conversion tc;
...@@ -387,35 +389,128 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, ...@@ -387,35 +389,128 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
return arm_spe_deliver_synth_event(spe, speq, event, &sample); return arm_spe_deliver_synth_event(spe, speq, event, &sample);
} }
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record) static const struct midr_range neoverse_spe[] = {
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
{},
};
static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record,
union perf_mem_data_src *data_src)
{ {
union perf_mem_data_src data_src = { 0 }; /*
* Even though four levels of cache hierarchy are possible, no known
* production Neoverse systems currently include more than three levels
* so for the time being we assume three exist. If a production system
* is built with four then this function would have to be changed to
* detect the number of levels for reporting.
*/
if (record->op == ARM_SPE_LD) /*
data_src.mem_op = PERF_MEM_OP_LOAD; * We have no data on the hit level or data source for stores in the
else if (record->op == ARM_SPE_ST) * Neoverse SPE records.
data_src.mem_op = PERF_MEM_OP_STORE; */
else if (record->op & ARM_SPE_ST) {
return 0; data_src->mem_lvl = PERF_MEM_LVL_NA;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
data_src->mem_snoop = PERF_MEM_SNOOP_NA;
return;
}
switch (record->source) {
case ARM_SPE_NV_L1D:
data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
break;
case ARM_SPE_NV_L2:
data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
break;
case ARM_SPE_NV_PEER_CORE:
data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
break;
/*
* We don't know if this is L1, L2 but we do know it was a cache-2-cache
* transfer, so set SNOOPX_PEER
*/
case ARM_SPE_NV_LOCAL_CLUSTER:
case ARM_SPE_NV_PEER_CLUSTER:
data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
break;
/*
* System cache is assumed to be L3
*/
case ARM_SPE_NV_SYS_CACHE:
data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
break;
/*
* We don't know what level it hit in, except it came from the other
* socket
*/
case ARM_SPE_NV_REMOTE:
data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
break;
case ARM_SPE_NV_DRAM:
data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
break;
default:
break;
}
}
static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record,
union perf_mem_data_src *data_src)
{
if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) { if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
data_src.mem_lvl = PERF_MEM_LVL_L3; data_src->mem_lvl = PERF_MEM_LVL_L3;
if (record->type & ARM_SPE_LLC_MISS) if (record->type & ARM_SPE_LLC_MISS)
data_src.mem_lvl |= PERF_MEM_LVL_MISS; data_src->mem_lvl |= PERF_MEM_LVL_MISS;
else else
data_src.mem_lvl |= PERF_MEM_LVL_HIT; data_src->mem_lvl |= PERF_MEM_LVL_HIT;
} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) { } else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
data_src.mem_lvl = PERF_MEM_LVL_L1; data_src->mem_lvl = PERF_MEM_LVL_L1;
if (record->type & ARM_SPE_L1D_MISS) if (record->type & ARM_SPE_L1D_MISS)
data_src.mem_lvl |= PERF_MEM_LVL_MISS; data_src->mem_lvl |= PERF_MEM_LVL_MISS;
else else
data_src.mem_lvl |= PERF_MEM_LVL_HIT; data_src->mem_lvl |= PERF_MEM_LVL_HIT;
} }
if (record->type & ARM_SPE_REMOTE_ACCESS) if (record->type & ARM_SPE_REMOTE_ACCESS)
data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1; data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
}
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
{
union perf_mem_data_src data_src = { 0 };
bool is_neoverse = is_midr_in_range(midr, neoverse_spe);
if (record->op == ARM_SPE_LD)
data_src.mem_op = PERF_MEM_OP_LOAD;
else if (record->op == ARM_SPE_ST)
data_src.mem_op = PERF_MEM_OP_STORE;
else
return 0;
if (is_neoverse)
arm_spe__synth_data_source_neoverse(record, &data_src);
else
arm_spe__synth_data_source_generic(record, &data_src);
if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) { if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
data_src.mem_dtlb = PERF_MEM_TLB_WK; data_src.mem_dtlb = PERF_MEM_TLB_WK;
...@@ -436,7 +531,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq) ...@@ -436,7 +531,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
u64 data_src; u64 data_src;
int err; int err;
data_src = arm_spe__synth_data_source(record); data_src = arm_spe__synth_data_source(record, spe->midr);
if (spe->sample_flc) { if (spe->sample_flc) {
if (record->type & ARM_SPE_L1D_MISS) { if (record->type & ARM_SPE_L1D_MISS) {
...@@ -1178,6 +1273,8 @@ int arm_spe_process_auxtrace_info(union perf_event *event, ...@@ -1178,6 +1273,8 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX; size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
struct perf_record_time_conv *tc = &session->time_conv; struct perf_record_time_conv *tc = &session->time_conv;
const char *cpuid = perf_env__cpuid(session->evlist->env);
u64 midr = strtol(cpuid, NULL, 16);
struct arm_spe *spe; struct arm_spe *spe;
int err; int err;
...@@ -1197,6 +1294,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event, ...@@ -1197,6 +1294,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
spe->machine = &session->machines.host; /* No kvm support */ spe->machine = &session->machines.host; /* No kvm support */
spe->auxtrace_type = auxtrace_info->type; spe->auxtrace_type = auxtrace_info->type;
spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
spe->midr = midr;
spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment