Commit 722e42e4 authored by Kan Liang's avatar Kan Liang Committed by Peter Zijlstra

perf/x86: Support counter mask

The current perf assumes that both GP and fixed counters are contiguous.
But it's not guaranteed on newer Intel platforms or in a virtualization
environment.

Use the counter mask to replace the number of counters for both GP and
the fixed counters. For the other ARCHs or old platforms which don't
support a counter mask, using GENMASK_ULL(num_counter - 1, 0) to
replace. There is no functional change for them.

The interface to KVM is not changed. The number of counters still be
passed to KVM. It can be updated later separately.
Signed-off-by: default avatarKan Liang <kan.liang@linux.intel.com>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: default avatarAndi Kleen <ak@linux.intel.com>
Reviewed-by: default avatarIan Rogers <irogers@google.com>
Link: https://lkml.kernel.org/r/20240626143545.480761-3-kan.liang@linux.intel.com
parent a23eb2fc
...@@ -432,7 +432,7 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc, ...@@ -432,7 +432,7 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
* be removed on one CPU at a time AND PMU is disabled * be removed on one CPU at a time AND PMU is disabled
* when we come here * when we come here
*/ */
for (i = 0; i < x86_pmu.num_counters; i++) { for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct perf_event *tmp = event; struct perf_event *tmp = event;
if (try_cmpxchg(nb->owners + i, &tmp, NULL)) if (try_cmpxchg(nb->owners + i, &tmp, NULL))
...@@ -501,7 +501,7 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev ...@@ -501,7 +501,7 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
* because of successive calls to x86_schedule_events() from * because of successive calls to x86_schedule_events() from
* hw_perf_group_sched_in() without hw_perf_enable() * hw_perf_group_sched_in() without hw_perf_enable()
*/ */
for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) { for_each_set_bit(idx, c->idxmsk, x86_pmu_max_num_counters(NULL)) {
if (new == -1 || hwc->idx == idx) if (new == -1 || hwc->idx == idx)
/* assign free slot, prefer hwc->idx */ /* assign free slot, prefer hwc->idx */
old = cmpxchg(nb->owners + idx, NULL, event); old = cmpxchg(nb->owners + idx, NULL, event);
...@@ -544,7 +544,7 @@ static struct amd_nb *amd_alloc_nb(int cpu) ...@@ -544,7 +544,7 @@ static struct amd_nb *amd_alloc_nb(int cpu)
/* /*
* initialize all possible NB constraints * initialize all possible NB constraints
*/ */
for (i = 0; i < x86_pmu.num_counters; i++) { for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
__set_bit(i, nb->event_constraints[i].idxmsk); __set_bit(i, nb->event_constraints[i].idxmsk);
nb->event_constraints[i].weight = 1; nb->event_constraints[i].weight = 1;
} }
...@@ -737,7 +737,7 @@ static void amd_pmu_check_overflow(void) ...@@ -737,7 +737,7 @@ static void amd_pmu_check_overflow(void)
* counters are always enabled when this function is called and * counters are always enabled when this function is called and
* ARCH_PERFMON_EVENTSEL_INT is always set. * ARCH_PERFMON_EVENTSEL_INT is always set.
*/ */
for (idx = 0; idx < x86_pmu.num_counters; idx++) { for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
if (!test_bit(idx, cpuc->active_mask)) if (!test_bit(idx, cpuc->active_mask))
continue; continue;
...@@ -757,7 +757,7 @@ static void amd_pmu_enable_all(int added) ...@@ -757,7 +757,7 @@ static void amd_pmu_enable_all(int added)
amd_brs_enable_all(); amd_brs_enable_all();
for (idx = 0; idx < x86_pmu.num_counters; idx++) { for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
/* only activate events which are marked as active */ /* only activate events which are marked as active */
if (!test_bit(idx, cpuc->active_mask)) if (!test_bit(idx, cpuc->active_mask))
continue; continue;
...@@ -980,7 +980,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) ...@@ -980,7 +980,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
/* Clear any reserved bits set by buggy microcode */ /* Clear any reserved bits set by buggy microcode */
status &= amd_pmu_global_cntr_mask; status &= amd_pmu_global_cntr_mask;
for (idx = 0; idx < x86_pmu.num_counters; idx++) { for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
if (!test_bit(idx, cpuc->active_mask)) if (!test_bit(idx, cpuc->active_mask))
continue; continue;
...@@ -1315,7 +1315,7 @@ static __initconst const struct x86_pmu amd_pmu = { ...@@ -1315,7 +1315,7 @@ static __initconst const struct x86_pmu amd_pmu = {
.addr_offset = amd_pmu_addr_offset, .addr_offset = amd_pmu_addr_offset,
.event_map = amd_pmu_event_map, .event_map = amd_pmu_event_map,
.max_events = ARRAY_SIZE(amd_perfmon_event_map), .max_events = ARRAY_SIZE(amd_perfmon_event_map),
.num_counters = AMD64_NUM_COUNTERS, .cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0),
.add = amd_pmu_add_event, .add = amd_pmu_add_event,
.del = amd_pmu_del_event, .del = amd_pmu_del_event,
.cntval_bits = 48, .cntval_bits = 48,
...@@ -1414,7 +1414,7 @@ static int __init amd_core_pmu_init(void) ...@@ -1414,7 +1414,7 @@ static int __init amd_core_pmu_init(void)
*/ */
x86_pmu.eventsel = MSR_F15H_PERF_CTL; x86_pmu.eventsel = MSR_F15H_PERF_CTL;
x86_pmu.perfctr = MSR_F15H_PERF_CTR; x86_pmu.perfctr = MSR_F15H_PERF_CTR;
x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE; x86_pmu.cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS_CORE - 1, 0);
/* Check for Performance Monitoring v2 support */ /* Check for Performance Monitoring v2 support */
if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) { if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
...@@ -1424,9 +1424,9 @@ static int __init amd_core_pmu_init(void) ...@@ -1424,9 +1424,9 @@ static int __init amd_core_pmu_init(void)
x86_pmu.version = 2; x86_pmu.version = 2;
/* Find the number of available Core PMCs */ /* Find the number of available Core PMCs */
x86_pmu.num_counters = ebx.split.num_core_pmc; x86_pmu.cntr_mask64 = GENMASK_ULL(ebx.split.num_core_pmc - 1, 0);
amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1; amd_pmu_global_cntr_mask = x86_pmu.cntr_mask64;
/* Update PMC handling functions */ /* Update PMC handling functions */
x86_pmu.enable_all = amd_pmu_v2_enable_all; x86_pmu.enable_all = amd_pmu_v2_enable_all;
...@@ -1454,12 +1454,12 @@ static int __init amd_core_pmu_init(void) ...@@ -1454,12 +1454,12 @@ static int __init amd_core_pmu_init(void)
* even numbered counter that has a consecutive adjacent odd * even numbered counter that has a consecutive adjacent odd
* numbered counter following it. * numbered counter following it.
*/ */
for (i = 0; i < x86_pmu.num_counters - 1; i += 2) for (i = 0; i < x86_pmu_max_num_counters(NULL) - 1; i += 2)
even_ctr_mask |= BIT_ULL(i); even_ctr_mask |= BIT_ULL(i);
pair_constraint = (struct event_constraint) pair_constraint = (struct event_constraint)
__EVENT_CONSTRAINT(0, even_ctr_mask, 0, __EVENT_CONSTRAINT(0, even_ctr_mask, 0,
x86_pmu.num_counters / 2, 0, x86_pmu_max_num_counters(NULL) / 2, 0,
PERF_X86_EVENT_PAIR); PERF_X86_EVENT_PAIR);
x86_pmu.get_event_constraints = amd_get_event_constraints_f17h; x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
......
...@@ -189,29 +189,31 @@ static DEFINE_MUTEX(pmc_reserve_mutex); ...@@ -189,29 +189,31 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
static inline int get_possible_num_counters(void) static inline u64 get_possible_counter_mask(void)
{ {
int i, num_counters = x86_pmu.num_counters; u64 cntr_mask = x86_pmu.cntr_mask64;
int i;
if (!is_hybrid()) if (!is_hybrid())
return num_counters; return cntr_mask;
for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) for (i = 0; i < x86_pmu.num_hybrid_pmus; i++)
num_counters = max_t(int, num_counters, x86_pmu.hybrid_pmu[i].num_counters); cntr_mask |= x86_pmu.hybrid_pmu[i].cntr_mask64;
return num_counters; return cntr_mask;
} }
static bool reserve_pmc_hardware(void) static bool reserve_pmc_hardware(void)
{ {
int i, num_counters = get_possible_num_counters(); u64 cntr_mask = get_possible_counter_mask();
int i, end;
for (i = 0; i < num_counters; i++) { for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
if (!reserve_perfctr_nmi(x86_pmu_event_addr(i))) if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
goto perfctr_fail; goto perfctr_fail;
} }
for (i = 0; i < num_counters; i++) { for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
if (!reserve_evntsel_nmi(x86_pmu_config_addr(i))) if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
goto eventsel_fail; goto eventsel_fail;
} }
...@@ -219,13 +221,14 @@ static bool reserve_pmc_hardware(void) ...@@ -219,13 +221,14 @@ static bool reserve_pmc_hardware(void)
return true; return true;
eventsel_fail: eventsel_fail:
for (i--; i >= 0; i--) end = i;
for_each_set_bit(i, (unsigned long *)&cntr_mask, end)
release_evntsel_nmi(x86_pmu_config_addr(i)); release_evntsel_nmi(x86_pmu_config_addr(i));
i = X86_PMC_IDX_MAX;
i = num_counters;
perfctr_fail: perfctr_fail:
for (i--; i >= 0; i--) end = i;
for_each_set_bit(i, (unsigned long *)&cntr_mask, end)
release_perfctr_nmi(x86_pmu_event_addr(i)); release_perfctr_nmi(x86_pmu_event_addr(i));
return false; return false;
...@@ -233,9 +236,10 @@ static bool reserve_pmc_hardware(void) ...@@ -233,9 +236,10 @@ static bool reserve_pmc_hardware(void)
static void release_pmc_hardware(void) static void release_pmc_hardware(void)
{ {
int i, num_counters = get_possible_num_counters(); u64 cntr_mask = get_possible_counter_mask();
int i;
for (i = 0; i < num_counters; i++) { for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
release_perfctr_nmi(x86_pmu_event_addr(i)); release_perfctr_nmi(x86_pmu_event_addr(i));
release_evntsel_nmi(x86_pmu_config_addr(i)); release_evntsel_nmi(x86_pmu_config_addr(i));
} }
...@@ -248,7 +252,8 @@ static void release_pmc_hardware(void) {} ...@@ -248,7 +252,8 @@ static void release_pmc_hardware(void) {}
#endif #endif
bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed) bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
unsigned long *fixed_cntr_mask)
{ {
u64 val, val_fail = -1, val_new= ~0; u64 val, val_fail = -1, val_new= ~0;
int i, reg, reg_fail = -1, ret = 0; int i, reg, reg_fail = -1, ret = 0;
...@@ -259,7 +264,7 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed) ...@@ -259,7 +264,7 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
* Check to see if the BIOS enabled any of the counters, if so * Check to see if the BIOS enabled any of the counters, if so
* complain and bail. * complain and bail.
*/ */
for (i = 0; i < num_counters; i++) { for_each_set_bit(i, cntr_mask, X86_PMC_IDX_MAX) {
reg = x86_pmu_config_addr(i); reg = x86_pmu_config_addr(i);
ret = rdmsrl_safe(reg, &val); ret = rdmsrl_safe(reg, &val);
if (ret) if (ret)
...@@ -273,12 +278,12 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed) ...@@ -273,12 +278,12 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
} }
} }
if (num_counters_fixed) { if (*(u64 *)fixed_cntr_mask) {
reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
ret = rdmsrl_safe(reg, &val); ret = rdmsrl_safe(reg, &val);
if (ret) if (ret)
goto msr_fail; goto msr_fail;
for (i = 0; i < num_counters_fixed; i++) { for_each_set_bit(i, fixed_cntr_mask, X86_PMC_IDX_MAX) {
if (fixed_counter_disabled(i, pmu)) if (fixed_counter_disabled(i, pmu))
continue; continue;
if (val & (0x03ULL << i*4)) { if (val & (0x03ULL << i*4)) {
...@@ -679,7 +684,7 @@ void x86_pmu_disable_all(void) ...@@ -679,7 +684,7 @@ void x86_pmu_disable_all(void)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx; int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) { for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct hw_perf_event *hwc = &cpuc->events[idx]->hw; struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
u64 val; u64 val;
...@@ -736,7 +741,7 @@ void x86_pmu_enable_all(int added) ...@@ -736,7 +741,7 @@ void x86_pmu_enable_all(int added)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx; int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) { for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct hw_perf_event *hwc = &cpuc->events[idx]->hw; struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
if (!test_bit(idx, cpuc->active_mask)) if (!test_bit(idx, cpuc->active_mask))
...@@ -975,7 +980,6 @@ EXPORT_SYMBOL_GPL(perf_assign_events); ...@@ -975,7 +980,6 @@ EXPORT_SYMBOL_GPL(perf_assign_events);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{ {
int num_counters = hybrid(cpuc->pmu, num_counters);
struct event_constraint *c; struct event_constraint *c;
struct perf_event *e; struct perf_event *e;
int n0, i, wmin, wmax, unsched = 0; int n0, i, wmin, wmax, unsched = 0;
...@@ -1051,7 +1055,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) ...@@ -1051,7 +1055,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
/* slow path */ /* slow path */
if (i != n) { if (i != n) {
int gpmax = num_counters; int gpmax = x86_pmu_max_num_counters(cpuc->pmu);
/* /*
* Do not allow scheduling of more than half the available * Do not allow scheduling of more than half the available
...@@ -1072,7 +1076,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) ...@@ -1072,7 +1076,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
* the extra Merge events needed by large increment events. * the extra Merge events needed by large increment events.
*/ */
if (x86_pmu.flags & PMU_FL_PAIR) { if (x86_pmu.flags & PMU_FL_PAIR) {
gpmax = num_counters - cpuc->n_pair; gpmax -= cpuc->n_pair;
WARN_ON(gpmax <= 0); WARN_ON(gpmax <= 0);
} }
...@@ -1157,12 +1161,10 @@ static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event, ...@@ -1157,12 +1161,10 @@ static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
*/ */
static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp) static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
{ {
int num_counters = hybrid(cpuc->pmu, num_counters);
int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
struct perf_event *event; struct perf_event *event;
int n, max_count; int n, max_count;
max_count = num_counters + num_counters_fixed; max_count = x86_pmu_num_counters(cpuc->pmu) + x86_pmu_num_counters_fixed(cpuc->pmu);
/* current number of events already accepted */ /* current number of events already accepted */
n = cpuc->n_events; n = cpuc->n_events;
...@@ -1522,13 +1524,13 @@ void perf_event_print_debug(void) ...@@ -1522,13 +1524,13 @@ void perf_event_print_debug(void)
u64 pebs, debugctl; u64 pebs, debugctl;
int cpu = smp_processor_id(); int cpu = smp_processor_id();
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
int num_counters = hybrid(cpuc->pmu, num_counters); unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask);
int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed); unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask);
struct event_constraint *pebs_constraints = hybrid(cpuc->pmu, pebs_constraints); struct event_constraint *pebs_constraints = hybrid(cpuc->pmu, pebs_constraints);
unsigned long flags; unsigned long flags;
int idx; int idx;
if (!num_counters) if (!*(u64 *)cntr_mask)
return; return;
local_irq_save(flags); local_irq_save(flags);
...@@ -1555,7 +1557,7 @@ void perf_event_print_debug(void) ...@@ -1555,7 +1557,7 @@ void perf_event_print_debug(void)
} }
pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
for (idx = 0; idx < num_counters; idx++) { for_each_set_bit(idx, cntr_mask, X86_PMC_IDX_MAX) {
rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl); rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
rdmsrl(x86_pmu_event_addr(idx), pmc_count); rdmsrl(x86_pmu_event_addr(idx), pmc_count);
...@@ -1568,7 +1570,7 @@ void perf_event_print_debug(void) ...@@ -1568,7 +1570,7 @@ void perf_event_print_debug(void)
pr_info("CPU#%d: gen-PMC%d left: %016llx\n", pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
cpu, idx, prev_left); cpu, idx, prev_left);
} }
for (idx = 0; idx < num_counters_fixed; idx++) { for_each_set_bit(idx, fixed_cntr_mask, X86_PMC_IDX_MAX) {
if (fixed_counter_disabled(idx, cpuc->pmu)) if (fixed_counter_disabled(idx, cpuc->pmu))
continue; continue;
rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
...@@ -1682,7 +1684,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs) ...@@ -1682,7 +1684,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
*/ */
apic_write(APIC_LVTPC, APIC_DM_NMI); apic_write(APIC_LVTPC, APIC_DM_NMI);
for (idx = 0; idx < x86_pmu.num_counters; idx++) { for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
if (!test_bit(idx, cpuc->active_mask)) if (!test_bit(idx, cpuc->active_mask))
continue; continue;
...@@ -2038,18 +2040,15 @@ static void _x86_pmu_read(struct perf_event *event) ...@@ -2038,18 +2040,15 @@ static void _x86_pmu_read(struct perf_event *event)
static_call(x86_pmu_update)(event); static_call(x86_pmu_update)(event);
} }
void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed, void x86_pmu_show_pmu_cap(struct pmu *pmu)
u64 intel_ctrl)
{ {
pr_info("... version: %d\n", x86_pmu.version); pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits); pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
pr_info("... generic registers: %d\n", num_counters); pr_info("... generic registers: %d\n", x86_pmu_num_counters(pmu));
pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
pr_info("... max period: %016Lx\n", x86_pmu.max_period); pr_info("... max period: %016Lx\n", x86_pmu.max_period);
pr_info("... fixed-purpose events: %lu\n", pr_info("... fixed-purpose events: %d\n", x86_pmu_num_counters_fixed(pmu));
hweight64((((1ULL << num_counters_fixed) - 1) pr_info("... event mask: %016Lx\n", hybrid(pmu, intel_ctrl));
<< INTEL_PMC_IDX_FIXED) & intel_ctrl));
pr_info("... event mask: %016Lx\n", intel_ctrl);
} }
static int __init init_hw_perf_events(void) static int __init init_hw_perf_events(void)
...@@ -2086,7 +2085,7 @@ static int __init init_hw_perf_events(void) ...@@ -2086,7 +2085,7 @@ static int __init init_hw_perf_events(void)
pmu_check_apic(); pmu_check_apic();
/* sanity check that the hardware exists or is emulated */ /* sanity check that the hardware exists or is emulated */
if (!check_hw_exists(&pmu, x86_pmu.num_counters, x86_pmu.num_counters_fixed)) if (!check_hw_exists(&pmu, x86_pmu.cntr_mask, x86_pmu.fixed_cntr_mask))
goto out_bad_pmu; goto out_bad_pmu;
pr_cont("%s PMU driver.\n", x86_pmu.name); pr_cont("%s PMU driver.\n", x86_pmu.name);
...@@ -2097,14 +2096,14 @@ static int __init init_hw_perf_events(void) ...@@ -2097,14 +2096,14 @@ static int __init init_hw_perf_events(void)
quirk->func(); quirk->func();
if (!x86_pmu.intel_ctrl) if (!x86_pmu.intel_ctrl)
x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; x86_pmu.intel_ctrl = x86_pmu.cntr_mask64;
perf_events_lapic_init(); perf_events_lapic_init();
register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
unconstrained = (struct event_constraint) unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, __EVENT_CONSTRAINT(0, x86_pmu.cntr_mask64,
0, x86_pmu.num_counters, 0, 0); 0, x86_pmu_num_counters(NULL), 0, 0);
x86_pmu_format_group.attrs = x86_pmu.format_attrs; x86_pmu_format_group.attrs = x86_pmu.format_attrs;
...@@ -2113,11 +2112,8 @@ static int __init init_hw_perf_events(void) ...@@ -2113,11 +2112,8 @@ static int __init init_hw_perf_events(void)
pmu.attr_update = x86_pmu.attr_update; pmu.attr_update = x86_pmu.attr_update;
if (!is_hybrid()) { if (!is_hybrid())
x86_pmu_show_pmu_cap(x86_pmu.num_counters, x86_pmu_show_pmu_cap(NULL);
x86_pmu.num_counters_fixed,
x86_pmu.intel_ctrl);
}
if (!x86_pmu.read) if (!x86_pmu.read)
x86_pmu.read = _x86_pmu_read; x86_pmu.read = _x86_pmu_read;
...@@ -2481,7 +2477,7 @@ void perf_clear_dirty_counters(void) ...@@ -2481,7 +2477,7 @@ void perf_clear_dirty_counters(void)
for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) { for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
if (i >= INTEL_PMC_IDX_FIXED) { if (i >= INTEL_PMC_IDX_FIXED) {
/* Metrics and fake events don't have corresponding HW counters. */ /* Metrics and fake events don't have corresponding HW counters. */
if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed)) if (!test_bit(i - INTEL_PMC_IDX_FIXED, hybrid(cpuc->pmu, fixed_cntr_mask)))
continue; continue;
wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0); wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
...@@ -2983,8 +2979,8 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) ...@@ -2983,8 +2979,8 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
* base PMU holds the correct number of counters for P-cores. * base PMU holds the correct number of counters for P-cores.
*/ */
cap->version = x86_pmu.version; cap->version = x86_pmu.version;
cap->num_counters_gp = x86_pmu.num_counters; cap->num_counters_gp = x86_pmu_num_counters(NULL);
cap->num_counters_fixed = x86_pmu.num_counters_fixed; cap->num_counters_fixed = x86_pmu_num_counters_fixed(NULL);
cap->bit_width_gp = x86_pmu.cntval_bits; cap->bit_width_gp = x86_pmu.cntval_bits;
cap->bit_width_fixed = x86_pmu.cntval_bits; cap->bit_width_fixed = x86_pmu.cntval_bits;
cap->events_mask = (unsigned int)x86_pmu.events_maskl; cap->events_mask = (unsigned int)x86_pmu.events_maskl;
......
...@@ -2874,23 +2874,23 @@ static void intel_pmu_reset(void) ...@@ -2874,23 +2874,23 @@ static void intel_pmu_reset(void)
{ {
struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed); unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask);
int num_counters = hybrid(cpuc->pmu, num_counters); unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask);
unsigned long flags; unsigned long flags;
int idx; int idx;
if (!num_counters) if (!*(u64 *)cntr_mask)
return; return;
local_irq_save(flags); local_irq_save(flags);
pr_info("clearing PMU state on CPU#%d\n", smp_processor_id()); pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
for (idx = 0; idx < num_counters; idx++) { for_each_set_bit(idx, cntr_mask, INTEL_PMC_MAX_GENERIC) {
wrmsrl_safe(x86_pmu_config_addr(idx), 0ull); wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
wrmsrl_safe(x86_pmu_event_addr(idx), 0ull); wrmsrl_safe(x86_pmu_event_addr(idx), 0ull);
} }
for (idx = 0; idx < num_counters_fixed; idx++) { for_each_set_bit(idx, fixed_cntr_mask, INTEL_PMC_MAX_FIXED) {
if (fixed_counter_disabled(idx, cpuc->pmu)) if (fixed_counter_disabled(idx, cpuc->pmu))
continue; continue;
wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
...@@ -2940,8 +2940,7 @@ static void x86_pmu_handle_guest_pebs(struct pt_regs *regs, ...@@ -2940,8 +2940,7 @@ static void x86_pmu_handle_guest_pebs(struct pt_regs *regs,
!guest_pebs_idxs) !guest_pebs_idxs)
return; return;
for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs, for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs, X86_PMC_IDX_MAX) {
INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed) {
event = cpuc->events[bit]; event = cpuc->events[bit];
if (!event->attr.precise_ip) if (!event->attr.precise_ip)
continue; continue;
...@@ -4199,7 +4198,7 @@ static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data) ...@@ -4199,7 +4198,7 @@ static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data)
struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
int idx; int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) { for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[idx]; struct perf_event *event = cpuc->events[idx];
arr[idx].msr = x86_pmu_config_addr(idx); arr[idx].msr = x86_pmu_config_addr(idx);
...@@ -4217,7 +4216,7 @@ static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data) ...@@ -4217,7 +4216,7 @@ static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data)
arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE; arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
} }
*nr = x86_pmu.num_counters; *nr = x86_pmu_max_num_counters(cpuc->pmu);
return arr; return arr;
} }
...@@ -4232,7 +4231,7 @@ static void core_pmu_enable_all(int added) ...@@ -4232,7 +4231,7 @@ static void core_pmu_enable_all(int added)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx; int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) { for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct hw_perf_event *hwc = &cpuc->events[idx]->hw; struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
if (!test_bit(idx, cpuc->active_mask) || if (!test_bit(idx, cpuc->active_mask) ||
...@@ -4684,13 +4683,33 @@ static void flip_smm_bit(void *data) ...@@ -4684,13 +4683,33 @@ static void flip_smm_bit(void *data)
} }
} }
static void intel_pmu_check_num_counters(int *num_counters, static void intel_pmu_check_counters_mask(u64 *cntr_mask,
int *num_counters_fixed, u64 *fixed_cntr_mask,
u64 *intel_ctrl, u64 fixed_mask); u64 *intel_ctrl)
{
unsigned int bit;
bit = fls64(*cntr_mask);
if (bit > INTEL_PMC_MAX_GENERIC) {
WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
bit, INTEL_PMC_MAX_GENERIC);
*cntr_mask &= GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0);
}
*intel_ctrl = *cntr_mask;
bit = fls64(*fixed_cntr_mask);
if (bit > INTEL_PMC_MAX_FIXED) {
WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
bit, INTEL_PMC_MAX_FIXED);
*fixed_cntr_mask &= GENMASK_ULL(INTEL_PMC_MAX_FIXED - 1, 0);
}
*intel_ctrl |= *fixed_cntr_mask << INTEL_PMC_IDX_FIXED;
}
static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints, static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
int num_counters, u64 cntr_mask,
int num_counters_fixed, u64 fixed_cntr_mask,
u64 intel_ctrl); u64 intel_ctrl);
static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs); static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs);
...@@ -4713,11 +4732,10 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) ...@@ -4713,11 +4732,10 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) { if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) {
cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF, cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
&eax, &ebx, &ecx, &edx); &eax, &ebx, &ecx, &edx);
pmu->num_counters = fls(eax); pmu->cntr_mask64 = eax;
pmu->num_counters_fixed = fls(ebx); pmu->fixed_cntr_mask64 = ebx;
} }
if (!intel_pmu_broken_perf_cap()) { if (!intel_pmu_broken_perf_cap()) {
/* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */ /* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */
rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities); rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities);
...@@ -4726,12 +4744,12 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) ...@@ -4726,12 +4744,12 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
{ {
intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed, intel_pmu_check_counters_mask(&pmu->cntr_mask64, &pmu->fixed_cntr_mask64,
&pmu->intel_ctrl, (1ULL << pmu->num_counters_fixed) - 1); &pmu->intel_ctrl);
pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0)); pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
pmu->unconstrained = (struct event_constraint) pmu->unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
0, pmu->num_counters, 0, 0); 0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
if (pmu->intel_cap.perf_metrics) if (pmu->intel_cap.perf_metrics)
pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
...@@ -4744,8 +4762,8 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) ...@@ -4744,8 +4762,8 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT; pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT;
intel_pmu_check_event_constraints(pmu->event_constraints, intel_pmu_check_event_constraints(pmu->event_constraints,
pmu->num_counters, pmu->cntr_mask64,
pmu->num_counters_fixed, pmu->fixed_cntr_mask64,
pmu->intel_ctrl); pmu->intel_ctrl);
intel_pmu_check_extra_regs(pmu->extra_regs); intel_pmu_check_extra_regs(pmu->extra_regs);
...@@ -4806,7 +4824,7 @@ static bool init_hybrid_pmu(int cpu) ...@@ -4806,7 +4824,7 @@ static bool init_hybrid_pmu(int cpu)
intel_pmu_check_hybrid_pmus(pmu); intel_pmu_check_hybrid_pmus(pmu);
if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed)) if (!check_hw_exists(&pmu->pmu, pmu->cntr_mask, pmu->fixed_cntr_mask))
return false; return false;
pr_info("%s PMU driver: ", pmu->name); pr_info("%s PMU driver: ", pmu->name);
...@@ -4816,8 +4834,7 @@ static bool init_hybrid_pmu(int cpu) ...@@ -4816,8 +4834,7 @@ static bool init_hybrid_pmu(int cpu)
pr_cont("\n"); pr_cont("\n");
x86_pmu_show_pmu_cap(pmu->num_counters, pmu->num_counters_fixed, x86_pmu_show_pmu_cap(&pmu->pmu);
pmu->intel_ctrl);
end: end:
cpumask_set_cpu(cpu, &pmu->supported_cpus); cpumask_set_cpu(cpu, &pmu->supported_cpus);
...@@ -5955,29 +5972,9 @@ static const struct attribute_group *hybrid_attr_update[] = { ...@@ -5955,29 +5972,9 @@ static const struct attribute_group *hybrid_attr_update[] = {
static struct attribute *empty_attrs; static struct attribute *empty_attrs;
static void intel_pmu_check_num_counters(int *num_counters,
int *num_counters_fixed,
u64 *intel_ctrl, u64 fixed_mask)
{
if (*num_counters > INTEL_PMC_MAX_GENERIC) {
WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
*num_counters, INTEL_PMC_MAX_GENERIC);
*num_counters = INTEL_PMC_MAX_GENERIC;
}
*intel_ctrl = (1ULL << *num_counters) - 1;
if (*num_counters_fixed > INTEL_PMC_MAX_FIXED) {
WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
*num_counters_fixed, INTEL_PMC_MAX_FIXED);
*num_counters_fixed = INTEL_PMC_MAX_FIXED;
}
*intel_ctrl |= fixed_mask << INTEL_PMC_IDX_FIXED;
}
static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints, static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
int num_counters, u64 cntr_mask,
int num_counters_fixed, u64 fixed_cntr_mask,
u64 intel_ctrl) u64 intel_ctrl)
{ {
struct event_constraint *c; struct event_constraint *c;
...@@ -6014,10 +6011,9 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con ...@@ -6014,10 +6011,9 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con
* generic counters * generic counters
*/ */
if (!use_fixed_pseudo_encoding(c->code)) if (!use_fixed_pseudo_encoding(c->code))
c->idxmsk64 |= (1ULL << num_counters) - 1; c->idxmsk64 |= cntr_mask;
} }
c->idxmsk64 &= c->idxmsk64 &= cntr_mask | (fixed_cntr_mask << INTEL_PMC_IDX_FIXED);
~(~0ULL << (INTEL_PMC_IDX_FIXED + num_counters_fixed));
c->weight = hweight64(c->idxmsk64); c->weight = hweight64(c->idxmsk64);
} }
} }
...@@ -6068,12 +6064,12 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus) ...@@ -6068,12 +6064,12 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
pmu->pmu_type = intel_hybrid_pmu_type_map[bit].id; pmu->pmu_type = intel_hybrid_pmu_type_map[bit].id;
pmu->name = intel_hybrid_pmu_type_map[bit].name; pmu->name = intel_hybrid_pmu_type_map[bit].name;
pmu->num_counters = x86_pmu.num_counters; pmu->cntr_mask64 = x86_pmu.cntr_mask64;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed; pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0)); pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
pmu->unconstrained = (struct event_constraint) pmu->unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
0, pmu->num_counters, 0, 0); 0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
if (pmu->pmu_type & hybrid_small) { if (pmu->pmu_type & hybrid_small) {
...@@ -6186,14 +6182,14 @@ __init int intel_pmu_init(void) ...@@ -6186,14 +6182,14 @@ __init int intel_pmu_init(void)
x86_pmu = intel_pmu; x86_pmu = intel_pmu;
x86_pmu.version = version; x86_pmu.version = version;
x86_pmu.num_counters = eax.split.num_counters; x86_pmu.cntr_mask64 = GENMASK_ULL(eax.split.num_counters - 1, 0);
x86_pmu.cntval_bits = eax.split.bit_width; x86_pmu.cntval_bits = eax.split.bit_width;
x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
x86_pmu.events_maskl = ebx.full; x86_pmu.events_maskl = ebx.full;
x86_pmu.events_mask_len = eax.split.mask_length; x86_pmu.events_mask_len = eax.split.mask_length;
x86_pmu.pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(x86_pmu.num_counters - 1, 0)); x86_pmu.pebs_events_mask = intel_pmu_pebs_mask(x86_pmu.cntr_mask64);
x86_pmu.pebs_capable = PEBS_COUNTER_MASK; x86_pmu.pebs_capable = PEBS_COUNTER_MASK;
/* /*
...@@ -6203,12 +6199,10 @@ __init int intel_pmu_init(void) ...@@ -6203,12 +6199,10 @@ __init int intel_pmu_init(void)
if (version > 1 && version < 5) { if (version > 1 && version < 5) {
int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR); int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR);
x86_pmu.num_counters_fixed = x86_pmu.fixed_cntr_mask64 =
max((int)edx.split.num_counters_fixed, assume); GENMASK_ULL(max((int)edx.split.num_counters_fixed, assume) - 1, 0);
fixed_mask = (1L << x86_pmu.num_counters_fixed) - 1;
} else if (version >= 5) } else if (version >= 5)
x86_pmu.num_counters_fixed = fls(fixed_mask); x86_pmu.fixed_cntr_mask64 = fixed_mask;
if (boot_cpu_has(X86_FEATURE_PDCM)) { if (boot_cpu_has(X86_FEATURE_PDCM)) {
u64 capabilities; u64 capabilities;
...@@ -6803,11 +6797,13 @@ __init int intel_pmu_init(void) ...@@ -6803,11 +6797,13 @@ __init int intel_pmu_init(void)
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
intel_pmu_init_glc(&pmu->pmu); intel_pmu_init_glc(&pmu->pmu);
if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
pmu->num_counters = x86_pmu.num_counters + 2; pmu->cntr_mask64 <<= 2;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1; pmu->cntr_mask64 |= 0x3;
pmu->fixed_cntr_mask64 <<= 1;
pmu->fixed_cntr_mask64 |= 0x1;
} else { } else {
pmu->num_counters = x86_pmu.num_counters; pmu->cntr_mask64 = x86_pmu.cntr_mask64;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed; pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
} }
/* /*
...@@ -6817,15 +6813,16 @@ __init int intel_pmu_init(void) ...@@ -6817,15 +6813,16 @@ __init int intel_pmu_init(void)
* mistakenly add extra counters for P-cores. Correct the number of * mistakenly add extra counters for P-cores. Correct the number of
* counters here. * counters here.
*/ */
if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) { if ((x86_pmu_num_counters(&pmu->pmu) > 8) || (x86_pmu_num_counters_fixed(&pmu->pmu) > 4)) {
pmu->num_counters = x86_pmu.num_counters; pmu->cntr_mask64 = x86_pmu.cntr_mask64;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed; pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
} }
pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0)); pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
pmu->unconstrained = (struct event_constraint) pmu->unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
0, pmu->num_counters, 0, 0); 0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
pmu->extra_regs = intel_glc_extra_regs; pmu->extra_regs = intel_glc_extra_regs;
/* Initialize Atom core specific PerfMon capabilities.*/ /* Initialize Atom core specific PerfMon capabilities.*/
...@@ -6892,9 +6889,9 @@ __init int intel_pmu_init(void) ...@@ -6892,9 +6889,9 @@ __init int intel_pmu_init(void)
* The constraints may be cut according to the CPUID enumeration * The constraints may be cut according to the CPUID enumeration
* by inserting the EVENT_CONSTRAINT_END. * by inserting the EVENT_CONSTRAINT_END.
*/ */
if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) if (fls64(x86_pmu.fixed_cntr_mask64) > INTEL_PMC_MAX_FIXED)
x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED; x86_pmu.fixed_cntr_mask64 &= GENMASK_ULL(INTEL_PMC_MAX_FIXED - 1, 0);
intel_v5_gen_event_constraints[x86_pmu.num_counters_fixed].weight = -1; intel_v5_gen_event_constraints[fls64(x86_pmu.fixed_cntr_mask64)].weight = -1;
x86_pmu.event_constraints = intel_v5_gen_event_constraints; x86_pmu.event_constraints = intel_v5_gen_event_constraints;
pr_cont("generic architected perfmon, "); pr_cont("generic architected perfmon, ");
name = "generic_arch_v5+"; name = "generic_arch_v5+";
...@@ -6921,18 +6918,17 @@ __init int intel_pmu_init(void) ...@@ -6921,18 +6918,17 @@ __init int intel_pmu_init(void)
x86_pmu.attr_update = hybrid_attr_update; x86_pmu.attr_update = hybrid_attr_update;
} }
intel_pmu_check_num_counters(&x86_pmu.num_counters, intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64,
&x86_pmu.num_counters_fixed, &x86_pmu.fixed_cntr_mask64,
&x86_pmu.intel_ctrl, &x86_pmu.intel_ctrl);
(u64)fixed_mask);
/* AnyThread may be deprecated on arch perfmon v5 or later */ /* AnyThread may be deprecated on arch perfmon v5 or later */
if (x86_pmu.intel_cap.anythread_deprecated) if (x86_pmu.intel_cap.anythread_deprecated)
x86_pmu.format_attrs = intel_arch_formats_attr; x86_pmu.format_attrs = intel_arch_formats_attr;
intel_pmu_check_event_constraints(x86_pmu.event_constraints, intel_pmu_check_event_constraints(x86_pmu.event_constraints,
x86_pmu.num_counters, x86_pmu.cntr_mask64,
x86_pmu.num_counters_fixed, x86_pmu.fixed_cntr_mask64,
x86_pmu.intel_ctrl); x86_pmu.intel_ctrl);
/* /*
* Access LBR MSR may cause #GP under certain circumstances. * Access LBR MSR may cause #GP under certain circumstances.
......
...@@ -1138,7 +1138,6 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc) ...@@ -1138,7 +1138,6 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
{ {
struct debug_store *ds = cpuc->ds; struct debug_store *ds = cpuc->ds;
int max_pebs_events = intel_pmu_max_num_pebs(cpuc->pmu); int max_pebs_events = intel_pmu_max_num_pebs(cpuc->pmu);
int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
u64 threshold; u64 threshold;
int reserved; int reserved;
...@@ -1146,7 +1145,7 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc) ...@@ -1146,7 +1145,7 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
return; return;
if (x86_pmu.flags & PMU_FL_PEBS_ALL) if (x86_pmu.flags & PMU_FL_PEBS_ALL)
reserved = max_pebs_events + num_counters_fixed; reserved = max_pebs_events + x86_pmu_max_num_counters_fixed(cpuc->pmu);
else else
reserved = max_pebs_events; reserved = max_pebs_events;
...@@ -2172,8 +2171,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d ...@@ -2172,8 +2171,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
mask = x86_pmu.pebs_events_mask; mask = x86_pmu.pebs_events_mask;
size = max_pebs_events; size = max_pebs_events;
if (x86_pmu.flags & PMU_FL_PEBS_ALL) { if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED; mask |= x86_pmu.fixed_cntr_mask64 << INTEL_PMC_IDX_FIXED;
size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed; size = INTEL_PMC_IDX_FIXED + x86_pmu_max_num_counters_fixed(NULL);
} }
if (unlikely(base >= top)) { if (unlikely(base >= top)) {
...@@ -2269,11 +2268,10 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d ...@@ -2269,11 +2268,10 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
{ {
short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
struct debug_store *ds = cpuc->ds; struct debug_store *ds = cpuc->ds;
struct perf_event *event; struct perf_event *event;
void *base, *at, *top; void *base, *at, *top;
int bit, size; int bit;
u64 mask; u64 mask;
if (!x86_pmu.pebs_active) if (!x86_pmu.pebs_active)
...@@ -2285,11 +2283,10 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d ...@@ -2285,11 +2283,10 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
ds->pebs_index = ds->pebs_buffer_base; ds->pebs_index = ds->pebs_buffer_base;
mask = hybrid(cpuc->pmu, pebs_events_mask) | mask = hybrid(cpuc->pmu, pebs_events_mask) |
(((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED); (hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED);
size = INTEL_PMC_IDX_FIXED + num_counters_fixed;
if (unlikely(base >= top)) { if (unlikely(base >= top)) {
intel_pmu_pebs_event_update_no_drain(cpuc, size); intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
return; return;
} }
...@@ -2299,11 +2296,11 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d ...@@ -2299,11 +2296,11 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
pebs_status = get_pebs_status(at) & cpuc->pebs_enabled; pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
pebs_status &= mask; pebs_status &= mask;
for_each_set_bit(bit, (unsigned long *)&pebs_status, size) for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX)
counts[bit]++; counts[bit]++;
} }
for_each_set_bit(bit, (unsigned long *)&mask, size) { for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) {
if (counts[bit] == 0) if (counts[bit] == 0)
continue; continue;
......
...@@ -303,7 +303,7 @@ static const struct x86_pmu knc_pmu __initconst = { ...@@ -303,7 +303,7 @@ static const struct x86_pmu knc_pmu __initconst = {
.apic = 1, .apic = 1,
.max_period = (1ULL << 39) - 1, .max_period = (1ULL << 39) - 1,
.version = 0, .version = 0,
.num_counters = 2, .cntr_mask64 = 0x3,
.cntval_bits = 40, .cntval_bits = 40,
.cntval_mask = (1ULL << 40) - 1, .cntval_mask = (1ULL << 40) - 1,
.get_event_constraints = x86_get_event_constraints, .get_event_constraints = x86_get_event_constraints,
......
...@@ -919,7 +919,7 @@ static void p4_pmu_disable_all(void) ...@@ -919,7 +919,7 @@ static void p4_pmu_disable_all(void)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx; int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) { for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[idx]; struct perf_event *event = cpuc->events[idx];
if (!test_bit(idx, cpuc->active_mask)) if (!test_bit(idx, cpuc->active_mask))
continue; continue;
...@@ -998,7 +998,7 @@ static void p4_pmu_enable_all(int added) ...@@ -998,7 +998,7 @@ static void p4_pmu_enable_all(int added)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx; int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) { for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[idx]; struct perf_event *event = cpuc->events[idx];
if (!test_bit(idx, cpuc->active_mask)) if (!test_bit(idx, cpuc->active_mask))
continue; continue;
...@@ -1040,7 +1040,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) ...@@ -1040,7 +1040,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
cpuc = this_cpu_ptr(&cpu_hw_events); cpuc = this_cpu_ptr(&cpu_hw_events);
for (idx = 0; idx < x86_pmu.num_counters; idx++) { for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
int overflow; int overflow;
if (!test_bit(idx, cpuc->active_mask)) { if (!test_bit(idx, cpuc->active_mask)) {
...@@ -1353,7 +1353,7 @@ static __initconst const struct x86_pmu p4_pmu = { ...@@ -1353,7 +1353,7 @@ static __initconst const struct x86_pmu p4_pmu = {
* though leave it restricted at moment assuming * though leave it restricted at moment assuming
* HT is on * HT is on
*/ */
.num_counters = ARCH_P4_MAX_CCCR, .cntr_mask64 = GENMASK_ULL(ARCH_P4_MAX_CCCR - 1, 0),
.apic = 1, .apic = 1,
.cntval_bits = ARCH_P4_CNTRVAL_BITS, .cntval_bits = ARCH_P4_CNTRVAL_BITS,
.cntval_mask = ARCH_P4_CNTRVAL_MASK, .cntval_mask = ARCH_P4_CNTRVAL_MASK,
...@@ -1395,7 +1395,7 @@ __init int p4_pmu_init(void) ...@@ -1395,7 +1395,7 @@ __init int p4_pmu_init(void)
* *
* Solve this by zero'ing out the registers to mimic a reset. * Solve this by zero'ing out the registers to mimic a reset.
*/ */
for (i = 0; i < x86_pmu.num_counters; i++) { for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
reg = x86_pmu_config_addr(i); reg = x86_pmu_config_addr(i);
wrmsrl_safe(reg, 0ULL); wrmsrl_safe(reg, 0ULL);
} }
......
...@@ -214,7 +214,7 @@ static __initconst const struct x86_pmu p6_pmu = { ...@@ -214,7 +214,7 @@ static __initconst const struct x86_pmu p6_pmu = {
.apic = 1, .apic = 1,
.max_period = (1ULL << 31) - 1, .max_period = (1ULL << 31) - 1,
.version = 0, .version = 0,
.num_counters = 2, .cntr_mask64 = 0x3,
/* /*
* Events have 40 bits implemented. However they are designed such * Events have 40 bits implemented. However they are designed such
* that bits [32-39] are sign extensions of bit 31. As such the * that bits [32-39] are sign extensions of bit 31. As such the
......
...@@ -685,8 +685,14 @@ struct x86_hybrid_pmu { ...@@ -685,8 +685,14 @@ struct x86_hybrid_pmu {
union perf_capabilities intel_cap; union perf_capabilities intel_cap;
u64 intel_ctrl; u64 intel_ctrl;
u64 pebs_events_mask; u64 pebs_events_mask;
int num_counters; union {
int num_counters_fixed; u64 cntr_mask64;
unsigned long cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
union {
u64 fixed_cntr_mask64;
unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
struct event_constraint unconstrained; struct event_constraint unconstrained;
u64 hw_cache_event_ids u64 hw_cache_event_ids
...@@ -774,8 +780,14 @@ struct x86_pmu { ...@@ -774,8 +780,14 @@ struct x86_pmu {
int (*rdpmc_index)(int index); int (*rdpmc_index)(int index);
u64 (*event_map)(int); u64 (*event_map)(int);
int max_events; int max_events;
int num_counters; union {
int num_counters_fixed; u64 cntr_mask64;
unsigned long cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
union {
u64 fixed_cntr_mask64;
unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
int cntval_bits; int cntval_bits;
u64 cntval_mask; u64 cntval_mask;
union { union {
...@@ -1125,8 +1137,8 @@ static inline int x86_pmu_rdpmc_index(int index) ...@@ -1125,8 +1137,8 @@ static inline int x86_pmu_rdpmc_index(int index)
return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index; return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
} }
bool check_hw_exists(struct pmu *pmu, int num_counters, bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
int num_counters_fixed); unsigned long *fixed_cntr_mask);
int x86_add_exclusive(unsigned int what); int x86_add_exclusive(unsigned int what);
...@@ -1197,8 +1209,27 @@ void x86_pmu_enable_event(struct perf_event *event); ...@@ -1197,8 +1209,27 @@ void x86_pmu_enable_event(struct perf_event *event);
int x86_pmu_handle_irq(struct pt_regs *regs); int x86_pmu_handle_irq(struct pt_regs *regs);
void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed, void x86_pmu_show_pmu_cap(struct pmu *pmu);
u64 intel_ctrl);
static inline int x86_pmu_num_counters(struct pmu *pmu)
{
return hweight64(hybrid(pmu, cntr_mask64));
}
static inline int x86_pmu_max_num_counters(struct pmu *pmu)
{
return fls64(hybrid(pmu, cntr_mask64));
}
static inline int x86_pmu_num_counters_fixed(struct pmu *pmu)
{
return hweight64(hybrid(pmu, fixed_cntr_mask64));
}
static inline int x86_pmu_max_num_counters_fixed(struct pmu *pmu)
{
return fls64(hybrid(pmu, fixed_cntr_mask64));
}
extern struct event_constraint emptyconstraint; extern struct event_constraint emptyconstraint;
......
...@@ -530,13 +530,13 @@ __init int zhaoxin_pmu_init(void) ...@@ -530,13 +530,13 @@ __init int zhaoxin_pmu_init(void)
pr_info("Version check pass!\n"); pr_info("Version check pass!\n");
x86_pmu.version = version; x86_pmu.version = version;
x86_pmu.num_counters = eax.split.num_counters; x86_pmu.cntr_mask64 = GENMASK_ULL(eax.split.num_counters - 1, 0);
x86_pmu.cntval_bits = eax.split.bit_width; x86_pmu.cntval_bits = eax.split.bit_width;
x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
x86_pmu.events_maskl = ebx.full; x86_pmu.events_maskl = ebx.full;
x86_pmu.events_mask_len = eax.split.mask_length; x86_pmu.events_mask_len = eax.split.mask_length;
x86_pmu.num_counters_fixed = edx.split.num_counters_fixed; x86_pmu.fixed_cntr_mask64 = GENMASK_ULL(edx.split.num_counters_fixed - 1, 0);
x86_add_quirk(zhaoxin_arch_events_quirk); x86_add_quirk(zhaoxin_arch_events_quirk);
switch (boot_cpu_data.x86) { switch (boot_cpu_data.x86) {
...@@ -604,13 +604,13 @@ __init int zhaoxin_pmu_init(void) ...@@ -604,13 +604,13 @@ __init int zhaoxin_pmu_init(void)
return -ENODEV; return -ENODEV;
} }
x86_pmu.intel_ctrl = (1 << (x86_pmu.num_counters)) - 1; x86_pmu.intel_ctrl = x86_pmu.cntr_mask64;
x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; x86_pmu.intel_ctrl |= x86_pmu.fixed_cntr_mask64 << INTEL_PMC_IDX_FIXED;
if (x86_pmu.event_constraints) { if (x86_pmu.event_constraints) {
for_each_event_constraint(c, x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) {
c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; c->idxmsk64 |= x86_pmu.cntr_mask64;
c->weight += x86_pmu.num_counters; c->weight += x86_pmu_num_counters(NULL);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment