Commit 3871d93b authored by Linus Torvalds

Merge tag 'perf-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf events updates from Ingo Molnar:
 "PMU driver updates:

   - Add AMD Last Branch Record Extension Version 2 (LbrExtV2) feature
     support for Zen 4 processors.

   - Extend the perf ABI to provide branch speculation information, if
     available, and use this on CPUs that have it (e.g. LbrExtV2).

   - Improve Intel PEBS TSC timestamp handling & integration.

   - Add Intel Raptor Lake S CPU support.

   - Add 'perf mem' and 'perf c2c' memory profiling support on AMD CPUs
     by utilizing IBS tagged load/store samples.

   - Clean up & optimize various x86 PMU details.

  HW breakpoints:

   - Big rework to optimize the code for systems with hundreds of CPUs
     and thousands of breakpoints:

      - Replace the nr_bp_mutex global mutex with the bp_cpuinfo_sem
        per-CPU rwsem that is read-locked during most of the key
        operations (a minimal sketch of this pattern follows the changelog).

      - Improve the O(#cpus * #tasks) logic in toggle_bp_slot() and
        fetch_bp_busy_slots().

      - Apply micro-optimizations & cleanups.

  - Misc cleanups & enhancements"
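
A minimal sketch of the per-CPU rwsem pattern the HW-breakpoints item above
refers to, assuming only the generic <linux/percpu-rwsem.h> API (the real
bp_cpuinfo_sem usage in kernel/events/hw_breakpoint.c is more involved):
constraint checks take the cheap, per-CPU read side, while rare global
updates take the write side.

    #include <linux/percpu-rwsem.h>

    static DEFINE_STATIC_PERCPU_RWSEM(demo_bp_sem);

    /* Hot path: runs concurrently on every CPU. */
    static int demo_constraint_check(void)
    {
            int ret = 0;

            percpu_down_read(&demo_bp_sem);
            /* ... read per-CPU slot accounting ... */
            percpu_up_read(&demo_bp_sem);
            return ret;
    }

    /* Slow path: excludes all readers while rebalancing. */
    static void demo_global_update(void)
    {
            percpu_down_write(&demo_bp_sem);
            /* ... modify accounting visible to all CPUs ... */
            percpu_up_write(&demo_bp_sem);
    }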

* tag 'perf-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (75 commits)
  perf/hw_breakpoint: Annotate tsk->perf_event_mutex vs ctx->mutex
  perf: Fix pmu_filter_match()
  perf: Fix lockdep_assert_event_ctx()
  perf/x86/amd/lbr: Adjust LBR regardless of filtering
  perf/x86/utils: Fix uninitialized var in get_branch_type()
  perf/uapi: Define PERF_MEM_SNOOPX_PEER in kernel header file
  perf/x86/amd: Support PERF_SAMPLE_PHY_ADDR
  perf/x86/amd: Support PERF_SAMPLE_ADDR
  perf/x86/amd: Support PERF_SAMPLE_{WEIGHT|WEIGHT_STRUCT}
  perf/x86/amd: Support PERF_SAMPLE_DATA_SRC
  perf/x86/amd: Add IBS OP_DATA2 DataSrc bit definitions
  perf/mem: Introduce PERF_MEM_LVLNUM_{EXTN_MEM|IO}
  perf/x86/uncore: Add new Raptor Lake S support
  perf/x86/cstate: Add new Raptor Lake S support
  perf/x86/msr: Add new Raptor Lake S support
  perf/x86: Add new Raptor Lake S support
  bpf: Check flags for branch stack in bpf_read_branch_records helper
  perf, hw_breakpoint: Fix use-after-free if perf_event_open() fails
  perf: Use sample_flags for raw_data
  perf: Use sample_flags for addr
  ...
parents 30c99993 82aad7ff
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/init.h> #include <linux/init.h>
...@@ -129,7 +130,14 @@ struct breakpoint { ...@@ -129,7 +130,14 @@ struct breakpoint {
bool ptrace_bp; bool ptrace_bp;
}; };
/*
* While kernel/events/hw_breakpoint.c does its own synchronization, we cannot
* rely on it safely synchronizing internals here; however, we can rely on it
* not requesting more breakpoints than available.
*/
static DEFINE_SPINLOCK(cpu_bps_lock);
static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]); static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]);
static DEFINE_SPINLOCK(task_bps_lock);
static LIST_HEAD(task_bps); static LIST_HEAD(task_bps);
static struct breakpoint *alloc_breakpoint(struct perf_event *bp) static struct breakpoint *alloc_breakpoint(struct perf_event *bp)
...@@ -174,7 +182,9 @@ static int task_bps_add(struct perf_event *bp) ...@@ -174,7 +182,9 @@ static int task_bps_add(struct perf_event *bp)
if (IS_ERR(tmp)) if (IS_ERR(tmp))
return PTR_ERR(tmp); return PTR_ERR(tmp);
spin_lock(&task_bps_lock);
list_add(&tmp->list, &task_bps); list_add(&tmp->list, &task_bps);
spin_unlock(&task_bps_lock);
return 0; return 0;
} }
...@@ -182,6 +192,7 @@ static void task_bps_remove(struct perf_event *bp) ...@@ -182,6 +192,7 @@ static void task_bps_remove(struct perf_event *bp)
{ {
struct list_head *pos, *q; struct list_head *pos, *q;
spin_lock(&task_bps_lock);
list_for_each_safe(pos, q, &task_bps) { list_for_each_safe(pos, q, &task_bps) {
struct breakpoint *tmp = list_entry(pos, struct breakpoint, list); struct breakpoint *tmp = list_entry(pos, struct breakpoint, list);
...@@ -191,6 +202,7 @@ static void task_bps_remove(struct perf_event *bp) ...@@ -191,6 +202,7 @@ static void task_bps_remove(struct perf_event *bp)
break; break;
} }
} }
spin_unlock(&task_bps_lock);
} }
/* /*
...@@ -200,12 +212,17 @@ static void task_bps_remove(struct perf_event *bp) ...@@ -200,12 +212,17 @@ static void task_bps_remove(struct perf_event *bp)
static bool all_task_bps_check(struct perf_event *bp) static bool all_task_bps_check(struct perf_event *bp)
{ {
struct breakpoint *tmp; struct breakpoint *tmp;
bool ret = false;
spin_lock(&task_bps_lock);
list_for_each_entry(tmp, &task_bps, list) { list_for_each_entry(tmp, &task_bps, list) {
if (!can_co_exist(tmp, bp)) if (!can_co_exist(tmp, bp)) {
return true; ret = true;
break;
}
} }
return false; spin_unlock(&task_bps_lock);
return ret;
} }
/* /*
...@@ -215,13 +232,18 @@ static bool all_task_bps_check(struct perf_event *bp) ...@@ -215,13 +232,18 @@ static bool all_task_bps_check(struct perf_event *bp)
static bool same_task_bps_check(struct perf_event *bp) static bool same_task_bps_check(struct perf_event *bp)
{ {
struct breakpoint *tmp; struct breakpoint *tmp;
bool ret = false;
spin_lock(&task_bps_lock);
list_for_each_entry(tmp, &task_bps, list) { list_for_each_entry(tmp, &task_bps, list) {
if (tmp->bp->hw.target == bp->hw.target && if (tmp->bp->hw.target == bp->hw.target &&
!can_co_exist(tmp, bp)) !can_co_exist(tmp, bp)) {
return true; ret = true;
break;
}
} }
return false; spin_unlock(&task_bps_lock);
return ret;
} }
static int cpu_bps_add(struct perf_event *bp) static int cpu_bps_add(struct perf_event *bp)
...@@ -234,6 +256,7 @@ static int cpu_bps_add(struct perf_event *bp) ...@@ -234,6 +256,7 @@ static int cpu_bps_add(struct perf_event *bp)
if (IS_ERR(tmp)) if (IS_ERR(tmp))
return PTR_ERR(tmp); return PTR_ERR(tmp);
spin_lock(&cpu_bps_lock);
cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
for (i = 0; i < nr_wp_slots(); i++) { for (i = 0; i < nr_wp_slots(); i++) {
if (!cpu_bp[i]) { if (!cpu_bp[i]) {
...@@ -241,6 +264,7 @@ static int cpu_bps_add(struct perf_event *bp) ...@@ -241,6 +264,7 @@ static int cpu_bps_add(struct perf_event *bp)
break; break;
} }
} }
spin_unlock(&cpu_bps_lock);
return 0; return 0;
} }
...@@ -249,6 +273,7 @@ static void cpu_bps_remove(struct perf_event *bp) ...@@ -249,6 +273,7 @@ static void cpu_bps_remove(struct perf_event *bp)
struct breakpoint **cpu_bp; struct breakpoint **cpu_bp;
int i = 0; int i = 0;
spin_lock(&cpu_bps_lock);
cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
for (i = 0; i < nr_wp_slots(); i++) { for (i = 0; i < nr_wp_slots(); i++) {
if (!cpu_bp[i]) if (!cpu_bp[i])
...@@ -260,19 +285,25 @@ static void cpu_bps_remove(struct perf_event *bp) ...@@ -260,19 +285,25 @@ static void cpu_bps_remove(struct perf_event *bp)
break; break;
} }
} }
spin_unlock(&cpu_bps_lock);
} }
static bool cpu_bps_check(int cpu, struct perf_event *bp) static bool cpu_bps_check(int cpu, struct perf_event *bp)
{ {
struct breakpoint **cpu_bp; struct breakpoint **cpu_bp;
bool ret = false;
int i; int i;
spin_lock(&cpu_bps_lock);
cpu_bp = per_cpu_ptr(cpu_bps, cpu); cpu_bp = per_cpu_ptr(cpu_bps, cpu);
for (i = 0; i < nr_wp_slots(); i++) { for (i = 0; i < nr_wp_slots(); i++) {
if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) {
return true; ret = true;
break;
}
} }
return false; spin_unlock(&cpu_bps_lock);
return ret;
} }
static bool all_cpu_bps_check(struct perf_event *bp) static bool all_cpu_bps_check(struct perf_event *bp)
...@@ -286,10 +317,6 @@ static bool all_cpu_bps_check(struct perf_event *bp) ...@@ -286,10 +317,6 @@ static bool all_cpu_bps_check(struct perf_event *bp)
return false; return false;
} }
/*
* We don't use any locks to serialize accesses to cpu_bps or task_bps
* because are already inside nr_bp_mutex.
*/
int arch_reserve_bp_slot(struct perf_event *bp) int arch_reserve_bp_slot(struct perf_event *bp)
{ {
int ret; int ret;
......
...@@ -2314,16 +2314,20 @@ static void record_and_restart(struct perf_event *event, unsigned long val, ...@@ -2314,16 +2314,20 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
cpuhw = this_cpu_ptr(&cpu_hw_events); cpuhw = this_cpu_ptr(&cpu_hw_events);
power_pmu_bhrb_read(event, cpuhw); power_pmu_bhrb_read(event, cpuhw);
data.br_stack = &cpuhw->bhrb_stack; data.br_stack = &cpuhw->bhrb_stack;
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
} }
if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC && if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
ppmu->get_mem_data_src) ppmu->get_mem_data_src) {
ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs); ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs);
data.sample_flags |= PERF_SAMPLE_DATA_SRC;
}
if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE && if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
ppmu->get_mem_weight) ppmu->get_mem_weight) {
ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type); ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
}
if (perf_event_overflow(event, &data, regs)) if (perf_event_overflow(event, &data, regs))
power_pmu_stop(event, 0); power_pmu_stop(event, 0);
} else if (period) { } else if (period) {
......
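
The pattern added in the hunk above (and repeated in the s390 and x86 PEBS
hunks further down) is the new perf_sample_data contract: a PMU driver that
fills a sample field directly must also set the matching PERF_SAMPLE_* bit in
data->sample_flags so the generic code knows the field is already valid. A
minimal, hedged sketch; fill_sample_addr() is an illustrative helper, not a
kernel function:

    #include <linux/perf_event.h>

    static void fill_sample_addr(struct perf_event *event,
                                 struct perf_sample_data *data, u64 addr)
    {
            if (event->attr.sample_type & PERF_SAMPLE_ADDR) {
                    data->addr = addr;
                    /* Mark the field as populated for this sample. */
                    data->sample_flags |= PERF_SAMPLE_ADDR;
            }
    }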
...@@ -664,6 +664,7 @@ static int cfdiag_push_sample(struct perf_event *event, ...@@ -664,6 +664,7 @@ static int cfdiag_push_sample(struct perf_event *event,
raw.frag.data = cpuhw->stop; raw.frag.data = cpuhw->stop;
raw.size = raw.frag.size; raw.size = raw.frag.size;
data.raw = &raw; data.raw = &raw;
data.sample_flags |= PERF_SAMPLE_RAW;
} }
overflow = perf_event_overflow(event, &data, &regs); overflow = perf_event_overflow(event, &data, &regs);
......
...@@ -366,6 +366,7 @@ static int paicrypt_push_sample(void) ...@@ -366,6 +366,7 @@ static int paicrypt_push_sample(void)
raw.frag.data = cpump->save; raw.frag.data = cpump->save;
raw.size = raw.frag.size; raw.size = raw.frag.size;
data.raw = &raw; data.raw = &raw;
data.sample_flags |= PERF_SAMPLE_RAW;
} }
overflow = perf_event_overflow(event, &data, &regs); overflow = perf_event_overflow(event, &data, &regs);
......
...@@ -48,10 +48,7 @@ struct pmu; ...@@ -48,10 +48,7 @@ struct pmu;
/* Maximum number of UBC channels */ /* Maximum number of UBC channels */
#define HBP_NUM 2 #define HBP_NUM 2
static inline int hw_breakpoint_slots(int type) #define hw_breakpoint_slots(type) (HBP_NUM)
{
return HBP_NUM;
}
/* arch/sh/kernel/hw_breakpoint.c */ /* arch/sh/kernel/hw_breakpoint.c */
extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw); extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
......
# SPDX-License-Identifier: GPL-2.0-only # SPDX-License-Identifier: GPL-2.0-only
obj-y += core.o probe.o obj-y += core.o probe.o utils.o
obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += rapl.o obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += rapl.o
obj-y += amd/ obj-y += amd/
obj-$(CONFIG_X86_LOCAL_APIC) += msr.o obj-$(CONFIG_X86_LOCAL_APIC) += msr.o
......
# SPDX-License-Identifier: GPL-2.0 # SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_CPU_SUP_AMD) += core.o obj-$(CONFIG_CPU_SUP_AMD) += core.o lbr.o
obj-$(CONFIG_PERF_EVENTS_AMD_BRS) += brs.o obj-$(CONFIG_PERF_EVENTS_AMD_BRS) += brs.o
obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += power.o obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += power.o
obj-$(CONFIG_X86_LOCAL_APIC) += ibs.o obj-$(CONFIG_X86_LOCAL_APIC) += ibs.o
......
...@@ -81,7 +81,7 @@ static bool __init amd_brs_detect(void) ...@@ -81,7 +81,7 @@ static bool __init amd_brs_detect(void)
* a br_sel_map. Software filtering is not supported because it would not correlate well * a br_sel_map. Software filtering is not supported because it would not correlate well
* with a sampling period. * with a sampling period.
*/ */
int amd_brs_setup_filter(struct perf_event *event) static int amd_brs_setup_filter(struct perf_event *event)
{ {
u64 type = event->attr.branch_sample_type; u64 type = event->attr.branch_sample_type;
...@@ -96,6 +96,73 @@ int amd_brs_setup_filter(struct perf_event *event) ...@@ -96,6 +96,73 @@ int amd_brs_setup_filter(struct perf_event *event)
return 0; return 0;
} }
static inline int amd_is_brs_event(struct perf_event *e)
{
return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
}
int amd_brs_hw_config(struct perf_event *event)
{
int ret = 0;
/*
* Due to interrupt holding, BRS is not recommended in
* counting mode.
*/
if (!is_sampling_event(event))
return -EINVAL;
/*
* Due to the way BRS operates by holding the interrupt until
* lbr_nr entries have been captured, it does not make sense
* to allow sampling on BRS with an event that does not match
* what BRS is capturing, i.e., retired taken branches.
* Otherwise the correlation with the event's period is even
* more loose:
*
* With retired taken branch:
* Effective P = P + 16 + X
* With any other event:
* Effective P = P + Y + X
*
* Where X is the number of taken branches due to interrupt
* skid. Skid is large.
*
* Where Y is the occurrences of the event while BRS is
* capturing the lbr_nr entries.
*
* By using retired taken branches, we limit the impact on the
* Y variable. We know it cannot be more than the depth of
* BRS.
*/
if (!amd_is_brs_event(event))
return -EINVAL;
/*
* BRS implementation does not work with frequency mode
* reprogramming of the period.
*/
if (event->attr.freq)
return -EINVAL;
/*
* The kernel subtracts BRS depth from period, so it must
* be big enough.
*/
if (event->attr.sample_period <= x86_pmu.lbr_nr)
return -EINVAL;
/*
* Check if we can allow PERF_SAMPLE_BRANCH_STACK
*/
ret = amd_brs_setup_filter(event);
/* only set in case of success */
if (!ret)
event->hw.flags |= PERF_X86_EVENT_AMD_BRS;
return ret;
}
/* tos = top of stack, i.e., last valid entry written */ /* tos = top of stack, i.e., last valid entry written */
static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg) static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg)
{ {
......
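
For reference, a hedged user-space sketch of an event configuration that
should pass the amd_brs_hw_config() checks above: a sampling event, a fixed
period larger than the 16-entry BRS depth, no frequency mode, and the retired
taken-branches event. The raw event code 0xc4 is an assumption here (it is
what AMD_FAM19H_BRS_EVENT is expected to encode); consult the PPR for the
actual CPU.

    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <string.h>
    #include <unistd.h>

    static int open_brs_sampling_event(void)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_RAW;
            attr.config = 0xc4;                /* retired taken branches (assumed) */
            attr.sample_period = 100000;       /* must be > lbr_nr (16) */
            attr.freq = 0;                     /* frequency mode is rejected */
            attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
            attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY;

            /* measure the calling thread on any CPU */
            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }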
...@@ -72,6 +72,10 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add); ...@@ -72,6 +72,10 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add);
DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del); DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del);
DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read); DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read);
DEFINE_STATIC_CALL_NULL(x86_pmu_set_period, *x86_pmu.set_period);
DEFINE_STATIC_CALL_NULL(x86_pmu_update, *x86_pmu.update);
DEFINE_STATIC_CALL_NULL(x86_pmu_limit_period, *x86_pmu.limit_period);
DEFINE_STATIC_CALL_NULL(x86_pmu_schedule_events, *x86_pmu.schedule_events); DEFINE_STATIC_CALL_NULL(x86_pmu_schedule_events, *x86_pmu.schedule_events);
DEFINE_STATIC_CALL_NULL(x86_pmu_get_event_constraints, *x86_pmu.get_event_constraints); DEFINE_STATIC_CALL_NULL(x86_pmu_get_event_constraints, *x86_pmu.get_event_constraints);
DEFINE_STATIC_CALL_NULL(x86_pmu_put_event_constraints, *x86_pmu.put_event_constraints); DEFINE_STATIC_CALL_NULL(x86_pmu_put_event_constraints, *x86_pmu.put_event_constraints);
...@@ -116,9 +120,6 @@ u64 x86_perf_event_update(struct perf_event *event) ...@@ -116,9 +120,6 @@ u64 x86_perf_event_update(struct perf_event *event)
if (unlikely(!hwc->event_base)) if (unlikely(!hwc->event_base))
return 0; return 0;
if (unlikely(is_topdown_count(event)) && x86_pmu.update_topdown_event)
return x86_pmu.update_topdown_event(event);
/* /*
* Careful: an NMI might modify the previous event value. * Careful: an NMI might modify the previous event value.
* *
...@@ -621,8 +622,9 @@ int x86_pmu_hw_config(struct perf_event *event) ...@@ -621,8 +622,9 @@ int x86_pmu_hw_config(struct perf_event *event)
event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
if (event->attr.sample_period && x86_pmu.limit_period) { if (event->attr.sample_period && x86_pmu.limit_period) {
if (x86_pmu.limit_period(event, event->attr.sample_period) > s64 left = event->attr.sample_period;
event->attr.sample_period) x86_pmu.limit_period(event, &left);
if (left > event->attr.sample_period)
return -EINVAL; return -EINVAL;
} }
...@@ -1354,7 +1356,7 @@ static void x86_pmu_enable(struct pmu *pmu) ...@@ -1354,7 +1356,7 @@ static void x86_pmu_enable(struct pmu *pmu)
static_call(x86_pmu_enable_all)(added); static_call(x86_pmu_enable_all)(added);
} }
static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
/* /*
* Set the next IRQ period, based on the hwc->period_left value. * Set the next IRQ period, based on the hwc->period_left value.
...@@ -1370,10 +1372,6 @@ int x86_perf_event_set_period(struct perf_event *event) ...@@ -1370,10 +1372,6 @@ int x86_perf_event_set_period(struct perf_event *event)
if (unlikely(!hwc->event_base)) if (unlikely(!hwc->event_base))
return 0; return 0;
if (unlikely(is_topdown_count(event)) &&
x86_pmu.set_topdown_event_period)
return x86_pmu.set_topdown_event_period(event);
/* /*
* If we are way outside a reasonable range then just skip forward: * If we are way outside a reasonable range then just skip forward:
*/ */
...@@ -1399,10 +1397,9 @@ int x86_perf_event_set_period(struct perf_event *event) ...@@ -1399,10 +1397,9 @@ int x86_perf_event_set_period(struct perf_event *event)
if (left > x86_pmu.max_period) if (left > x86_pmu.max_period)
left = x86_pmu.max_period; left = x86_pmu.max_period;
if (x86_pmu.limit_period) static_call_cond(x86_pmu_limit_period)(event, &left);
left = x86_pmu.limit_period(event, left);
per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; this_cpu_write(pmc_prev_left[idx], left);
/* /*
* The hw event starts counting from this event offset, * The hw event starts counting from this event offset,
...@@ -1419,16 +1416,6 @@ int x86_perf_event_set_period(struct perf_event *event) ...@@ -1419,16 +1416,6 @@ int x86_perf_event_set_period(struct perf_event *event)
if (is_counter_pair(hwc)) if (is_counter_pair(hwc))
wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff); wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff);
/*
* Due to erratum on certan cpu we need
* a second write to be sure the register
* is updated properly
*/
if (x86_pmu.perfctr_second_write) {
wrmsrl(hwc->event_base,
(u64)(-left) & x86_pmu.cntval_mask);
}
perf_event_update_userpage(event); perf_event_update_userpage(event);
return ret; return ret;
...@@ -1518,7 +1505,7 @@ static void x86_pmu_start(struct perf_event *event, int flags) ...@@ -1518,7 +1505,7 @@ static void x86_pmu_start(struct perf_event *event, int flags)
if (flags & PERF_EF_RELOAD) { if (flags & PERF_EF_RELOAD) {
WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
x86_perf_event_set_period(event); static_call(x86_pmu_set_period)(event);
} }
event->hw.state = 0; event->hw.state = 0;
...@@ -1610,7 +1597,7 @@ void x86_pmu_stop(struct perf_event *event, int flags) ...@@ -1610,7 +1597,7 @@ void x86_pmu_stop(struct perf_event *event, int flags)
* Drain the remaining delta count out of a event * Drain the remaining delta count out of a event
* that we are disabling: * that we are disabling:
*/ */
x86_perf_event_update(event); static_call(x86_pmu_update)(event);
hwc->state |= PERF_HES_UPTODATE; hwc->state |= PERF_HES_UPTODATE;
} }
} }
...@@ -1700,7 +1687,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs) ...@@ -1700,7 +1687,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
event = cpuc->events[idx]; event = cpuc->events[idx];
val = x86_perf_event_update(event); val = static_call(x86_pmu_update)(event);
if (val & (1ULL << (x86_pmu.cntval_bits - 1))) if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
continue; continue;
...@@ -1709,13 +1696,15 @@ int x86_pmu_handle_irq(struct pt_regs *regs) ...@@ -1709,13 +1696,15 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
*/ */
handled++; handled++;
if (!x86_perf_event_set_period(event)) if (!static_call(x86_pmu_set_period)(event))
continue; continue;
perf_sample_data_init(&data, 0, event->hw.last_period); perf_sample_data_init(&data, 0, event->hw.last_period);
if (has_branch_stack(event)) if (has_branch_stack(event)) {
data.br_stack = &cpuc->lbr_stack; data.br_stack = &cpuc->lbr_stack;
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
}
if (perf_event_overflow(event, &data, regs)) if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0); x86_pmu_stop(event, 0);
...@@ -2023,6 +2012,10 @@ static void x86_pmu_static_call_update(void) ...@@ -2023,6 +2012,10 @@ static void x86_pmu_static_call_update(void)
static_call_update(x86_pmu_del, x86_pmu.del); static_call_update(x86_pmu_del, x86_pmu.del);
static_call_update(x86_pmu_read, x86_pmu.read); static_call_update(x86_pmu_read, x86_pmu.read);
static_call_update(x86_pmu_set_period, x86_pmu.set_period);
static_call_update(x86_pmu_update, x86_pmu.update);
static_call_update(x86_pmu_limit_period, x86_pmu.limit_period);
static_call_update(x86_pmu_schedule_events, x86_pmu.schedule_events); static_call_update(x86_pmu_schedule_events, x86_pmu.schedule_events);
static_call_update(x86_pmu_get_event_constraints, x86_pmu.get_event_constraints); static_call_update(x86_pmu_get_event_constraints, x86_pmu.get_event_constraints);
static_call_update(x86_pmu_put_event_constraints, x86_pmu.put_event_constraints); static_call_update(x86_pmu_put_event_constraints, x86_pmu.put_event_constraints);
...@@ -2042,7 +2035,7 @@ static void x86_pmu_static_call_update(void) ...@@ -2042,7 +2035,7 @@ static void x86_pmu_static_call_update(void)
static void _x86_pmu_read(struct perf_event *event) static void _x86_pmu_read(struct perf_event *event)
{ {
x86_perf_event_update(event); static_call(x86_pmu_update)(event);
} }
void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed, void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
...@@ -2149,6 +2142,12 @@ static int __init init_hw_perf_events(void) ...@@ -2149,6 +2142,12 @@ static int __init init_hw_perf_events(void)
if (!x86_pmu.guest_get_msrs) if (!x86_pmu.guest_get_msrs)
x86_pmu.guest_get_msrs = (void *)&__static_call_return0; x86_pmu.guest_get_msrs = (void *)&__static_call_return0;
if (!x86_pmu.set_period)
x86_pmu.set_period = x86_perf_event_set_period;
if (!x86_pmu.update)
x86_pmu.update = x86_perf_event_update;
x86_pmu_static_call_update(); x86_pmu_static_call_update();
/* /*
...@@ -2670,7 +2669,9 @@ static int x86_pmu_check_period(struct perf_event *event, u64 value) ...@@ -2670,7 +2669,9 @@ static int x86_pmu_check_period(struct perf_event *event, u64 value)
return -EINVAL; return -EINVAL;
if (value && x86_pmu.limit_period) { if (value && x86_pmu.limit_period) {
if (x86_pmu.limit_period(event, value) > value) s64 left = value;
x86_pmu.limit_period(event, &left);
if (left > value)
return -EINVAL; return -EINVAL;
} }
......
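
A side note on the mechanism used above: DEFINE_STATIC_CALL_NULL() plus
static_call_cond() turns an optional hook into a patched call site that is a
NOP until a target is installed, and static_call_cond() only accepts functions
returning void, which is presumably one reason limit_period() now returns void
and takes an s64 * in/out argument. A stand-alone, hedged sketch (all names
here are illustrative, not the kernel's):

    #include <linux/static_call.h>
    #include <linux/types.h>

    static void (*demo_hook)(s64 *left);    /* same shape as x86_pmu.limit_period */

    DEFINE_STATIC_CALL_NULL(demo_limit_period, *demo_hook);

    static void demo_clamp(s64 *left)
    {
            if (*left < 128)
                    *left = 128;
    }

    static void demo(void)
    {
            s64 left = 5;

            /* NOP while no target is installed ... */
            static_call_cond(demo_limit_period)(&left);

            /* ... and a direct call afterwards: left becomes 128. */
            static_call_update(demo_limit_period, demo_clamp);
            static_call_cond(demo_limit_period)(&left);
    }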
...@@ -2199,6 +2199,12 @@ static void __intel_pmu_enable_all(int added, bool pmi) ...@@ -2199,6 +2199,12 @@ static void __intel_pmu_enable_all(int added, bool pmi)
u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl); u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
intel_pmu_lbr_enable_all(pmi); intel_pmu_lbr_enable_all(pmi);
if (cpuc->fixed_ctrl_val != cpuc->active_fixed_ctrl_val) {
wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, cpuc->fixed_ctrl_val);
cpuc->active_fixed_ctrl_val = cpuc->fixed_ctrl_val;
}
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
intel_ctrl & ~cpuc->intel_ctrl_guest_mask); intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
...@@ -2311,7 +2317,7 @@ static void intel_pmu_nhm_workaround(void) ...@@ -2311,7 +2317,7 @@ static void intel_pmu_nhm_workaround(void)
for (i = 0; i < 4; i++) { for (i = 0; i < 4; i++) {
event = cpuc->events[i]; event = cpuc->events[i];
if (event) if (event)
x86_perf_event_update(event); static_call(x86_pmu_update)(event);
} }
for (i = 0; i < 4; i++) { for (i = 0; i < 4; i++) {
...@@ -2326,7 +2332,7 @@ static void intel_pmu_nhm_workaround(void) ...@@ -2326,7 +2332,7 @@ static void intel_pmu_nhm_workaround(void)
event = cpuc->events[i]; event = cpuc->events[i];
if (event) { if (event) {
x86_perf_event_set_period(event); static_call(x86_pmu_set_period)(event);
__x86_pmu_enable_event(&event->hw, __x86_pmu_enable_event(&event->hw,
ARCH_PERFMON_EVENTSEL_ENABLE); ARCH_PERFMON_EVENTSEL_ENABLE);
} else } else
...@@ -2416,9 +2422,10 @@ static inline void intel_clear_masks(struct perf_event *event, int idx) ...@@ -2416,9 +2422,10 @@ static inline void intel_clear_masks(struct perf_event *event, int idx)
static void intel_pmu_disable_fixed(struct perf_event *event) static void intel_pmu_disable_fixed(struct perf_event *event)
{ {
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw; struct hw_perf_event *hwc = &event->hw;
u64 ctrl_val, mask;
int idx = hwc->idx; int idx = hwc->idx;
u64 mask;
if (is_topdown_idx(idx)) { if (is_topdown_idx(idx)) {
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
...@@ -2435,9 +2442,7 @@ static void intel_pmu_disable_fixed(struct perf_event *event) ...@@ -2435,9 +2442,7 @@ static void intel_pmu_disable_fixed(struct perf_event *event)
intel_clear_masks(event, idx); intel_clear_masks(event, idx);
mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4); mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4);
rdmsrl(hwc->config_base, ctrl_val); cpuc->fixed_ctrl_val &= ~mask;
ctrl_val &= ~mask;
wrmsrl(hwc->config_base, ctrl_val);
} }
static void intel_pmu_disable_event(struct perf_event *event) static void intel_pmu_disable_event(struct perf_event *event)
...@@ -2530,6 +2535,8 @@ static int adl_set_topdown_event_period(struct perf_event *event) ...@@ -2530,6 +2535,8 @@ static int adl_set_topdown_event_period(struct perf_event *event)
return icl_set_topdown_event_period(event); return icl_set_topdown_event_period(event);
} }
DEFINE_STATIC_CALL(intel_pmu_set_topdown_event_period, x86_perf_event_set_period);
static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx) static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx)
{ {
u32 val; u32 val;
...@@ -2680,6 +2687,7 @@ static u64 adl_update_topdown_event(struct perf_event *event) ...@@ -2680,6 +2687,7 @@ static u64 adl_update_topdown_event(struct perf_event *event)
return icl_update_topdown_event(event); return icl_update_topdown_event(event);
} }
DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update);
static void intel_pmu_read_topdown_event(struct perf_event *event) static void intel_pmu_read_topdown_event(struct perf_event *event)
{ {
...@@ -2691,7 +2699,7 @@ static void intel_pmu_read_topdown_event(struct perf_event *event) ...@@ -2691,7 +2699,7 @@ static void intel_pmu_read_topdown_event(struct perf_event *event)
return; return;
perf_pmu_disable(event->pmu); perf_pmu_disable(event->pmu);
x86_pmu.update_topdown_event(event); static_call(intel_pmu_update_topdown_event)(event);
perf_pmu_enable(event->pmu); perf_pmu_enable(event->pmu);
} }
...@@ -2699,7 +2707,7 @@ static void intel_pmu_read_event(struct perf_event *event) ...@@ -2699,7 +2707,7 @@ static void intel_pmu_read_event(struct perf_event *event)
{ {
if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
intel_pmu_auto_reload_read(event); intel_pmu_auto_reload_read(event);
else if (is_topdown_count(event) && x86_pmu.update_topdown_event) else if (is_topdown_count(event))
intel_pmu_read_topdown_event(event); intel_pmu_read_topdown_event(event);
else else
x86_perf_event_update(event); x86_perf_event_update(event);
...@@ -2707,8 +2715,9 @@ static void intel_pmu_read_event(struct perf_event *event) ...@@ -2707,8 +2715,9 @@ static void intel_pmu_read_event(struct perf_event *event)
static void intel_pmu_enable_fixed(struct perf_event *event) static void intel_pmu_enable_fixed(struct perf_event *event)
{ {
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw; struct hw_perf_event *hwc = &event->hw;
u64 ctrl_val, mask, bits = 0; u64 mask, bits = 0;
int idx = hwc->idx; int idx = hwc->idx;
if (is_topdown_idx(idx)) { if (is_topdown_idx(idx)) {
...@@ -2752,10 +2761,8 @@ static void intel_pmu_enable_fixed(struct perf_event *event) ...@@ -2752,10 +2761,8 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4); mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
} }
rdmsrl(hwc->config_base, ctrl_val); cpuc->fixed_ctrl_val &= ~mask;
ctrl_val &= ~mask; cpuc->fixed_ctrl_val |= bits;
ctrl_val |= bits;
wrmsrl(hwc->config_base, ctrl_val);
} }
static void intel_pmu_enable_event(struct perf_event *event) static void intel_pmu_enable_event(struct perf_event *event)
...@@ -2803,7 +2810,7 @@ static void intel_pmu_add_event(struct perf_event *event) ...@@ -2803,7 +2810,7 @@ static void intel_pmu_add_event(struct perf_event *event)
*/ */
int intel_pmu_save_and_restart(struct perf_event *event) int intel_pmu_save_and_restart(struct perf_event *event)
{ {
x86_perf_event_update(event); static_call(x86_pmu_update)(event);
/* /*
* For a checkpointed counter always reset back to 0. This * For a checkpointed counter always reset back to 0. This
* avoids a situation where the counter overflows, aborts the * avoids a situation where the counter overflows, aborts the
...@@ -2815,9 +2822,25 @@ int intel_pmu_save_and_restart(struct perf_event *event) ...@@ -2815,9 +2822,25 @@ int intel_pmu_save_and_restart(struct perf_event *event)
wrmsrl(event->hw.event_base, 0); wrmsrl(event->hw.event_base, 0);
local64_set(&event->hw.prev_count, 0); local64_set(&event->hw.prev_count, 0);
} }
return static_call(x86_pmu_set_period)(event);
}
static int intel_pmu_set_period(struct perf_event *event)
{
if (unlikely(is_topdown_count(event)))
return static_call(intel_pmu_set_topdown_event_period)(event);
return x86_perf_event_set_period(event); return x86_perf_event_set_period(event);
} }
static u64 intel_pmu_update(struct perf_event *event)
{
if (unlikely(is_topdown_count(event)))
return static_call(intel_pmu_update_topdown_event)(event);
return x86_perf_event_update(event);
}
static void intel_pmu_reset(void) static void intel_pmu_reset(void)
{ {
struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
...@@ -2980,8 +3003,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) ...@@ -2980,8 +3003,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
*/ */
if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) { if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) {
handled++; handled++;
if (x86_pmu.update_topdown_event) static_call(intel_pmu_update_topdown_event)(NULL);
x86_pmu.update_topdown_event(NULL);
} }
/* /*
...@@ -3004,8 +3026,10 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) ...@@ -3004,8 +3026,10 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
perf_sample_data_init(&data, 0, event->hw.last_period); perf_sample_data_init(&data, 0, event->hw.last_period);
if (has_branch_stack(event)) if (has_branch_stack(event)) {
data.br_stack = &cpuc->lbr_stack; data.br_stack = &cpuc->lbr_stack;
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
}
if (perf_event_overflow(event, &data, regs)) if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0); x86_pmu_stop(event, 0);
...@@ -3853,9 +3877,6 @@ static int intel_pmu_hw_config(struct perf_event *event) ...@@ -3853,9 +3877,6 @@ static int intel_pmu_hw_config(struct perf_event *event)
} }
if (x86_pmu.pebs_aliases) if (x86_pmu.pebs_aliases)
x86_pmu.pebs_aliases(event); x86_pmu.pebs_aliases(event);
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
} }
if (needs_branch_stack(event)) { if (needs_branch_stack(event)) {
...@@ -4334,28 +4355,25 @@ static u8 adl_get_hybrid_cpu_type(void) ...@@ -4334,28 +4355,25 @@ static u8 adl_get_hybrid_cpu_type(void)
* Therefore the effective (average) period matches the requested period, * Therefore the effective (average) period matches the requested period,
* despite coarser hardware granularity. * despite coarser hardware granularity.
*/ */
static u64 bdw_limit_period(struct perf_event *event, u64 left) static void bdw_limit_period(struct perf_event *event, s64 *left)
{ {
if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
X86_CONFIG(.event=0xc0, .umask=0x01)) { X86_CONFIG(.event=0xc0, .umask=0x01)) {
if (left < 128) if (*left < 128)
left = 128; *left = 128;
left &= ~0x3fULL; *left &= ~0x3fULL;
} }
return left;
} }
static u64 nhm_limit_period(struct perf_event *event, u64 left) static void nhm_limit_period(struct perf_event *event, s64 *left)
{ {
return max(left, 32ULL); *left = max(*left, 32LL);
} }
static u64 spr_limit_period(struct perf_event *event, u64 left) static void spr_limit_period(struct perf_event *event, s64 *left)
{ {
if (event->attr.precise_ip == 3) if (event->attr.precise_ip == 3)
return max(left, 128ULL); *left = max(*left, 128LL);
return left;
} }
PMU_FORMAT_ATTR(event, "config:0-7" ); PMU_FORMAT_ATTR(event, "config:0-7" );
...@@ -4794,6 +4812,8 @@ static __initconst const struct x86_pmu intel_pmu = { ...@@ -4794,6 +4812,8 @@ static __initconst const struct x86_pmu intel_pmu = {
.add = intel_pmu_add_event, .add = intel_pmu_add_event,
.del = intel_pmu_del_event, .del = intel_pmu_del_event,
.read = intel_pmu_read_event, .read = intel_pmu_read_event,
.set_period = intel_pmu_set_period,
.update = intel_pmu_update,
.hw_config = intel_pmu_hw_config, .hw_config = intel_pmu_hw_config,
.schedule_events = x86_schedule_events, .schedule_events = x86_schedule_events,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
...@@ -6312,8 +6332,10 @@ __init int intel_pmu_init(void) ...@@ -6312,8 +6332,10 @@ __init int intel_pmu_init(void)
x86_pmu.lbr_pt_coexist = true; x86_pmu.lbr_pt_coexist = true;
intel_pmu_pebs_data_source_skl(pmem); intel_pmu_pebs_data_source_skl(pmem);
x86_pmu.num_topdown_events = 4; x86_pmu.num_topdown_events = 4;
x86_pmu.update_topdown_event = icl_update_topdown_event; static_call_update(intel_pmu_update_topdown_event,
x86_pmu.set_topdown_event_period = icl_set_topdown_event_period; &icl_update_topdown_event);
static_call_update(intel_pmu_set_topdown_event_period,
&icl_set_topdown_event_period);
pr_cont("Icelake events, "); pr_cont("Icelake events, ");
name = "icelake"; name = "icelake";
break; break;
...@@ -6348,8 +6370,10 @@ __init int intel_pmu_init(void) ...@@ -6348,8 +6370,10 @@ __init int intel_pmu_init(void)
x86_pmu.lbr_pt_coexist = true; x86_pmu.lbr_pt_coexist = true;
intel_pmu_pebs_data_source_skl(pmem); intel_pmu_pebs_data_source_skl(pmem);
x86_pmu.num_topdown_events = 8; x86_pmu.num_topdown_events = 8;
x86_pmu.update_topdown_event = icl_update_topdown_event; static_call_update(intel_pmu_update_topdown_event,
x86_pmu.set_topdown_event_period = icl_set_topdown_event_period; &icl_update_topdown_event);
static_call_update(intel_pmu_set_topdown_event_period,
&icl_set_topdown_event_period);
pr_cont("Sapphire Rapids events, "); pr_cont("Sapphire Rapids events, ");
name = "sapphire_rapids"; name = "sapphire_rapids";
break; break;
...@@ -6358,6 +6382,7 @@ __init int intel_pmu_init(void) ...@@ -6358,6 +6382,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_ALDERLAKE_L: case INTEL_FAM6_ALDERLAKE_L:
case INTEL_FAM6_RAPTORLAKE: case INTEL_FAM6_RAPTORLAKE:
case INTEL_FAM6_RAPTORLAKE_P: case INTEL_FAM6_RAPTORLAKE_P:
case INTEL_FAM6_RAPTORLAKE_S:
/* /*
* Alder Lake has 2 types of CPU, core and atom. * Alder Lake has 2 types of CPU, core and atom.
* *
...@@ -6382,8 +6407,10 @@ __init int intel_pmu_init(void) ...@@ -6382,8 +6407,10 @@ __init int intel_pmu_init(void)
intel_pmu_pebs_data_source_adl(); intel_pmu_pebs_data_source_adl();
x86_pmu.pebs_latency_data = adl_latency_data_small; x86_pmu.pebs_latency_data = adl_latency_data_small;
x86_pmu.num_topdown_events = 8; x86_pmu.num_topdown_events = 8;
x86_pmu.update_topdown_event = adl_update_topdown_event; static_call_update(intel_pmu_update_topdown_event,
x86_pmu.set_topdown_event_period = adl_set_topdown_event_period; &adl_update_topdown_event);
static_call_update(intel_pmu_set_topdown_event_period,
&adl_set_topdown_event_period);
x86_pmu.filter_match = intel_pmu_filter_match; x86_pmu.filter_match = intel_pmu_filter_match;
x86_pmu.get_event_constraints = adl_get_event_constraints; x86_pmu.get_event_constraints = adl_get_event_constraints;
......
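
A quick worked example of the bdw_limit_period() arithmetic above, as a
stand-alone user-space program (illustration only, not kernel code): periods
for the quirked 0xc0/umask 0x01 event are clamped to at least 128 and rounded
down to a multiple of 64, so the average period still matches the request.

    #include <stdint.h>
    #include <stdio.h>

    static int64_t bdw_round(int64_t left)
    {
            if (left < 128)
                    left = 128;            /* enforce the minimum */
            return left & ~0x3fLL;         /* round down to a multiple of 64 */
    }

    int main(void)
    {
            printf("%lld %lld %lld\n",
                   (long long)bdw_round(100),      /* -> 128  */
                   (long long)bdw_round(200),      /* -> 192  */
                   (long long)bdw_round(4096));    /* -> 4096 */
            return 0;
    }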
...@@ -685,6 +685,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { ...@@ -685,6 +685,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_cstates),
{ }, { },
}; };
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
......
...@@ -1540,14 +1540,18 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, ...@@ -1540,14 +1540,18 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
/* /*
* Use latency for weight (only avail with PEBS-LL) * Use latency for weight (only avail with PEBS-LL)
*/ */
if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) {
data->weight.full = pebs->lat; data->weight.full = pebs->lat;
data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
}
/* /*
* data.data_src encodes the data source * data.data_src encodes the data source
*/ */
if (sample_type & PERF_SAMPLE_DATA_SRC) if (sample_type & PERF_SAMPLE_DATA_SRC) {
data->data_src.val = get_data_src(event, pebs->dse); data->data_src.val = get_data_src(event, pebs->dse);
data->sample_flags |= PERF_SAMPLE_DATA_SRC;
}
/* /*
* We must however always use iregs for the unwinder to stay sane; the * We must however always use iregs for the unwinder to stay sane; the
...@@ -1555,8 +1559,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, ...@@ -1555,8 +1559,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
* previous PMI context or an (I)RET happened between the record and * previous PMI context or an (I)RET happened between the record and
* PMI. * PMI.
*/ */
if (sample_type & PERF_SAMPLE_CALLCHAIN) if (sample_type & PERF_SAMPLE_CALLCHAIN) {
data->callchain = perf_callchain(event, iregs); data->callchain = perf_callchain(event, iregs);
data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
}
/* /*
* We use the interrupt regs as a base because the PEBS record does not * We use the interrupt regs as a base because the PEBS record does not
...@@ -1628,17 +1634,22 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, ...@@ -1628,17 +1634,22 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
if ((sample_type & PERF_SAMPLE_ADDR_TYPE) && if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
x86_pmu.intel_cap.pebs_format >= 1) x86_pmu.intel_cap.pebs_format >= 1) {
data->addr = pebs->dla; data->addr = pebs->dla;
data->sample_flags |= PERF_SAMPLE_ADDR;
}
if (x86_pmu.intel_cap.pebs_format >= 2) { if (x86_pmu.intel_cap.pebs_format >= 2) {
/* Only set the TSX weight when no memory weight. */ /* Only set the TSX weight when no memory weight. */
if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) {
data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning); data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
if (sample_type & PERF_SAMPLE_TRANSACTION) }
if (sample_type & PERF_SAMPLE_TRANSACTION) {
data->txn = intel_get_tsx_transaction(pebs->tsx_tuning, data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
pebs->ax); pebs->ax);
data->sample_flags |= PERF_SAMPLE_TRANSACTION;
}
} }
/* /*
...@@ -1648,11 +1659,15 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, ...@@ -1648,11 +1659,15 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
* We can only do this for the default trace clock. * We can only do this for the default trace clock.
*/ */
if (x86_pmu.intel_cap.pebs_format >= 3 && if (x86_pmu.intel_cap.pebs_format >= 3 &&
event->attr.use_clockid == 0) event->attr.use_clockid == 0) {
data->time = native_sched_clock_from_tsc(pebs->tsc); data->time = native_sched_clock_from_tsc(pebs->tsc);
data->sample_flags |= PERF_SAMPLE_TIME;
}
if (has_branch_stack(event)) if (has_branch_stack(event)) {
data->br_stack = &cpuc->lbr_stack; data->br_stack = &cpuc->lbr_stack;
data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
}
} }
static void adaptive_pebs_save_regs(struct pt_regs *regs, static void adaptive_pebs_save_regs(struct pt_regs *regs,
...@@ -1710,8 +1725,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, ...@@ -1710,8 +1725,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
perf_sample_data_init(data, 0, event->hw.last_period); perf_sample_data_init(data, 0, event->hw.last_period);
data->period = event->hw.last_period; data->period = event->hw.last_period;
if (event->attr.use_clockid == 0) if (event->attr.use_clockid == 0) {
data->time = native_sched_clock_from_tsc(basic->tsc); data->time = native_sched_clock_from_tsc(basic->tsc);
data->sample_flags |= PERF_SAMPLE_TIME;
}
/* /*
* We must however always use iregs for the unwinder to stay sane; the * We must however always use iregs for the unwinder to stay sane; the
...@@ -1719,8 +1736,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, ...@@ -1719,8 +1736,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
* previous PMI context or an (I)RET happened between the record and * previous PMI context or an (I)RET happened between the record and
* PMI. * PMI.
*/ */
if (sample_type & PERF_SAMPLE_CALLCHAIN) if (sample_type & PERF_SAMPLE_CALLCHAIN) {
data->callchain = perf_callchain(event, iregs); data->callchain = perf_callchain(event, iregs);
data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
}
*regs = *iregs; *regs = *iregs;
/* The ip in basic is EventingIP */ /* The ip in basic is EventingIP */
...@@ -1771,17 +1790,24 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, ...@@ -1771,17 +1790,24 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?: data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
intel_get_tsx_weight(meminfo->tsx_tuning); intel_get_tsx_weight(meminfo->tsx_tuning);
} }
data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
} }
if (sample_type & PERF_SAMPLE_DATA_SRC) if (sample_type & PERF_SAMPLE_DATA_SRC) {
data->data_src.val = get_data_src(event, meminfo->aux); data->data_src.val = get_data_src(event, meminfo->aux);
data->sample_flags |= PERF_SAMPLE_DATA_SRC;
}
if (sample_type & PERF_SAMPLE_ADDR_TYPE) if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
data->addr = meminfo->address; data->addr = meminfo->address;
data->sample_flags |= PERF_SAMPLE_ADDR;
}
if (sample_type & PERF_SAMPLE_TRANSACTION) if (sample_type & PERF_SAMPLE_TRANSACTION) {
data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning, data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
gprs ? gprs->ax : 0); gprs ? gprs->ax : 0);
data->sample_flags |= PERF_SAMPLE_TRANSACTION;
}
} }
if (format_size & PEBS_DATACFG_XMMS) { if (format_size & PEBS_DATACFG_XMMS) {
...@@ -1800,6 +1826,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, ...@@ -1800,6 +1826,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
if (has_branch_stack(event)) { if (has_branch_stack(event)) {
intel_pmu_store_pebs_lbrs(lbr); intel_pmu_store_pebs_lbrs(lbr);
data->br_stack = &cpuc->lbr_stack; data->br_stack = &cpuc->lbr_stack;
data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
} }
} }
......
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
#include <asm/perf_event.h> #include <asm/perf_event.h>
#include <asm/msr.h> #include <asm/msr.h>
#include <asm/insn.h>
#include "../perf_event.h" #include "../perf_event.h"
...@@ -65,65 +64,6 @@ ...@@ -65,65 +64,6 @@
#define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59)) #define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59))
/*
* x86control flow change classification
* x86control flow changes include branches, interrupts, traps, faults
*/
enum {
X86_BR_NONE = 0, /* unknown */
X86_BR_USER = 1 << 0, /* branch target is user */
X86_BR_KERNEL = 1 << 1, /* branch target is kernel */
X86_BR_CALL = 1 << 2, /* call */
X86_BR_RET = 1 << 3, /* return */
X86_BR_SYSCALL = 1 << 4, /* syscall */
X86_BR_SYSRET = 1 << 5, /* syscall return */
X86_BR_INT = 1 << 6, /* sw interrupt */
X86_BR_IRET = 1 << 7, /* return from interrupt */
X86_BR_JCC = 1 << 8, /* conditional */
X86_BR_JMP = 1 << 9, /* jump */
X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
X86_BR_IND_CALL = 1 << 11,/* indirect calls */
X86_BR_ABORT = 1 << 12,/* transaction abort */
X86_BR_IN_TX = 1 << 13,/* in transaction */
X86_BR_NO_TX = 1 << 14,/* not in transaction */
X86_BR_ZERO_CALL = 1 << 15,/* zero length call */
X86_BR_CALL_STACK = 1 << 16,/* call stack */
X86_BR_IND_JMP = 1 << 17,/* indirect jump */
X86_BR_TYPE_SAVE = 1 << 18,/* indicate to save branch type */
};
#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
#define X86_BR_ANY \
(X86_BR_CALL |\
X86_BR_RET |\
X86_BR_SYSCALL |\
X86_BR_SYSRET |\
X86_BR_INT |\
X86_BR_IRET |\
X86_BR_JCC |\
X86_BR_JMP |\
X86_BR_IRQ |\
X86_BR_ABORT |\
X86_BR_IND_CALL |\
X86_BR_IND_JMP |\
X86_BR_ZERO_CALL)
#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
#define X86_BR_ANY_CALL \
(X86_BR_CALL |\
X86_BR_IND_CALL |\
X86_BR_ZERO_CALL |\
X86_BR_SYSCALL |\
X86_BR_IRQ |\
X86_BR_INT)
/* /*
* Intel LBR_CTL bits * Intel LBR_CTL bits
* *
...@@ -1151,219 +1091,6 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event) ...@@ -1151,219 +1091,6 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
return ret; return ret;
} }
/*
* return the type of control flow change at address "from"
* instruction is not necessarily a branch (in case of interrupt).
*
* The branch type returned also includes the priv level of the
* target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
*
* If a branch type is unknown OR the instruction cannot be
* decoded (e.g., text page not present), then X86_BR_NONE is
* returned.
*/
static int branch_type(unsigned long from, unsigned long to, int abort)
{
struct insn insn;
void *addr;
int bytes_read, bytes_left;
int ret = X86_BR_NONE;
int ext, to_plm, from_plm;
u8 buf[MAX_INSN_SIZE];
int is64 = 0;
to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
/*
* maybe zero if lbr did not fill up after a reset by the time
* we get a PMU interrupt
*/
if (from == 0 || to == 0)
return X86_BR_NONE;
if (abort)
return X86_BR_ABORT | to_plm;
if (from_plm == X86_BR_USER) {
/*
* can happen if measuring at the user level only
* and we interrupt in a kernel thread, e.g., idle.
*/
if (!current->mm)
return X86_BR_NONE;
/* may fail if text not present */
bytes_left = copy_from_user_nmi(buf, (void __user *)from,
MAX_INSN_SIZE);
bytes_read = MAX_INSN_SIZE - bytes_left;
if (!bytes_read)
return X86_BR_NONE;
addr = buf;
} else {
/*
* The LBR logs any address in the IP, even if the IP just
* faulted. This means userspace can control the from address.
* Ensure we don't blindly read any address by validating it is
* a known text address.
*/
if (kernel_text_address(from)) {
addr = (void *)from;
/*
* Assume we can get the maximum possible size
* when grabbing kernel data. This is not
* _strictly_ true since we could possibly be
* executing up next to a memory hole, but
* it is very unlikely to be a problem.
*/
bytes_read = MAX_INSN_SIZE;
} else {
return X86_BR_NONE;
}
}
/*
* decoder needs to know the ABI especially
* on 64-bit systems running 32-bit apps
*/
#ifdef CONFIG_X86_64
is64 = kernel_ip((unsigned long)addr) || any_64bit_mode(current_pt_regs());
#endif
insn_init(&insn, addr, bytes_read, is64);
if (insn_get_opcode(&insn))
return X86_BR_ABORT;
switch (insn.opcode.bytes[0]) {
case 0xf:
switch (insn.opcode.bytes[1]) {
case 0x05: /* syscall */
case 0x34: /* sysenter */
ret = X86_BR_SYSCALL;
break;
case 0x07: /* sysret */
case 0x35: /* sysexit */
ret = X86_BR_SYSRET;
break;
case 0x80 ... 0x8f: /* conditional */
ret = X86_BR_JCC;
break;
default:
ret = X86_BR_NONE;
}
break;
case 0x70 ... 0x7f: /* conditional */
ret = X86_BR_JCC;
break;
case 0xc2: /* near ret */
case 0xc3: /* near ret */
case 0xca: /* far ret */
case 0xcb: /* far ret */
ret = X86_BR_RET;
break;
case 0xcf: /* iret */
ret = X86_BR_IRET;
break;
case 0xcc ... 0xce: /* int */
ret = X86_BR_INT;
break;
case 0xe8: /* call near rel */
if (insn_get_immediate(&insn) || insn.immediate1.value == 0) {
/* zero length call */
ret = X86_BR_ZERO_CALL;
break;
}
fallthrough;
case 0x9a: /* call far absolute */
ret = X86_BR_CALL;
break;
case 0xe0 ... 0xe3: /* loop jmp */
ret = X86_BR_JCC;
break;
case 0xe9 ... 0xeb: /* jmp */
ret = X86_BR_JMP;
break;
case 0xff: /* call near absolute, call far absolute ind */
if (insn_get_modrm(&insn))
return X86_BR_ABORT;
ext = (insn.modrm.bytes[0] >> 3) & 0x7;
switch (ext) {
case 2: /* near ind call */
case 3: /* far ind call */
ret = X86_BR_IND_CALL;
break;
case 4:
case 5:
ret = X86_BR_IND_JMP;
break;
}
break;
default:
ret = X86_BR_NONE;
}
/*
* interrupts, traps, faults (and thus ring transition) may
* occur on any instructions. Thus, to classify them correctly,
* we need to first look at the from and to priv levels. If they
* are different and to is in the kernel, then it indicates
* a ring transition. If the from instruction is not a ring
* transition instr (syscall, systenter, int), then it means
* it was a irq, trap or fault.
*
* we have no way of detecting kernel to kernel faults.
*/
if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
&& ret != X86_BR_SYSCALL && ret != X86_BR_INT)
ret = X86_BR_IRQ;
/*
* branch priv level determined by target as
* is done by HW when LBR_SELECT is implemented
*/
if (ret != X86_BR_NONE)
ret |= to_plm;
return ret;
}
#define X86_BR_TYPE_MAP_MAX 16
static int branch_map[X86_BR_TYPE_MAP_MAX] = {
PERF_BR_CALL, /* X86_BR_CALL */
PERF_BR_RET, /* X86_BR_RET */
PERF_BR_SYSCALL, /* X86_BR_SYSCALL */
PERF_BR_SYSRET, /* X86_BR_SYSRET */
PERF_BR_UNKNOWN, /* X86_BR_INT */
PERF_BR_ERET, /* X86_BR_IRET */
PERF_BR_COND, /* X86_BR_JCC */
PERF_BR_UNCOND, /* X86_BR_JMP */
PERF_BR_IRQ, /* X86_BR_IRQ */
PERF_BR_IND_CALL, /* X86_BR_IND_CALL */
PERF_BR_UNKNOWN, /* X86_BR_ABORT */
PERF_BR_UNKNOWN, /* X86_BR_IN_TX */
PERF_BR_UNKNOWN, /* X86_BR_NO_TX */
PERF_BR_CALL, /* X86_BR_ZERO_CALL */
PERF_BR_UNKNOWN, /* X86_BR_CALL_STACK */
PERF_BR_IND, /* X86_BR_IND_JMP */
};
static int
common_branch_type(int type)
{
int i;
type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
if (type) {
i = __ffs(type);
if (i < X86_BR_TYPE_MAP_MAX)
return branch_map[i];
}
return PERF_BR_UNKNOWN;
}
enum { enum {
ARCH_LBR_BR_TYPE_JCC = 0, ARCH_LBR_BR_TYPE_JCC = 0,
ARCH_LBR_BR_TYPE_NEAR_IND_JMP = 1, ARCH_LBR_BR_TYPE_NEAR_IND_JMP = 1,
......
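
The classification code removed above is moved rather than dropped: the x86
events Makefile earlier in this diff adds utils.o, and the shortlog touches
get_branch_type() in perf/x86/utils, so the X86_BR_* decoding now lives in a
shared file that the AMD LbrExtV2 code can also use. A short worked example of
the mapping, kept as a comment since the enums are no longer in this file:

    /*
     * A conditional branch that lands in the kernel is classified as
     * X86_BR_JCC | X86_BR_KERNEL. common_branch_type() shifts out the two
     * privilege bits, leaving X86_BR_JCC >> 2 == 1 << 6, so __ffs() yields 6
     * and branch_map[6] == PERF_BR_COND is what reaches userspace.
     */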
...@@ -1006,6 +1006,29 @@ static void p4_pmu_enable_all(int added) ...@@ -1006,6 +1006,29 @@ static void p4_pmu_enable_all(int added)
} }
} }
static int p4_pmu_set_period(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
s64 left = this_cpu_read(pmc_prev_left[hwc->idx]);
int ret;
ret = x86_perf_event_set_period(event);
if (hwc->event_base) {
/*
* This handles erratum N15 in intel doc 249199-029,
* the counter may not be updated correctly on write
* so we need a second write operation to do the trick
* (the official workaround didn't work)
*
* the former idea is taken from OProfile code
*/
wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
}
return ret;
}
static int p4_pmu_handle_irq(struct pt_regs *regs) static int p4_pmu_handle_irq(struct pt_regs *regs)
{ {
struct perf_sample_data data; struct perf_sample_data data;
...@@ -1044,7 +1067,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) ...@@ -1044,7 +1067,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
/* event overflow for sure */ /* event overflow for sure */
perf_sample_data_init(&data, 0, hwc->last_period); perf_sample_data_init(&data, 0, hwc->last_period);
if (!x86_perf_event_set_period(event)) if (!static_call(x86_pmu_set_period)(event))
continue; continue;
...@@ -1316,6 +1339,9 @@ static __initconst const struct x86_pmu p4_pmu = { ...@@ -1316,6 +1339,9 @@ static __initconst const struct x86_pmu p4_pmu = {
.enable_all = p4_pmu_enable_all, .enable_all = p4_pmu_enable_all,
.enable = p4_pmu_enable_event, .enable = p4_pmu_enable_event,
.disable = p4_pmu_disable_event, .disable = p4_pmu_disable_event,
.set_period = p4_pmu_set_period,
.eventsel = MSR_P4_BPU_CCCR0, .eventsel = MSR_P4_BPU_CCCR0,
.perfctr = MSR_P4_BPU_PERFCTR0, .perfctr = MSR_P4_BPU_PERFCTR0,
.event_map = p4_pmu_event_map, .event_map = p4_pmu_event_map,
...@@ -1334,15 +1360,6 @@ static __initconst const struct x86_pmu p4_pmu = { ...@@ -1334,15 +1360,6 @@ static __initconst const struct x86_pmu p4_pmu = {
.max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1, .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
.hw_config = p4_hw_config, .hw_config = p4_hw_config,
.schedule_events = p4_pmu_schedule_events, .schedule_events = p4_pmu_schedule_events,
/*
* This handles erratum N15 in intel doc 249199-029,
* the counter may not be updated correctly on write
* so we need a second write operation to do the trick
* (the official workaround didn't work)
*
* the former idea is taken from OProfile code
*/
.perfctr_second_write = 1,
.format_attrs = intel_p4_formats_attr, .format_attrs = intel_p4_formats_attr,
}; };
......
...@@ -1831,6 +1831,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { ...@@ -1831,6 +1831,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
{}, {},
......
...@@ -106,6 +106,7 @@ static bool test_intel(int idx, void *data) ...@@ -106,6 +106,7 @@ static bool test_intel(int idx, void *data)
case INTEL_FAM6_ALDERLAKE_N: case INTEL_FAM6_ALDERLAKE_N:
case INTEL_FAM6_RAPTORLAKE: case INTEL_FAM6_RAPTORLAKE:
case INTEL_FAM6_RAPTORLAKE_P: case INTEL_FAM6_RAPTORLAKE_P:
case INTEL_FAM6_RAPTORLAKE_S:
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
return true; return true;
break; break;
......
...@@ -64,27 +64,25 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode) ...@@ -64,27 +64,25 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
return ((ecode & c->cmask) - c->code) <= (u64)c->size; return ((ecode & c->cmask) - c->code) <= (u64)c->size;
} }
#define PERF_ARCH(name, val) \
PERF_X86_EVENT_##name = val,
/* /*
* struct hw_perf_event.flags flags * struct hw_perf_event.flags flags
*/ */
#define PERF_X86_EVENT_PEBS_LDLAT 0x00001 /* ld+ldlat data address sampling */ enum {
#define PERF_X86_EVENT_PEBS_ST 0x00002 /* st data address sampling */ #include "perf_event_flags.h"
#define PERF_X86_EVENT_PEBS_ST_HSW 0x00004 /* haswell style datala, store */ };
#define PERF_X86_EVENT_PEBS_LD_HSW 0x00008 /* haswell style datala, load */
#define PERF_X86_EVENT_PEBS_NA_HSW 0x00010 /* haswell style datala, unknown */ #undef PERF_ARCH
#define PERF_X86_EVENT_EXCL 0x00020 /* HT exclusivity on counter */
#define PERF_X86_EVENT_DYNAMIC 0x00040 /* dynamic alloc'd constraint */ #define PERF_ARCH(name, val) \
static_assert((PERF_X86_EVENT_##name & PERF_EVENT_FLAG_ARCH) == \
#define PERF_X86_EVENT_EXCL_ACCT 0x00100 /* accounted EXCL event */ PERF_X86_EVENT_##name);
#define PERF_X86_EVENT_AUTO_RELOAD 0x00200 /* use PEBS auto-reload */
#define PERF_X86_EVENT_LARGE_PEBS 0x00400 /* use large PEBS */ #include "perf_event_flags.h"
#define PERF_X86_EVENT_PEBS_VIA_PT 0x00800 /* use PT buffer for PEBS */
#define PERF_X86_EVENT_PAIR 0x01000 /* Large Increment per Cycle */ #undef PERF_ARCH
#define PERF_X86_EVENT_LBR_SELECT 0x02000 /* Save/Restore MSR_LBR_SELECT */
#define PERF_X86_EVENT_TOPDOWN 0x04000 /* Count Topdown slots/metrics events */
#define PERF_X86_EVENT_PEBS_STLAT 0x08000 /* st+stlat data address sampling */
#define PERF_X86_EVENT_AMD_BRS 0x10000 /* AMD Branch Sampling */
#define PERF_X86_EVENT_PEBS_LAT_HYBRID 0x20000 /* ld and st lat for hybrid */
static inline bool is_topdown_count(struct perf_event *event) static inline bool is_topdown_count(struct perf_event *event)
{ {
...@@ -272,6 +270,10 @@ struct cpu_hw_events { ...@@ -272,6 +270,10 @@ struct cpu_hw_events {
u64 active_pebs_data_cfg; u64 active_pebs_data_cfg;
int pebs_record_size; int pebs_record_size;
/* Intel Fixed counter configuration */
u64 fixed_ctrl_val;
u64 active_fixed_ctrl_val;
/* /*
* Intel LBR bits * Intel LBR bits
*/ */
...@@ -745,6 +747,8 @@ struct x86_pmu { ...@@ -745,6 +747,8 @@ struct x86_pmu {
void (*add)(struct perf_event *); void (*add)(struct perf_event *);
void (*del)(struct perf_event *); void (*del)(struct perf_event *);
void (*read)(struct perf_event *event); void (*read)(struct perf_event *event);
int (*set_period)(struct perf_event *event);
u64 (*update)(struct perf_event *event);
int (*hw_config)(struct perf_event *event); int (*hw_config)(struct perf_event *event);
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
unsigned eventsel; unsigned eventsel;
...@@ -780,8 +784,7 @@ struct x86_pmu { ...@@ -780,8 +784,7 @@ struct x86_pmu {
struct event_constraint *event_constraints; struct event_constraint *event_constraints;
struct x86_pmu_quirk *quirks; struct x86_pmu_quirk *quirks;
int perfctr_second_write; void (*limit_period)(struct perf_event *event, s64 *l);
u64 (*limit_period)(struct perf_event *event, u64 l);
/* PMI handler bits */ /* PMI handler bits */
unsigned int late_ack :1, unsigned int late_ack :1,
...@@ -889,8 +892,6 @@ struct x86_pmu { ...@@ -889,8 +892,6 @@ struct x86_pmu {
* Intel perf metrics * Intel perf metrics
*/ */
int num_topdown_events; int num_topdown_events;
u64 (*update_topdown_event)(struct perf_event *event);
int (*set_topdown_event_period)(struct perf_event *event);
/* /*
* perf task context (i.e. struct perf_event_context::task_ctx_data) * perf task context (i.e. struct perf_event_context::task_ctx_data)
...@@ -1044,6 +1045,9 @@ static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\ ...@@ -1044,6 +1045,9 @@ static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\
struct pmu *x86_get_pmu(unsigned int cpu); struct pmu *x86_get_pmu(unsigned int cpu);
extern struct x86_pmu x86_pmu __read_mostly; extern struct x86_pmu x86_pmu __read_mostly;
DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period);
DECLARE_STATIC_CALL(x86_pmu_update, *x86_pmu.update);
static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx) static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
{ {
if (static_cpu_has(X86_FEATURE_ARCH_LBR)) if (static_cpu_has(X86_FEATURE_ARCH_LBR))
...@@ -1059,6 +1063,7 @@ static inline bool x86_pmu_has_lbr_callstack(void) ...@@ -1059,6 +1063,7 @@ static inline bool x86_pmu_has_lbr_callstack(void)
} }
DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events); DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
DECLARE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
int x86_perf_event_set_period(struct perf_event *event); int x86_perf_event_set_period(struct perf_event *event);
...@@ -1210,6 +1215,70 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip) ...@@ -1210,6 +1215,70 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
regs->ip = ip; regs->ip = ip;
} }
/*
* x86 control flow change classification
* x86 control flow changes include branches, interrupts, traps and faults
*/
enum {
X86_BR_NONE = 0, /* unknown */
X86_BR_USER = 1 << 0, /* branch target is user */
X86_BR_KERNEL = 1 << 1, /* branch target is kernel */
X86_BR_CALL = 1 << 2, /* call */
X86_BR_RET = 1 << 3, /* return */
X86_BR_SYSCALL = 1 << 4, /* syscall */
X86_BR_SYSRET = 1 << 5, /* syscall return */
X86_BR_INT = 1 << 6, /* sw interrupt */
X86_BR_IRET = 1 << 7, /* return from interrupt */
X86_BR_JCC = 1 << 8, /* conditional */
X86_BR_JMP = 1 << 9, /* jump */
X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
X86_BR_IND_CALL = 1 << 11,/* indirect calls */
X86_BR_ABORT = 1 << 12,/* transaction abort */
X86_BR_IN_TX = 1 << 13,/* in transaction */
X86_BR_NO_TX = 1 << 14,/* not in transaction */
X86_BR_ZERO_CALL = 1 << 15,/* zero length call */
X86_BR_CALL_STACK = 1 << 16,/* call stack */
X86_BR_IND_JMP = 1 << 17,/* indirect jump */
X86_BR_TYPE_SAVE = 1 << 18,/* indicate to save branch type */
};
#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
#define X86_BR_ANY \
(X86_BR_CALL |\
X86_BR_RET |\
X86_BR_SYSCALL |\
X86_BR_SYSRET |\
X86_BR_INT |\
X86_BR_IRET |\
X86_BR_JCC |\
X86_BR_JMP |\
X86_BR_IRQ |\
X86_BR_ABORT |\
X86_BR_IND_CALL |\
X86_BR_IND_JMP |\
X86_BR_ZERO_CALL)
#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
#define X86_BR_ANY_CALL \
(X86_BR_CALL |\
X86_BR_IND_CALL |\
X86_BR_ZERO_CALL |\
X86_BR_SYSCALL |\
X86_BR_IRQ |\
X86_BR_INT)
int common_branch_type(int type);
int branch_type(unsigned long from, unsigned long to, int abort);
int branch_type_fused(unsigned long from, unsigned long to, int abort,
int *offset);
ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event); ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
ssize_t intel_event_sysfs_show(char *page, u64 config); ssize_t intel_event_sysfs_show(char *page, u64 config);
...@@ -1232,7 +1301,20 @@ static inline bool fixed_counter_disabled(int i, struct pmu *pmu) ...@@ -1232,7 +1301,20 @@ static inline bool fixed_counter_disabled(int i, struct pmu *pmu)
int amd_pmu_init(void); int amd_pmu_init(void);
int amd_pmu_lbr_init(void);
void amd_pmu_lbr_reset(void);
void amd_pmu_lbr_read(void);
void amd_pmu_lbr_add(struct perf_event *event);
void amd_pmu_lbr_del(struct perf_event *event);
void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
void amd_pmu_lbr_enable_all(void);
void amd_pmu_lbr_disable_all(void);
int amd_pmu_lbr_hw_config(struct perf_event *event);
#ifdef CONFIG_PERF_EVENTS_AMD_BRS #ifdef CONFIG_PERF_EVENTS_AMD_BRS
#define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */
int amd_brs_init(void); int amd_brs_init(void);
void amd_brs_disable(void); void amd_brs_disable(void);
void amd_brs_enable(void); void amd_brs_enable(void);
...@@ -1241,7 +1323,7 @@ void amd_brs_disable_all(void); ...@@ -1241,7 +1323,7 @@ void amd_brs_disable_all(void);
void amd_brs_drain(void); void amd_brs_drain(void);
void amd_brs_lopwr_init(void); void amd_brs_lopwr_init(void);
void amd_brs_disable_all(void); void amd_brs_disable_all(void);
int amd_brs_setup_filter(struct perf_event *event); int amd_brs_hw_config(struct perf_event *event);
void amd_brs_reset(void); void amd_brs_reset(void);
static inline void amd_pmu_brs_add(struct perf_event *event) static inline void amd_pmu_brs_add(struct perf_event *event)
...@@ -1277,7 +1359,7 @@ static inline void amd_brs_enable(void) {} ...@@ -1277,7 +1359,7 @@ static inline void amd_brs_enable(void) {}
static inline void amd_brs_drain(void) {} static inline void amd_brs_drain(void) {}
static inline void amd_brs_lopwr_init(void) {} static inline void amd_brs_lopwr_init(void) {}
static inline void amd_brs_disable_all(void) {} static inline void amd_brs_disable_all(void) {}
static inline int amd_brs_setup_filter(struct perf_event *event) static inline int amd_brs_hw_config(struct perf_event *event)
{ {
return 0; return 0;
} }
......
/*
* struct hw_perf_event.flags flags
*/
PERF_ARCH(PEBS_LDLAT, 0x00001) /* ld+ldlat data address sampling */
PERF_ARCH(PEBS_ST, 0x00002) /* st data address sampling */
PERF_ARCH(PEBS_ST_HSW, 0x00004) /* haswell style datala, store */
PERF_ARCH(PEBS_LD_HSW, 0x00008) /* haswell style datala, load */
PERF_ARCH(PEBS_NA_HSW, 0x00010) /* haswell style datala, unknown */
PERF_ARCH(EXCL, 0x00020) /* HT exclusivity on counter */
PERF_ARCH(DYNAMIC, 0x00040) /* dynamic alloc'd constraint */
/* 0x00080 */
PERF_ARCH(EXCL_ACCT, 0x00100) /* accounted EXCL event */
PERF_ARCH(AUTO_RELOAD, 0x00200) /* use PEBS auto-reload */
PERF_ARCH(LARGE_PEBS, 0x00400) /* use large PEBS */
PERF_ARCH(PEBS_VIA_PT, 0x00800) /* use PT buffer for PEBS */
PERF_ARCH(PAIR, 0x01000) /* Large Increment per Cycle */
PERF_ARCH(LBR_SELECT, 0x02000) /* Save/Restore MSR_LBR_SELECT */
PERF_ARCH(TOPDOWN, 0x04000) /* Count Topdown slots/metrics events */
PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */
PERF_ARCH(AMD_BRS, 0x10000) /* AMD Branch Sampling */
PERF_ARCH(PEBS_LAT_HYBRID, 0x20000) /* ld and st lat for hybrid */
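The perf_event.h hunk above consumes this new flag list twice through the PERF_ARCH() x-macro: once to generate the enum, once to static_assert that every flag fits inside PERF_EVENT_FLAG_ARCH. A minimal standalone sketch of the same pattern, with hypothetical MY_FLAG_* names and mask (the kernel keeps the list in this separate header and #includes it twice instead of using a list macro):

#include <assert.h>

#define MY_FLAG_ARCH_MASK 0x000fffff		/* plays the role of PERF_EVENT_FLAG_ARCH */

#define MY_FLAG_LIST				\
	MY_FLAG(LDLAT,   0x00001)		\
	MY_FLAG(STORE,   0x00002)		\
	MY_FLAG(DYNAMIC, 0x00040)

/* First expansion: generate the enumerators. */
#define MY_FLAG(name, val) MY_FLAG_##name = val,
enum { MY_FLAG_LIST };
#undef MY_FLAG

/* Second expansion: verify each flag fits inside the reserved mask. */
#define MY_FLAG(name, val) \
	static_assert((MY_FLAG_##name & MY_FLAG_ARCH_MASK) == MY_FLAG_##name, #name);
MY_FLAG_LIST
#undef MY_FLAG

int main(void)
{
	return (MY_FLAG_DYNAMIC == 0x00040) ? 0 : 1;
}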
// SPDX-License-Identifier: GPL-2.0
#include <asm/insn.h>
#include "perf_event.h"
static int decode_branch_type(struct insn *insn)
{
int ext;
if (insn_get_opcode(insn))
return X86_BR_ABORT;
switch (insn->opcode.bytes[0]) {
case 0xf:
switch (insn->opcode.bytes[1]) {
case 0x05: /* syscall */
case 0x34: /* sysenter */
return X86_BR_SYSCALL;
case 0x07: /* sysret */
case 0x35: /* sysexit */
return X86_BR_SYSRET;
case 0x80 ... 0x8f: /* conditional */
return X86_BR_JCC;
}
return X86_BR_NONE;
case 0x70 ... 0x7f: /* conditional */
return X86_BR_JCC;
case 0xc2: /* near ret */
case 0xc3: /* near ret */
case 0xca: /* far ret */
case 0xcb: /* far ret */
return X86_BR_RET;
case 0xcf: /* iret */
return X86_BR_IRET;
case 0xcc ... 0xce: /* int */
return X86_BR_INT;
case 0xe8: /* call near rel */
if (insn_get_immediate(insn) || insn->immediate1.value == 0) {
/* zero length call */
return X86_BR_ZERO_CALL;
}
fallthrough;
case 0x9a: /* call far absolute */
return X86_BR_CALL;
case 0xe0 ... 0xe3: /* loop jmp */
return X86_BR_JCC;
case 0xe9 ... 0xeb: /* jmp */
return X86_BR_JMP;
case 0xff: /* call near absolute, call far absolute ind */
if (insn_get_modrm(insn))
return X86_BR_ABORT;
ext = (insn->modrm.bytes[0] >> 3) & 0x7;
switch (ext) {
case 2: /* near ind call */
case 3: /* far ind call */
return X86_BR_IND_CALL;
case 4:
case 5:
return X86_BR_IND_JMP;
}
return X86_BR_NONE;
}
return X86_BR_NONE;
}
/*
* Return the type of control flow change at address "from".
* The instruction is not necessarily a branch (e.g., in case of an interrupt).
*
* The branch type returned also includes the priv level of the
* target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
*
* If a branch type is unknown OR the instruction cannot be
* decoded (e.g., text page not present), then X86_BR_NONE is
* returned.
*
* While recording branches, some processors can report the "from"
* address to be that of an instruction preceding the actual branch
* when instruction fusion occurs. If fusion is expected, attempt to
* find the type of the first branch instruction within the next
* MAX_INSN_SIZE bytes and if found, provide the offset between the
* reported "from" address and the actual branch instruction address.
*/
static int get_branch_type(unsigned long from, unsigned long to, int abort,
bool fused, int *offset)
{
struct insn insn;
void *addr;
int bytes_read, bytes_left, insn_offset;
int ret = X86_BR_NONE;
int to_plm, from_plm;
u8 buf[MAX_INSN_SIZE];
int is64 = 0;
/* make sure we initialize offset */
if (offset)
*offset = 0;
to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
/*
* may be zero if the LBR did not fill up after a reset by the time
* we get a PMU interrupt
*/
if (from == 0 || to == 0)
return X86_BR_NONE;
if (abort)
return X86_BR_ABORT | to_plm;
if (from_plm == X86_BR_USER) {
/*
* can happen if measuring at the user level only
* and we interrupt in a kernel thread, e.g., idle.
*/
if (!current->mm)
return X86_BR_NONE;
/* may fail if text not present */
bytes_left = copy_from_user_nmi(buf, (void __user *)from,
MAX_INSN_SIZE);
bytes_read = MAX_INSN_SIZE - bytes_left;
if (!bytes_read)
return X86_BR_NONE;
addr = buf;
} else {
/*
* The LBR logs any address in the IP, even if the IP just
* faulted. This means userspace can control the from address.
* Ensure we don't blindly read any address by validating it is
* a known text address.
*/
if (kernel_text_address(from)) {
addr = (void *)from;
/*
* Assume we can get the maximum possible size
* when grabbing kernel data. This is not
* _strictly_ true since we could possibly be
* executing up next to a memory hole, but
* it is very unlikely to be a problem.
*/
bytes_read = MAX_INSN_SIZE;
} else {
return X86_BR_NONE;
}
}
/*
* the decoder needs to know the ABI, especially
* on 64-bit systems running 32-bit apps
*/
#ifdef CONFIG_X86_64
is64 = kernel_ip((unsigned long)addr) || any_64bit_mode(current_pt_regs());
#endif
insn_init(&insn, addr, bytes_read, is64);
ret = decode_branch_type(&insn);
insn_offset = 0;
/* Check for the possibility of branch fusion */
while (fused && ret == X86_BR_NONE) {
/* Check for decoding errors */
if (insn_get_length(&insn) || !insn.length)
break;
insn_offset += insn.length;
bytes_read -= insn.length;
if (bytes_read < 0)
break;
insn_init(&insn, addr + insn_offset, bytes_read, is64);
ret = decode_branch_type(&insn);
}
if (offset)
*offset = insn_offset;
/*
* Interrupts, traps and faults (and thus ring transitions) may
* occur on any instruction. Thus, to classify them correctly,
* we need to first look at the from and to priv levels. If they
* are different and to is in the kernel, then it indicates
* a ring transition. If the from instruction is not a ring
* transition instruction (syscall, sysenter, int), then it means
* it was an irq, trap or fault.
*
* We have no way of detecting kernel-to-kernel faults.
*/
if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
&& ret != X86_BR_SYSCALL && ret != X86_BR_INT)
ret = X86_BR_IRQ;
/*
* The branch priv level is determined by the target, as is done by
* the HW when LBR_SELECT is implemented.
*/
if (ret != X86_BR_NONE)
ret |= to_plm;
return ret;
}
int branch_type(unsigned long from, unsigned long to, int abort)
{
return get_branch_type(from, to, abort, false, NULL);
}
int branch_type_fused(unsigned long from, unsigned long to, int abort,
int *offset)
{
return get_branch_type(from, to, abort, true, offset);
}
#define X86_BR_TYPE_MAP_MAX 16
static int branch_map[X86_BR_TYPE_MAP_MAX] = {
PERF_BR_CALL, /* X86_BR_CALL */
PERF_BR_RET, /* X86_BR_RET */
PERF_BR_SYSCALL, /* X86_BR_SYSCALL */
PERF_BR_SYSRET, /* X86_BR_SYSRET */
PERF_BR_UNKNOWN, /* X86_BR_INT */
PERF_BR_ERET, /* X86_BR_IRET */
PERF_BR_COND, /* X86_BR_JCC */
PERF_BR_UNCOND, /* X86_BR_JMP */
PERF_BR_IRQ, /* X86_BR_IRQ */
PERF_BR_IND_CALL, /* X86_BR_IND_CALL */
PERF_BR_UNKNOWN, /* X86_BR_ABORT */
PERF_BR_UNKNOWN, /* X86_BR_IN_TX */
PERF_BR_NO_TX, /* X86_BR_NO_TX */
PERF_BR_CALL, /* X86_BR_ZERO_CALL */
PERF_BR_UNKNOWN, /* X86_BR_CALL_STACK */
PERF_BR_IND, /* X86_BR_IND_JMP */
};
int common_branch_type(int type)
{
int i;
type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
if (type) {
i = __ffs(type);
if (i < X86_BR_TYPE_MAP_MAX)
return branch_map[i];
}
return PERF_BR_UNKNOWN;
}
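For illustration, a small userspace model of the index trick in common_branch_type() above: the two low bits are the privilege level (X86_BR_USER/X86_BR_KERNEL), so the value is shifted right by two before the lowest set bit selects the table entry. The names and the __builtin_ctz() stand-in for __ffs() are assumptions of this sketch, not kernel code:

#include <stdio.h>

#define X86_BR_USER	(1 << 0)
#define X86_BR_KERNEL	(1 << 1)
#define X86_BR_JCC	(1 << 8)

static const char *branch_map_name[] = {
	"CALL", "RET", "SYSCALL", "SYSRET", "UNKNOWN(INT)", "ERET",
	"COND", "UNCOND", "IRQ", "IND_CALL", "UNKNOWN(ABORT)",
	"UNKNOWN(IN_TX)", "NO_TX", "CALL(ZERO)", "UNKNOWN(CALL_STACK)", "IND",
};

static const char *common_branch_name(int type)
{
	type >>= 2;			/* skip X86_BR_USER and X86_BR_KERNEL */
	if (type) {
		int i = __builtin_ctz(type);	/* stand-in for __ffs() */
		if (i < 16)
			return branch_map_name[i];
	}
	return "UNKNOWN";
}

int main(void)
{
	int t = X86_BR_JCC | X86_BR_KERNEL;	/* a conditional jump in the kernel */

	printf("%s\n", common_branch_name(t));	/* prints "COND" */
	return 0;
}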
...@@ -6,6 +6,22 @@ ...@@ -6,6 +6,22 @@
#include <asm/msr-index.h> #include <asm/msr-index.h>
/* IBS_OP_DATA2 DataSrc */
#define IBS_DATA_SRC_LOC_CACHE 2
#define IBS_DATA_SRC_DRAM 3
#define IBS_DATA_SRC_REM_CACHE 4
#define IBS_DATA_SRC_IO 7
/* IBS_OP_DATA2 DataSrc Extension */
#define IBS_DATA_SRC_EXT_LOC_CACHE 1
#define IBS_DATA_SRC_EXT_NEAR_CCX_CACHE 2
#define IBS_DATA_SRC_EXT_DRAM 3
#define IBS_DATA_SRC_EXT_FAR_CCX_CACHE 5
#define IBS_DATA_SRC_EXT_PMEM 6
#define IBS_DATA_SRC_EXT_IO 7
#define IBS_DATA_SRC_EXT_EXT_MEM 8
#define IBS_DATA_SRC_EXT_PEER_AGENT_MEM 12
/* /*
* IBS Hardware MSRs * IBS Hardware MSRs
*/ */
......
...@@ -96,7 +96,7 @@ ...@@ -96,7 +96,7 @@
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
/* FREE! ( 3*32+17) */ #define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
......
...@@ -44,10 +44,7 @@ struct arch_hw_breakpoint { ...@@ -44,10 +44,7 @@ struct arch_hw_breakpoint {
/* Total number of available HW breakpoint registers */ /* Total number of available HW breakpoint registers */
#define HBP_NUM 4 #define HBP_NUM 4
static inline int hw_breakpoint_slots(int type) #define hw_breakpoint_slots(type) (HBP_NUM)
{
return HBP_NUM;
}
struct perf_event_attr; struct perf_event_attr;
struct perf_event; struct perf_event;
......
...@@ -590,6 +590,9 @@ ...@@ -590,6 +590,9 @@
#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
/* AMD Last Branch Record MSRs */
#define MSR_AMD64_LBR_SELECT 0xc000010e
/* Fam 17h MSRs */ /* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9 #define MSR_F17H_IRPERF 0xc00000e9
...@@ -761,6 +764,8 @@ ...@@ -761,6 +764,8 @@
#define MSR_AMD_DBG_EXTN_CFG 0xc000010f #define MSR_AMD_DBG_EXTN_CFG 0xc000010f
#define MSR_AMD_SAMP_BR_FROM 0xc0010300 #define MSR_AMD_SAMP_BR_FROM 0xc0010300
#define DBG_EXTN_CFG_LBRV2EN BIT_ULL(6)
#define MSR_IA32_MPERF 0x000000e7 #define MSR_IA32_MPERF 0x000000e7
#define MSR_IA32_APERF 0x000000e8 #define MSR_IA32_APERF 0x000000e8
......
...@@ -207,7 +207,8 @@ union cpuid_0x80000022_ebx { ...@@ -207,7 +207,8 @@ union cpuid_0x80000022_ebx {
struct { struct {
/* Number of Core Performance Counters */ /* Number of Core Performance Counters */
unsigned int num_core_pmc:4; unsigned int num_core_pmc:4;
unsigned int reserved:6; /* Number of available LBR Stack Entries */
unsigned int lbr_v2_stack_sz:6;
/* Number of Data Fabric Counters */ /* Number of Data Fabric Counters */
unsigned int num_df_pmc:6; unsigned int num_df_pmc:6;
} split; } split;
......
...@@ -45,6 +45,7 @@ static const struct cpuid_bit cpuid_bits[] = { ...@@ -45,6 +45,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
{ X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
{ X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
{ 0, 0, 0, 0, 0 } { 0, 0, 0, 0, 0 }
}; };
......
...@@ -44,7 +44,9 @@ ...@@ -44,7 +44,9 @@
* This allows us to perform the check, i.e, perfmon_capable(), * This allows us to perform the check, i.e, perfmon_capable(),
* in the context of the event owner, once, during the event_init(). * in the context of the event owner, once, during the event_init().
*/ */
#define SPE_PMU_HW_FLAGS_CX BIT(0) #define SPE_PMU_HW_FLAGS_CX 0x00001
static_assert((PERF_EVENT_FLAG_ARCH & SPE_PMU_HW_FLAGS_CX) == SPE_PMU_HW_FLAGS_CX);
static void set_spe_event_has_cx(struct perf_event *event) static void set_spe_event_has_cx(struct perf_event *event)
{ {
......
...@@ -74,12 +74,12 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, ...@@ -74,12 +74,12 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
extern int register_perf_hw_breakpoint(struct perf_event *bp); extern int register_perf_hw_breakpoint(struct perf_event *bp);
extern void unregister_hw_breakpoint(struct perf_event *bp); extern void unregister_hw_breakpoint(struct perf_event *bp);
extern void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events); extern void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events);
extern bool hw_breakpoint_is_used(void);
extern int dbg_reserve_bp_slot(struct perf_event *bp); extern int dbg_reserve_bp_slot(struct perf_event *bp);
extern int dbg_release_bp_slot(struct perf_event *bp); extern int dbg_release_bp_slot(struct perf_event *bp);
extern int reserve_bp_slot(struct perf_event *bp); extern int reserve_bp_slot(struct perf_event *bp);
extern void release_bp_slot(struct perf_event *bp); extern void release_bp_slot(struct perf_event *bp);
int hw_breakpoint_weight(struct perf_event *bp);
int arch_reserve_bp_slot(struct perf_event *bp); int arch_reserve_bp_slot(struct perf_event *bp);
void arch_release_bp_slot(struct perf_event *bp); void arch_release_bp_slot(struct perf_event *bp);
void arch_unregister_hw_breakpoint(struct perf_event *bp); void arch_unregister_hw_breakpoint(struct perf_event *bp);
...@@ -121,6 +121,8 @@ register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } ...@@ -121,6 +121,8 @@ register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; }
static inline void unregister_hw_breakpoint(struct perf_event *bp) { } static inline void unregister_hw_breakpoint(struct perf_event *bp) { }
static inline void static inline void
unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events) { } unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events) { }
static inline bool hw_breakpoint_is_used(void) { return false; }
static inline int static inline int
reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; } reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; }
static inline void release_bp_slot(struct perf_event *bp) { } static inline void release_bp_slot(struct perf_event *bp) { }
......
...@@ -121,9 +121,15 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem) ...@@ -121,9 +121,15 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
preempt_enable(); preempt_enable();
} }
extern bool percpu_is_read_locked(struct percpu_rw_semaphore *);
extern void percpu_down_write(struct percpu_rw_semaphore *); extern void percpu_down_write(struct percpu_rw_semaphore *);
extern void percpu_up_write(struct percpu_rw_semaphore *); extern void percpu_up_write(struct percpu_rw_semaphore *);
static inline bool percpu_is_write_locked(struct percpu_rw_semaphore *sem)
{
return atomic_read(&sem->block);
}
extern int __percpu_init_rwsem(struct percpu_rw_semaphore *, extern int __percpu_init_rwsem(struct percpu_rw_semaphore *,
const char *, struct lock_class_key *); const char *, struct lock_class_key *);
......
...@@ -24,10 +24,11 @@ ...@@ -24,10 +24,11 @@
/* /*
* ARM PMU hw_event flags * ARM PMU hw_event flags
*/ */
/* Event uses a 64bit counter */ #define ARMPMU_EVT_64BIT 0x00001 /* Event uses a 64bit counter */
#define ARMPMU_EVT_64BIT 1 #define ARMPMU_EVT_47BIT 0x00002 /* Event uses a 47bit counter */
/* Event uses a 47bit counter */
#define ARMPMU_EVT_47BIT 2 static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_64BIT) == ARMPMU_EVT_64BIT);
static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_47BIT) == ARMPMU_EVT_47BIT);
#define HW_OP_UNSUPPORTED 0xFFFF #define HW_OP_UNSUPPORTED 0xFFFF
#define C(_x) PERF_COUNT_HW_CACHE_##_x #define C(_x) PERF_COUNT_HW_CACHE_##_x
......
...@@ -36,6 +36,7 @@ struct perf_guest_info_callbacks { ...@@ -36,6 +36,7 @@ struct perf_guest_info_callbacks {
}; };
#ifdef CONFIG_HAVE_HW_BREAKPOINT #ifdef CONFIG_HAVE_HW_BREAKPOINT
#include <linux/rhashtable-types.h>
#include <asm/hw_breakpoint.h> #include <asm/hw_breakpoint.h>
#endif #endif
...@@ -60,6 +61,7 @@ struct perf_guest_info_callbacks { ...@@ -60,6 +61,7 @@ struct perf_guest_info_callbacks {
#include <linux/refcount.h> #include <linux/refcount.h>
#include <linux/security.h> #include <linux/security.h>
#include <linux/static_call.h> #include <linux/static_call.h>
#include <linux/lockdep.h>
#include <asm/local.h> #include <asm/local.h>
struct perf_callchain_entry { struct perf_callchain_entry {
...@@ -137,9 +139,11 @@ struct hw_perf_event_extra { ...@@ -137,9 +139,11 @@ struct hw_perf_event_extra {
* PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific
* usage. * usage.
*/ */
#define PERF_EVENT_FLAG_ARCH 0x0000ffff #define PERF_EVENT_FLAG_ARCH 0x000fffff
#define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000 #define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000
static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0);
/** /**
* struct hw_perf_event - performance event hardware details: * struct hw_perf_event - performance event hardware details:
*/ */
...@@ -178,7 +182,7 @@ struct hw_perf_event { ...@@ -178,7 +182,7 @@ struct hw_perf_event {
* creation and event initalization. * creation and event initalization.
*/ */
struct arch_hw_breakpoint info; struct arch_hw_breakpoint info;
struct list_head bp_list; struct rhlist_head bp_list;
}; };
#endif #endif
struct { /* amd_iommu */ struct { /* amd_iommu */
...@@ -631,7 +635,23 @@ struct pmu_event_list { ...@@ -631,7 +635,23 @@ struct pmu_event_list {
struct list_head list; struct list_head list;
}; };
/*
* event->sibling_list is modified while holding both ctx->lock and ctx->mutex;
* as such, iteration must hold either lock. However, since ctx->lock is an IRQ
* safe lock, and is only held by the CPU doing the modification, having IRQs
* disabled is sufficient since it will hold-off the IPIs.
*/
#ifdef CONFIG_PROVE_LOCKING
#define lockdep_assert_event_ctx(event) \
WARN_ON_ONCE(__lockdep_enabled && \
(this_cpu_read(hardirqs_enabled) && \
lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD))
#else
#define lockdep_assert_event_ctx(event)
#endif
#define for_each_sibling_event(sibling, event) \ #define for_each_sibling_event(sibling, event) \
lockdep_assert_event_ctx(event); \
if ((event)->group_leader == (event)) \ if ((event)->group_leader == (event)) \
list_for_each_entry((sibling), &(event)->sibling_list, sibling_list) list_for_each_entry((sibling), &(event)->sibling_list, sibling_list)
...@@ -1007,18 +1027,20 @@ struct perf_sample_data { ...@@ -1007,18 +1027,20 @@ struct perf_sample_data {
* Fields set by perf_sample_data_init(), group so as to * Fields set by perf_sample_data_init(), group so as to
* minimize the cachelines touched. * minimize the cachelines touched.
*/ */
u64 addr; u64 sample_flags;
struct perf_raw_record *raw;
struct perf_branch_stack *br_stack;
u64 period; u64 period;
union perf_sample_weight weight;
u64 txn;
union perf_mem_data_src data_src;
/* /*
* The other fields, optionally {set,used} by * The other fields, optionally {set,used} by
* perf_{prepare,output}_sample(). * perf_{prepare,output}_sample().
*/ */
struct perf_branch_stack *br_stack;
union perf_sample_weight weight;
union perf_mem_data_src data_src;
u64 txn;
u64 addr;
struct perf_raw_record *raw;
u64 type; u64 type;
u64 ip; u64 ip;
struct { struct {
...@@ -1056,13 +1078,13 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, ...@@ -1056,13 +1078,13 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
u64 addr, u64 period) u64 addr, u64 period)
{ {
/* remaining struct members initialized in perf_prepare_sample() */ /* remaining struct members initialized in perf_prepare_sample() */
data->addr = addr; data->sample_flags = PERF_SAMPLE_PERIOD;
data->raw = NULL;
data->br_stack = NULL;
data->period = period; data->period = period;
data->weight.full = 0;
data->data_src.val = PERF_MEM_NA; if (addr) {
data->txn = 0; data->addr = addr;
data->sample_flags |= PERF_SAMPLE_ADDR;
}
} }
/* /*
...@@ -1078,6 +1100,7 @@ static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *b ...@@ -1078,6 +1100,7 @@ static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *b
br->abort = 0; br->abort = 0;
br->cycles = 0; br->cycles = 0;
br->type = 0; br->type = 0;
br->spec = PERF_BR_SPEC_NA;
br->reserved = 0; br->reserved = 0;
} }
...@@ -1684,4 +1707,30 @@ static inline void perf_lopwr_cb(bool mode) ...@@ -1684,4 +1707,30 @@ static inline void perf_lopwr_cb(bool mode)
} }
#endif #endif
#ifdef CONFIG_PERF_EVENTS
static inline bool branch_sample_no_flags(const struct perf_event *event)
{
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS;
}
static inline bool branch_sample_no_cycles(const struct perf_event *event)
{
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES;
}
static inline bool branch_sample_type(const struct perf_event *event)
{
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE;
}
static inline bool branch_sample_hw_index(const struct perf_event *event)
{
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
}
static inline bool branch_sample_priv(const struct perf_event *event)
{
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
}
#endif /* CONFIG_PERF_EVENTS */
#endif /* _LINUX_PERF_EVENT_H */ #endif /* _LINUX_PERF_EVENT_H */
...@@ -164,8 +164,6 @@ enum perf_event_sample_format { ...@@ -164,8 +164,6 @@ enum perf_event_sample_format {
PERF_SAMPLE_WEIGHT_STRUCT = 1U << 24, PERF_SAMPLE_WEIGHT_STRUCT = 1U << 24,
PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */ PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */
__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
}; };
#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) #define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT)
...@@ -204,6 +202,8 @@ enum perf_branch_sample_type_shift { ...@@ -204,6 +202,8 @@ enum perf_branch_sample_type_shift {
PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */ PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */
PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */
PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
}; };
...@@ -233,6 +233,8 @@ enum perf_branch_sample_type { ...@@ -233,6 +233,8 @@ enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
}; };
...@@ -253,9 +255,48 @@ enum { ...@@ -253,9 +255,48 @@ enum {
PERF_BR_COND_RET = 10, /* conditional function return */ PERF_BR_COND_RET = 10, /* conditional function return */
PERF_BR_ERET = 11, /* exception return */ PERF_BR_ERET = 11, /* exception return */
PERF_BR_IRQ = 12, /* irq */ PERF_BR_IRQ = 12, /* irq */
PERF_BR_SERROR = 13, /* system error */
PERF_BR_NO_TX = 14, /* not in transaction */
PERF_BR_EXTEND_ABI = 15, /* extend ABI */
PERF_BR_MAX, PERF_BR_MAX,
}; };
/*
* Common branch speculation outcome classification
*/
enum {
PERF_BR_SPEC_NA = 0, /* Not available */
PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */
PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */
PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */
PERF_BR_SPEC_MAX,
};
enum {
PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */
PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */
PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */
PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */
PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */
PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */
PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */
PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */
PERF_BR_NEW_MAX,
};
enum {
PERF_BR_PRIV_UNKNOWN = 0,
PERF_BR_PRIV_USER = 1,
PERF_BR_PRIV_KERNEL = 2,
PERF_BR_PRIV_HV = 3,
};
#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1
#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2
#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3
#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4
#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5
#define PERF_SAMPLE_BRANCH_PLM_ALL \ #define PERF_SAMPLE_BRANCH_PLM_ALL \
(PERF_SAMPLE_BRANCH_USER|\ (PERF_SAMPLE_BRANCH_USER|\
PERF_SAMPLE_BRANCH_KERNEL|\ PERF_SAMPLE_BRANCH_KERNEL|\
...@@ -1295,7 +1336,9 @@ union perf_mem_data_src { ...@@ -1295,7 +1336,9 @@ union perf_mem_data_src {
#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
/* 5-0xa available */ /* 5-0x8 available */
#define PERF_MEM_LVLNUM_EXTN_MEM 0x09 /* Extension memory */
#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */
#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ #define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */
#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ #define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */
...@@ -1313,7 +1356,7 @@ union perf_mem_data_src { ...@@ -1313,7 +1356,7 @@ union perf_mem_data_src {
#define PERF_MEM_SNOOP_SHIFT 19 #define PERF_MEM_SNOOP_SHIFT 19
#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ #define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
/* 1 free */ #define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */
#define PERF_MEM_SNOOPX_SHIFT 38 #define PERF_MEM_SNOOPX_SHIFT 38
/* locked instruction */ /* locked instruction */
...@@ -1363,6 +1406,7 @@ union perf_mem_data_src { ...@@ -1363,6 +1406,7 @@ union perf_mem_data_src {
* abort: aborting a hardware transaction * abort: aborting a hardware transaction
* cycles: cycles from last branch (or 0 if not supported) * cycles: cycles from last branch (or 0 if not supported)
* type: branch type * type: branch type
* spec: branch speculation info (or 0 if not supported)
*/ */
struct perf_branch_entry { struct perf_branch_entry {
__u64 from; __u64 from;
...@@ -1373,7 +1417,10 @@ struct perf_branch_entry { ...@@ -1373,7 +1417,10 @@ struct perf_branch_entry {
abort:1, /* transaction abort */ abort:1, /* transaction abort */
cycles:16, /* cycle count to last branch */ cycles:16, /* cycle count to last branch */
type:4, /* branch type */ type:4, /* branch type */
reserved:40; spec:2, /* branch speculation info */
new_type:4, /* additional branch type */
priv:3, /* privilege level */
reserved:31;
}; };
union perf_sample_weight { union perf_sample_weight {
......
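A hedged userspace sketch of how the new spec/new_type/priv bits compose in a branch record: mirror the bitfield layout above, clear all optional fields (as perf_clear_branch_entry_bitfields() does), then fill in only what the hardware reports, e.g. a speculation outcome. The struct and function names here are local stand-ins, not the kernel ABI headers:

#include <stdio.h>

enum {
	PERF_BR_SPEC_NA			= 0,
	PERF_BR_SPEC_WRONG_PATH		= 1,
	PERF_BR_NON_SPEC_CORRECT_PATH	= 2,
	PERF_BR_SPEC_CORRECT_PATH	= 3,
};

struct branch_entry {
	unsigned long long from;
	unsigned long long to;
	unsigned long long mispred:1, predicted:1, in_tx:1, abort:1,
			   cycles:16, type:4, spec:2, new_type:4, priv:3,
			   reserved:31;
};

static void clear_branch_entry_bitfields(struct branch_entry *br)
{
	br->mispred = 0;
	br->predicted = 0;
	br->in_tx = 0;
	br->abort = 0;
	br->cycles = 0;
	br->type = 0;
	br->spec = PERF_BR_SPEC_NA;	/* "not available" unless the PMU knows better */
	br->new_type = 0;
	br->priv = 0;
	br->reserved = 0;
}

int main(void)
{
	struct branch_entry br = { .from = 0x1000, .to = 0x2000 };

	clear_branch_entry_bitfields(&br);
	br.spec = PERF_BR_SPEC_CORRECT_PATH;	/* taken speculatively, turned out correct */

	printf("from=%#llx to=%#llx spec=%u\n", br.from, br.to, (unsigned)br.spec);
	return 0;
}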
...@@ -338,7 +338,7 @@ BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx, ...@@ -338,7 +338,7 @@ BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
int ret; int ret;
/* perf_sample_data doesn't have callchain, use bpf_get_stackid */ /* perf_sample_data doesn't have callchain, use bpf_get_stackid */
if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY)) if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
return bpf_get_stackid((unsigned long)(ctx->regs), return bpf_get_stackid((unsigned long)(ctx->regs),
(unsigned long) map, flags, 0, 0); (unsigned long) map, flags, 0, 0);
...@@ -506,7 +506,7 @@ BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx, ...@@ -506,7 +506,7 @@ BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
int err = -EINVAL; int err = -EINVAL;
__u64 nr_kernel; __u64 nr_kernel;
if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY)) if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
return __bpf_get_stack(regs, NULL, NULL, buf, size, flags); return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
......
...@@ -2,4 +2,5 @@ ...@@ -2,4 +2,5 @@
obj-y := core.o ring_buffer.o callchain.o obj-y := core.o ring_buffer.o callchain.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_HW_BREAKPOINT_KUNIT_TEST) += hw_breakpoint_test.o
obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_UPROBES) += uprobes.o
...@@ -1468,6 +1468,8 @@ static void __update_context_time(struct perf_event_context *ctx, bool adv) ...@@ -1468,6 +1468,8 @@ static void __update_context_time(struct perf_event_context *ctx, bool adv)
{ {
u64 now = perf_clock(); u64 now = perf_clock();
lockdep_assert_held(&ctx->lock);
if (adv) if (adv)
ctx->time += now - ctx->timestamp; ctx->time += now - ctx->timestamp;
ctx->timestamp = now; ctx->timestamp = now;
...@@ -2224,16 +2226,22 @@ static inline int __pmu_filter_match(struct perf_event *event) ...@@ -2224,16 +2226,22 @@ static inline int __pmu_filter_match(struct perf_event *event)
static inline int pmu_filter_match(struct perf_event *event) static inline int pmu_filter_match(struct perf_event *event)
{ {
struct perf_event *sibling; struct perf_event *sibling;
unsigned long flags;
int ret = 1;
if (!__pmu_filter_match(event)) if (!__pmu_filter_match(event))
return 0; return 0;
local_irq_save(flags);
for_each_sibling_event(sibling, event) { for_each_sibling_event(sibling, event) {
if (!__pmu_filter_match(sibling)) if (!__pmu_filter_match(sibling)) {
return 0; ret = 0;
break;
}
} }
local_irq_restore(flags);
return 1; return ret;
} }
static inline int static inline int
...@@ -6794,11 +6802,10 @@ static void perf_aux_sample_output(struct perf_event *event, ...@@ -6794,11 +6802,10 @@ static void perf_aux_sample_output(struct perf_event *event,
static void __perf_event_header__init_id(struct perf_event_header *header, static void __perf_event_header__init_id(struct perf_event_header *header,
struct perf_sample_data *data, struct perf_sample_data *data,
struct perf_event *event) struct perf_event *event,
u64 sample_type)
{ {
u64 sample_type = event->attr.sample_type; data->type = event->attr.sample_type;
data->type = sample_type;
header->size += event->id_header_size; header->size += event->id_header_size;
if (sample_type & PERF_SAMPLE_TID) { if (sample_type & PERF_SAMPLE_TID) {
...@@ -6827,7 +6834,7 @@ void perf_event_header__init_id(struct perf_event_header *header, ...@@ -6827,7 +6834,7 @@ void perf_event_header__init_id(struct perf_event_header *header,
struct perf_event *event) struct perf_event *event)
{ {
if (event->attr.sample_id_all) if (event->attr.sample_id_all)
__perf_event_header__init_id(header, data, event); __perf_event_header__init_id(header, data, event, event->attr.sample_type);
} }
static void __perf_event__output_id_sample(struct perf_output_handle *handle, static void __perf_event__output_id_sample(struct perf_output_handle *handle,
...@@ -6976,11 +6983,6 @@ static void perf_output_read(struct perf_output_handle *handle, ...@@ -6976,11 +6983,6 @@ static void perf_output_read(struct perf_output_handle *handle,
perf_output_read_one(handle, event, enabled, running); perf_output_read_one(handle, event, enabled, running);
} }
static inline bool perf_sample_save_hw_index(struct perf_event *event)
{
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
}
void perf_output_sample(struct perf_output_handle *handle, void perf_output_sample(struct perf_output_handle *handle,
struct perf_event_header *header, struct perf_event_header *header,
struct perf_sample_data *data, struct perf_sample_data *data,
...@@ -7062,14 +7064,14 @@ void perf_output_sample(struct perf_output_handle *handle, ...@@ -7062,14 +7064,14 @@ void perf_output_sample(struct perf_output_handle *handle,
} }
if (sample_type & PERF_SAMPLE_BRANCH_STACK) { if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
if (data->br_stack) { if (data->sample_flags & PERF_SAMPLE_BRANCH_STACK) {
size_t size; size_t size;
size = data->br_stack->nr size = data->br_stack->nr
* sizeof(struct perf_branch_entry); * sizeof(struct perf_branch_entry);
perf_output_put(handle, data->br_stack->nr); perf_output_put(handle, data->br_stack->nr);
if (perf_sample_save_hw_index(event)) if (branch_sample_hw_index(event))
perf_output_put(handle, data->br_stack->hw_idx); perf_output_put(handle, data->br_stack->hw_idx);
perf_output_copy(handle, data->br_stack->entries, size); perf_output_copy(handle, data->br_stack->entries, size);
} else { } else {
...@@ -7312,6 +7314,7 @@ void perf_prepare_sample(struct perf_event_header *header, ...@@ -7312,6 +7314,7 @@ void perf_prepare_sample(struct perf_event_header *header,
struct pt_regs *regs) struct pt_regs *regs)
{ {
u64 sample_type = event->attr.sample_type; u64 sample_type = event->attr.sample_type;
u64 filtered_sample_type;
header->type = PERF_RECORD_SAMPLE; header->type = PERF_RECORD_SAMPLE;
header->size = sizeof(*header) + event->header_size; header->size = sizeof(*header) + event->header_size;
...@@ -7319,7 +7322,12 @@ void perf_prepare_sample(struct perf_event_header *header, ...@@ -7319,7 +7322,12 @@ void perf_prepare_sample(struct perf_event_header *header,
header->misc = 0; header->misc = 0;
header->misc |= perf_misc_flags(regs); header->misc |= perf_misc_flags(regs);
__perf_event_header__init_id(header, data, event); /*
* Clear the sample flags that have already been done by the
* PMU driver.
*/
filtered_sample_type = sample_type & ~data->sample_flags;
__perf_event_header__init_id(header, data, event, filtered_sample_type);
if (sample_type & (PERF_SAMPLE_IP | PERF_SAMPLE_CODE_PAGE_SIZE)) if (sample_type & (PERF_SAMPLE_IP | PERF_SAMPLE_CODE_PAGE_SIZE))
data->ip = perf_instruction_pointer(regs); data->ip = perf_instruction_pointer(regs);
...@@ -7327,7 +7335,7 @@ void perf_prepare_sample(struct perf_event_header *header, ...@@ -7327,7 +7335,7 @@ void perf_prepare_sample(struct perf_event_header *header,
if (sample_type & PERF_SAMPLE_CALLCHAIN) { if (sample_type & PERF_SAMPLE_CALLCHAIN) {
int size = 1; int size = 1;
if (!(sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY)) if (filtered_sample_type & PERF_SAMPLE_CALLCHAIN)
data->callchain = perf_callchain(event, regs); data->callchain = perf_callchain(event, regs);
size += data->callchain->nr; size += data->callchain->nr;
...@@ -7339,7 +7347,7 @@ void perf_prepare_sample(struct perf_event_header *header, ...@@ -7339,7 +7347,7 @@ void perf_prepare_sample(struct perf_event_header *header,
struct perf_raw_record *raw = data->raw; struct perf_raw_record *raw = data->raw;
int size; int size;
if (raw) { if (raw && (data->sample_flags & PERF_SAMPLE_RAW)) {
struct perf_raw_frag *frag = &raw->frag; struct perf_raw_frag *frag = &raw->frag;
u32 sum = 0; u32 sum = 0;
...@@ -7355,6 +7363,7 @@ void perf_prepare_sample(struct perf_event_header *header, ...@@ -7355,6 +7363,7 @@ void perf_prepare_sample(struct perf_event_header *header,
frag->pad = raw->size - sum; frag->pad = raw->size - sum;
} else { } else {
size = sizeof(u64); size = sizeof(u64);
data->raw = NULL;
} }
header->size += size; header->size += size;
...@@ -7362,8 +7371,8 @@ void perf_prepare_sample(struct perf_event_header *header, ...@@ -7362,8 +7371,8 @@ void perf_prepare_sample(struct perf_event_header *header,
if (sample_type & PERF_SAMPLE_BRANCH_STACK) { if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
int size = sizeof(u64); /* nr */ int size = sizeof(u64); /* nr */
if (data->br_stack) { if (data->sample_flags & PERF_SAMPLE_BRANCH_STACK) {
if (perf_sample_save_hw_index(event)) if (branch_sample_hw_index(event))
size += sizeof(u64); size += sizeof(u64);
size += data->br_stack->nr size += data->br_stack->nr
...@@ -7412,6 +7421,20 @@ void perf_prepare_sample(struct perf_event_header *header, ...@@ -7412,6 +7421,20 @@ void perf_prepare_sample(struct perf_event_header *header,
header->size += size; header->size += size;
} }
if (filtered_sample_type & PERF_SAMPLE_WEIGHT_TYPE)
data->weight.full = 0;
if (filtered_sample_type & PERF_SAMPLE_DATA_SRC)
data->data_src.val = PERF_MEM_NA;
if (filtered_sample_type & PERF_SAMPLE_TRANSACTION)
data->txn = 0;
if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_DATA_PAGE_SIZE)) {
if (filtered_sample_type & PERF_SAMPLE_ADDR)
data->addr = 0;
}
if (sample_type & PERF_SAMPLE_REGS_INTR) { if (sample_type & PERF_SAMPLE_REGS_INTR) {
/* regs dump ABI info */ /* regs dump ABI info */
int size = sizeof(u64); int size = sizeof(u64);
...@@ -7427,7 +7450,8 @@ void perf_prepare_sample(struct perf_event_header *header, ...@@ -7427,7 +7450,8 @@ void perf_prepare_sample(struct perf_event_header *header,
header->size += size; header->size += size;
} }
if (sample_type & PERF_SAMPLE_PHYS_ADDR) if (sample_type & PERF_SAMPLE_PHYS_ADDR &&
filtered_sample_type & PERF_SAMPLE_PHYS_ADDR)
data->phys_addr = perf_virt_to_phys(data->addr); data->phys_addr = perf_virt_to_phys(data->addr);
#ifdef CONFIG_CGROUP_PERF #ifdef CONFIG_CGROUP_PERF
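The sample_flags rework above boils down to one rule: the PMU driver marks which perf_sample_data fields it already filled, and perf_prepare_sample() only defaults the remaining requested ones via filtered_sample_type = sample_type & ~data->sample_flags. A simplified, runnable userspace model of that rule (the flag values and struct are illustrative, not the ABI):

#include <stdint.h>
#include <stdio.h>

#define SAMPLE_ADDR	(1u << 0)
#define SAMPLE_WEIGHT	(1u << 1)
#define SAMPLE_DATA_SRC	(1u << 2)

struct sample_data {
	uint32_t sample_flags;	/* fields the driver already filled in */
	uint64_t addr;
	uint64_t weight;
	uint64_t data_src;
};

static void prepare_sample(struct sample_data *data, uint32_t sample_type)
{
	/* Only default the fields the event wants but the driver did not set. */
	uint32_t filtered = sample_type & ~data->sample_flags;

	if (filtered & SAMPLE_ADDR)
		data->addr = 0;
	if (filtered & SAMPLE_WEIGHT)
		data->weight = 0;
	if (filtered & SAMPLE_DATA_SRC)
		data->data_src = ~0ull;	/* "not available" */
}

int main(void)
{
	struct sample_data data = { .sample_flags = SAMPLE_ADDR, .addr = 0x1234 };

	prepare_sample(&data, SAMPLE_ADDR | SAMPLE_WEIGHT);
	printf("addr=%#llx weight=%llu\n",
	       (unsigned long long)data.addr, (unsigned long long)data.weight);
	return 0;	/* addr stays 0x1234; weight is defaulted to 0 */
}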
...@@ -9998,8 +10022,16 @@ static void bpf_overflow_handler(struct perf_event *event, ...@@ -9998,8 +10022,16 @@ static void bpf_overflow_handler(struct perf_event *event,
goto out; goto out;
rcu_read_lock(); rcu_read_lock();
prog = READ_ONCE(event->prog); prog = READ_ONCE(event->prog);
if (prog) if (prog) {
if (prog->call_get_stack &&
(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
!(data->sample_flags & PERF_SAMPLE_CALLCHAIN)) {
data->callchain = perf_callchain(event, regs);
data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
}
ret = bpf_prog_run(prog, &ctx); ret = bpf_prog_run(prog, &ctx);
}
rcu_read_unlock(); rcu_read_unlock();
out: out:
__this_cpu_dec(bpf_prog_active); __this_cpu_dec(bpf_prog_active);
...@@ -10025,7 +10057,7 @@ static int perf_event_set_bpf_handler(struct perf_event *event, ...@@ -10025,7 +10057,7 @@ static int perf_event_set_bpf_handler(struct perf_event *event,
if (event->attr.precise_ip && if (event->attr.precise_ip &&
prog->call_get_stack && prog->call_get_stack &&
(!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY) || (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) ||
event->attr.exclude_callchain_kernel || event->attr.exclude_callchain_kernel ||
event->attr.exclude_callchain_user)) { event->attr.exclude_callchain_user)) {
/* /*
...@@ -10942,7 +10974,7 @@ static ssize_t nr_addr_filters_show(struct device *dev, ...@@ -10942,7 +10974,7 @@ static ssize_t nr_addr_filters_show(struct device *dev,
{ {
struct pmu *pmu = dev_get_drvdata(dev); struct pmu *pmu = dev_get_drvdata(dev);
return snprintf(page, PAGE_SIZE - 1, "%d\n", pmu->nr_addr_filters); return scnprintf(page, PAGE_SIZE - 1, "%d\n", pmu->nr_addr_filters);
} }
DEVICE_ATTR_RO(nr_addr_filters); DEVICE_ATTR_RO(nr_addr_filters);
...@@ -10953,7 +10985,7 @@ type_show(struct device *dev, struct device_attribute *attr, char *page) ...@@ -10953,7 +10985,7 @@ type_show(struct device *dev, struct device_attribute *attr, char *page)
{ {
struct pmu *pmu = dev_get_drvdata(dev); struct pmu *pmu = dev_get_drvdata(dev);
return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type); return scnprintf(page, PAGE_SIZE - 1, "%d\n", pmu->type);
} }
static DEVICE_ATTR_RO(type); static DEVICE_ATTR_RO(type);
...@@ -10964,7 +10996,7 @@ perf_event_mux_interval_ms_show(struct device *dev, ...@@ -10964,7 +10996,7 @@ perf_event_mux_interval_ms_show(struct device *dev,
{ {
struct pmu *pmu = dev_get_drvdata(dev); struct pmu *pmu = dev_get_drvdata(dev);
return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->hrtimer_interval_ms); return scnprintf(page, PAGE_SIZE - 1, "%d\n", pmu->hrtimer_interval_ms);
} }
static DEFINE_MUTEX(mux_interval_mutex); static DEFINE_MUTEX(mux_interval_mutex);
...@@ -11718,11 +11750,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, ...@@ -11718,11 +11750,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
event->destroy(event); event->destroy(event);
module_put(pmu->module); module_put(pmu->module);
err_ns: err_ns:
if (event->ns)
put_pid_ns(event->ns);
if (event->hw.target) if (event->hw.target)
put_task_struct(event->hw.target); put_task_struct(event->hw.target);
kmem_cache_free(perf_event_cache, event); call_rcu(&event->rcu_head, free_event_rcu);
return ERR_PTR(err); return ERR_PTR(err);
} }
......
// SPDX-License-Identifier: GPL-2.0
/*
* KUnit test for hw_breakpoint constraints accounting logic.
*
* Copyright (C) 2022, Google LLC.
*/
#include <kunit/test.h>
#include <linux/cpumask.h>
#include <linux/hw_breakpoint.h>
#include <linux/kthread.h>
#include <linux/perf_event.h>
#include <asm/hw_breakpoint.h>
#define TEST_REQUIRES_BP_SLOTS(test, slots) \
do { \
if ((slots) > get_test_bp_slots()) { \
kunit_skip((test), "Requires breakpoint slots: %d > %d", slots, \
get_test_bp_slots()); \
} \
} while (0)
#define TEST_EXPECT_NOSPC(expr) KUNIT_EXPECT_EQ(test, -ENOSPC, PTR_ERR(expr))
#define MAX_TEST_BREAKPOINTS 512
static char break_vars[MAX_TEST_BREAKPOINTS];
static struct perf_event *test_bps[MAX_TEST_BREAKPOINTS];
static struct task_struct *__other_task;
static struct perf_event *register_test_bp(int cpu, struct task_struct *tsk, int idx)
{
struct perf_event_attr attr = {};
if (WARN_ON(idx < 0 || idx >= MAX_TEST_BREAKPOINTS))
return NULL;
hw_breakpoint_init(&attr);
attr.bp_addr = (unsigned long)&break_vars[idx];
attr.bp_len = HW_BREAKPOINT_LEN_1;
attr.bp_type = HW_BREAKPOINT_RW;
return perf_event_create_kernel_counter(&attr, cpu, tsk, NULL, NULL);
}
static void unregister_test_bp(struct perf_event **bp)
{
if (WARN_ON(IS_ERR(*bp)))
return;
if (WARN_ON(!*bp))
return;
unregister_hw_breakpoint(*bp);
*bp = NULL;
}
static int get_test_bp_slots(void)
{
static int slots;
if (!slots)
slots = hw_breakpoint_slots(TYPE_DATA);
return slots;
}
static void fill_one_bp_slot(struct kunit *test, int *id, int cpu, struct task_struct *tsk)
{
struct perf_event *bp = register_test_bp(cpu, tsk, *id);
KUNIT_ASSERT_NOT_NULL(test, bp);
KUNIT_ASSERT_FALSE(test, IS_ERR(bp));
KUNIT_ASSERT_NULL(test, test_bps[*id]);
test_bps[(*id)++] = bp;
}
/*
* Fills up the given @cpu/@tsk with breakpoints, only leaving @skip slots free.
*
* Returns true if this can be called again, continuing at @id.
*/
static bool fill_bp_slots(struct kunit *test, int *id, int cpu, struct task_struct *tsk, int skip)
{
for (int i = 0; i < get_test_bp_slots() - skip; ++i)
fill_one_bp_slot(test, id, cpu, tsk);
return *id + get_test_bp_slots() <= MAX_TEST_BREAKPOINTS;
}
static int dummy_kthread(void *arg)
{
return 0;
}
static struct task_struct *get_other_task(struct kunit *test)
{
struct task_struct *tsk;
if (__other_task)
return __other_task;
tsk = kthread_create(dummy_kthread, NULL, "hw_breakpoint_dummy_task");
KUNIT_ASSERT_FALSE(test, IS_ERR(tsk));
__other_task = tsk;
return __other_task;
}
static int get_test_cpu(int num)
{
int cpu;
WARN_ON(num < 0);
for_each_online_cpu(cpu) {
if (num-- <= 0)
break;
}
return cpu;
}
/* ===== Test cases ===== */
static void test_one_cpu(struct kunit *test)
{
int idx = 0;
fill_bp_slots(test, &idx, get_test_cpu(0), NULL, 0);
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
}
static void test_many_cpus(struct kunit *test)
{
int idx = 0;
int cpu;
/* Test that CPUs are independent. */
for_each_online_cpu(cpu) {
bool do_continue = fill_bp_slots(test, &idx, cpu, NULL, 0);
TEST_EXPECT_NOSPC(register_test_bp(cpu, NULL, idx));
if (!do_continue)
break;
}
}
static void test_one_task_on_all_cpus(struct kunit *test)
{
int idx = 0;
fill_bp_slots(test, &idx, -1, current, 0);
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
/* Removing one and adding back a CPU-target breakpoint should work. */
unregister_test_bp(&test_bps[0]);
fill_one_bp_slot(test, &idx, get_test_cpu(0), NULL);
}
static void test_two_tasks_on_all_cpus(struct kunit *test)
{
int idx = 0;
/* Test that tasks are independent. */
fill_bp_slots(test, &idx, -1, current, 0);
fill_bp_slots(test, &idx, -1, get_other_task(test), 0);
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
TEST_EXPECT_NOSPC(register_test_bp(-1, get_other_task(test), idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), get_other_task(test), idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
/* Removing one from the first task and adding back a CPU-target breakpoint should not work. */
unregister_test_bp(&test_bps[0]);
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
}
static void test_one_task_on_one_cpu(struct kunit *test)
{
int idx = 0;
fill_bp_slots(test, &idx, get_test_cpu(0), current, 0);
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
/*
* Removing one and adding back a CPU-target breakpoint should work; this
* case is special vs. the above because the task's constraints are CPU-dependent.
*/
unregister_test_bp(&test_bps[0]);
fill_one_bp_slot(test, &idx, get_test_cpu(0), NULL);
}
static void test_one_task_mixed(struct kunit *test)
{
int idx = 0;
TEST_REQUIRES_BP_SLOTS(test, 3);
fill_one_bp_slot(test, &idx, get_test_cpu(0), current);
fill_bp_slots(test, &idx, -1, current, 1);
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
/* Transition from CPU-dependent pinned count to CPU-independent. */
unregister_test_bp(&test_bps[0]);
unregister_test_bp(&test_bps[1]);
fill_one_bp_slot(test, &idx, get_test_cpu(0), NULL);
fill_one_bp_slot(test, &idx, get_test_cpu(0), NULL);
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
}
static void test_two_tasks_on_one_cpu(struct kunit *test)
{
int idx = 0;
fill_bp_slots(test, &idx, get_test_cpu(0), current, 0);
fill_bp_slots(test, &idx, get_test_cpu(0), get_other_task(test), 0);
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
TEST_EXPECT_NOSPC(register_test_bp(-1, get_other_task(test), idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), get_other_task(test), idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
/* Can still create breakpoints on some other CPU. */
fill_bp_slots(test, &idx, get_test_cpu(1), NULL, 0);
}
static void test_two_tasks_on_one_all_cpus(struct kunit *test)
{
int idx = 0;
fill_bp_slots(test, &idx, get_test_cpu(0), current, 0);
fill_bp_slots(test, &idx, -1, get_other_task(test), 0);
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
TEST_EXPECT_NOSPC(register_test_bp(-1, get_other_task(test), idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), get_other_task(test), idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
/* Cannot create breakpoints on some other CPU either. */
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(1), NULL, idx));
}
static void test_task_on_all_and_one_cpu(struct kunit *test)
{
int tsk_on_cpu_idx, cpu_idx;
int idx = 0;
TEST_REQUIRES_BP_SLOTS(test, 3);
fill_bp_slots(test, &idx, -1, current, 2);
/* Transitioning from only all CPU breakpoints to mixed. */
tsk_on_cpu_idx = idx;
fill_one_bp_slot(test, &idx, get_test_cpu(0), current);
fill_one_bp_slot(test, &idx, -1, current);
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
/* We should still be able to use up another CPU's slots. */
cpu_idx = idx;
fill_one_bp_slot(test, &idx, get_test_cpu(1), NULL);
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(1), NULL, idx));
/* Transitioning back to task target on all CPUs. */
unregister_test_bp(&test_bps[tsk_on_cpu_idx]);
/* Still have a CPU target breakpoint in get_test_cpu(1). */
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
/* Remove it and try again. */
unregister_test_bp(&test_bps[cpu_idx]);
fill_one_bp_slot(test, &idx, -1, current);
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(1), NULL, idx));
}
static struct kunit_case hw_breakpoint_test_cases[] = {
KUNIT_CASE(test_one_cpu),
KUNIT_CASE(test_many_cpus),
KUNIT_CASE(test_one_task_on_all_cpus),
KUNIT_CASE(test_two_tasks_on_all_cpus),
KUNIT_CASE(test_one_task_on_one_cpu),
KUNIT_CASE(test_one_task_mixed),
KUNIT_CASE(test_two_tasks_on_one_cpu),
KUNIT_CASE(test_two_tasks_on_one_all_cpus),
KUNIT_CASE(test_task_on_all_and_one_cpu),
{},
};
static int test_init(struct kunit *test)
{
/* Most test cases want 2 distinct CPUs. */
if (num_online_cpus() < 2)
return -EINVAL;
/* Want the system to not use breakpoints elsewhere. */
if (hw_breakpoint_is_used())
return -EBUSY;
return 0;
}
static void test_exit(struct kunit *test)
{
for (int i = 0; i < MAX_TEST_BREAKPOINTS; ++i) {
if (test_bps[i])
unregister_test_bp(&test_bps[i]);
}
if (__other_task) {
kthread_stop(__other_task);
__other_task = NULL;
}
/* Verify that internal state agrees that no breakpoints are in use. */
KUNIT_EXPECT_FALSE(test, hw_breakpoint_is_used());
}
static struct kunit_suite hw_breakpoint_test_suite = {
.name = "hw_breakpoint",
.test_cases = hw_breakpoint_test_cases,
.init = test_init,
.exit = test_exit,
};
kunit_test_suites(&hw_breakpoint_test_suite);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Marco Elver <elver@google.com>");
...@@ -192,6 +192,12 @@ EXPORT_SYMBOL_GPL(__percpu_down_read); ...@@ -192,6 +192,12 @@ EXPORT_SYMBOL_GPL(__percpu_down_read);
__sum; \ __sum; \
}) })
bool percpu_is_read_locked(struct percpu_rw_semaphore *sem)
{
return per_cpu_sum(*sem->read_count) != 0 && !atomic_read(&sem->block);
}
EXPORT_SYMBOL_GPL(percpu_is_read_locked);
/* /*
* Return true if the modular sum of the sem->read_count per-CPU variable is * Return true if the modular sum of the sem->read_count per-CPU variable is
* zero. If this sum is zero, then it is stable due to the fact that if any * zero. If this sum is zero, then it is stable due to the fact that if any
......
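To make the semantics of the two new predicates concrete, a toy userspace model (not the kernel implementation): "read locked" means the summed reader count is non-zero and no writer has set the block flag, while "write locked" means the block flag is set. Callers such as the reworked hw_breakpoint code can then assert that one of the two holds before touching shared state (hypothetical usage):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_percpu_rwsem {
	int read_count[4];	/* stand-in for the per-CPU reader counts */
	int block;		/* stand-in for sem->block */
};

static bool toy_is_read_locked(struct toy_percpu_rwsem *sem)
{
	int sum = 0;

	for (int cpu = 0; cpu < 4; cpu++)
		sum += sem->read_count[cpu];
	return sum != 0 && !sem->block;
}

static bool toy_is_write_locked(struct toy_percpu_rwsem *sem)
{
	return sem->block;
}

int main(void)
{
	struct toy_percpu_rwsem sem = { { 0 }, 0 };

	sem.read_count[1] = 1;			/* one reader on "CPU 1" */
	assert(toy_is_read_locked(&sem));
	assert(!toy_is_write_locked(&sem));

	sem.read_count[1] = 0;
	sem.block = 1;				/* a writer holds it */
	assert(toy_is_write_locked(&sem));
	assert(!toy_is_read_locked(&sem));

	printf("predicates behave as described\n");
	return 0;
}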
...@@ -1706,6 +1706,9 @@ BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx, ...@@ -1706,6 +1706,9 @@ BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx,
if (unlikely(flags & ~BPF_F_GET_BRANCH_RECORDS_SIZE)) if (unlikely(flags & ~BPF_F_GET_BRANCH_RECORDS_SIZE))
return -EINVAL; return -EINVAL;
if (unlikely(!(ctx->data->sample_flags & PERF_SAMPLE_BRANCH_STACK)))
return -ENOENT;
if (unlikely(!br_stack)) if (unlikely(!br_stack))
return -ENOENT; return -ENOENT;
......
...@@ -2556,6 +2556,16 @@ config FORTIFY_KUNIT_TEST ...@@ -2556,6 +2556,16 @@ config FORTIFY_KUNIT_TEST
by the str*() and mem*() family of functions. For testing runtime by the str*() and mem*() family of functions. For testing runtime
traps of FORTIFY_SOURCE, see LKDTM's "FORTIFY_*" tests. traps of FORTIFY_SOURCE, see LKDTM's "FORTIFY_*" tests.
config HW_BREAKPOINT_KUNIT_TEST
bool "Test hw_breakpoint constraints accounting" if !KUNIT_ALL_TESTS
depends on HAVE_HW_BREAKPOINT
depends on KUNIT=y
default KUNIT_ALL_TESTS
help
Tests for hw_breakpoint constraints accounting.
If unsure, say N.
config TEST_UDELAY config TEST_UDELAY
tristate "udelay test driver" tristate "udelay test driver"
help help
......