Commit 42dec9a9 authored by Linus Torvalds

Merge tag 'perf-core-2021-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf event updates from Ingo Molnar:

 - Improve Intel uncore PMU support:

     - Parse uncore 'discovery tables' - a new hardware capability
       enumeration method introduced on the latest Intel platforms. This
       table is in a well-defined PCI namespace location and is read via
       MMIO. It is organized in an rbtree.

       These uncore tables will allow the discovery of standard counter
       blocks, but fancier counters still need to be enumerated
       explicitly.

     - Add Alder Lake support

     - Improve IIO stacks to PMON mapping support on Skylake servers

 - Add Intel Alder Lake PMU support - which requires the introduction of
   'hybrid' CPUs and PMUs. Alder Lake is a mix of Golden Cove ('big')
   and Gracemont ('small' - Atom derived) cores.

   The CPU-side feature set is entirely symmetrical - but on the PMU
   side there's core type dependent PMU functionality.

 - Reduce data loss with CPU level hardware tracing on Intel PT / AUX
   profiling, by fixing the AUX allocation watermark logic.

 - Improve ring buffer allocation on NUMA systems

 - Put 'struct perf_event' into its own separate kmem_cache pool

 - Add support for synchronous signals for select perf events. The
   immediate motivation is to support low-overhead sampling-based race
   detection for user-space code. The feature consists of the following
   main changes:

     - Add thread-only event inheritance via
       perf_event_attr::inherit_thread, which limits inheritance of
       events to CLONE_THREAD.

     - Add the ability for events to not leak through exec(), via
       perf_event_attr::remove_on_exec.

     - Allow the generation of SIGTRAP via perf_event_attr::sigtrap,
       extend siginfo with a u64 ::si_perf, and add the breakpoint
       information to ::si_addr and ::si_perf if the event is
       PERF_TYPE_BREAKPOINT.

   The siginfo support is adequate for breakpoints right now - but the
   new field can be used to introduce support for other types of
   metadata passed over siginfo as well.

 - Misc fixes, cleanups and smaller updates.

* tag 'perf-core-2021-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (53 commits)
  signal, perf: Add missing TRAP_PERF case in siginfo_layout()
  signal, perf: Fix siginfo_t by avoiding u64 on 32-bit architectures
  perf/x86: Allow for 8<num_fixed_counters<16
  perf/x86/rapl: Add support for Intel Alder Lake
  perf/x86/cstate: Add Alder Lake CPU support
  perf/x86/msr: Add Alder Lake CPU support
  perf/x86/intel/uncore: Add Alder Lake support
  perf: Extend PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
  perf/x86/intel: Add Alder Lake Hybrid support
  perf/x86: Support filter_match callback
  perf/x86/intel: Add attr_update for Hybrid PMUs
  perf/x86: Add structures for the attributes of Hybrid PMUs
  perf/x86: Register hybrid PMUs
  perf/x86: Factor out x86_pmu_show_pmu_cap
  perf/x86: Remove temporary pmu assignment in event_init
  perf/x86/intel: Factor out intel_pmu_check_extra_regs
  perf/x86/intel: Factor out intel_pmu_check_event_constraints
  perf/x86/intel: Factor out intel_pmu_check_num_counters
  perf/x86: Hybrid PMU support for extra_regs
  perf/x86: Hybrid PMU support for event constraints
  ...
parents 03b2cd72 ed8e5080
......@@ -622,6 +622,9 @@ static inline void siginfo_build_tests(void)
/* _sigfault._addr_pkey */
BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x12);
/* _sigfault._perf */
BUILD_BUG_ON(offsetof(siginfo_t, si_perf) != 0x10);
/* _sigpoll */
BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x0c);
BUILD_BUG_ON(offsetof(siginfo_t, si_fd) != 0x10);
......
......@@ -81,12 +81,12 @@ static struct attribute_group amd_iommu_events_group = {
};
struct amd_iommu_event_desc {
struct kobj_attribute attr;
struct device_attribute attr;
const char *event;
};
static ssize_t _iommu_event_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
static ssize_t _iommu_event_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct amd_iommu_event_desc *event =
container_of(attr, struct amd_iommu_event_desc, attr);
......
......@@ -275,14 +275,14 @@ static struct attribute_group amd_uncore_attr_group = {
};
#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \
static ssize_t __uncore_##_var##_show(struct kobject *kobj, \
struct kobj_attribute *attr, \
static ssize_t __uncore_##_var##_show(struct device *dev, \
struct device_attribute *attr, \
char *page) \
{ \
BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
return sprintf(page, _format "\n"); \
} \
static struct kobj_attribute format_attr_##_var = \
static struct device_attribute format_attr_##_var = \
__ATTR(_name, 0444, __uncore_##_var##_show, NULL)
DEFINE_UNCORE_FORMAT_ATTR(event12, event, "config:0-7,32-35");
......
This diff is collapsed.
......@@ -3,6 +3,6 @@ obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o
obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o
obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o
obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o
intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o
intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o uncore_discovery.o
obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o
intel-cstate-objs := cstate.o
This diff is collapsed.
......@@ -40,7 +40,7 @@
* Model specific counters:
* MSR_CORE_C1_RES: CORE C1 Residency Counter
* perf code: 0x00
* Available model: SLM,AMT,GLM,CNL,TNT
* Available model: SLM,AMT,GLM,CNL,TNT,ADL
* Scope: Core (each processor core has a MSR)
* MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
* perf code: 0x01
......@@ -51,46 +51,49 @@
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
* SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL,
* TNT,RKL
* TNT,RKL,ADL
* Scope: Core
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
* perf code: 0x03
* Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML,
* ICL,TGL,RKL
* ICL,TGL,RKL,ADL
* Scope: Core
* MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
* perf code: 0x00
* Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
* KBL,CML,ICL,TGL,TNT,RKL
* KBL,CML,ICL,TGL,TNT,RKL,ADL
* Scope: Package (physical package)
* MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
* perf code: 0x01
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
* GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL
* GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL,
* ADL
* Scope: Package (physical package)
* MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
* SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL,
* TNT,RKL
* TNT,RKL,ADL
* Scope: Package (physical package)
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
* perf code: 0x03
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL,
* KBL,CML,ICL,TGL,RKL
* KBL,CML,ICL,TGL,RKL,ADL
* Scope: Package (physical package)
* MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter.
* perf code: 0x04
* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL
* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
* ADL
* Scope: Package (physical package)
* MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter.
* perf code: 0x05
* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL
* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
* ADL
* Scope: Package (physical package)
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
* perf code: 0x06
* Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
* TNT,RKL
* TNT,RKL,ADL
* Scope: Package (physical package)
*
*/
......@@ -563,6 +566,20 @@ static const struct cstate_model icl_cstates __initconst = {
BIT(PERF_CSTATE_PKG_C10_RES),
};
/*
 * C-state residency events advertised for CPU models that select this
 * table in intel_cstates_match[] (ALDERLAKE and ALDERLAKE_L):
 * core C1/C6/C7 and package C2/C3/C6/C7/C8/C9/C10.
 */
static const struct cstate_model adl_cstates __initconst = {
.core_events = BIT(PERF_CSTATE_CORE_C1_RES) |
BIT(PERF_CSTATE_CORE_C6_RES) |
BIT(PERF_CSTATE_CORE_C7_RES),
.pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
BIT(PERF_CSTATE_PKG_C3_RES) |
BIT(PERF_CSTATE_PKG_C6_RES) |
BIT(PERF_CSTATE_PKG_C7_RES) |
BIT(PERF_CSTATE_PKG_C8_RES) |
BIT(PERF_CSTATE_PKG_C9_RES) |
BIT(PERF_CSTATE_PKG_C10_RES),
};
static const struct cstate_model slm_cstates __initconst = {
.core_events = BIT(PERF_CSTATE_CORE_C1_RES) |
BIT(PERF_CSTATE_CORE_C6_RES),
......@@ -650,6 +667,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &icl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
......
......@@ -779,6 +779,13 @@ struct event_constraint intel_glm_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
/*
 * PEBS event constraint table, terminated by EVENT_CONSTRAINT_END.
 * NOTE(review): "grt" presumably refers to the Gracemont core type;
 * confirm against the PMU setup code that installs this table.
 */
struct event_constraint intel_grt_pebs_event_constraints[] = {
/* Allow all events as PEBS with no flags */
INTEL_PLD_CONSTRAINT(0x5d0, 0xf),
INTEL_PSD_CONSTRAINT(0x6d0, 0xf),
EVENT_CONSTRAINT_END
};
struct event_constraint intel_nehalem_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
......@@ -959,13 +966,14 @@ struct event_constraint intel_spr_pebs_event_constraints[] = {
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
struct event_constraint *pebs_constraints = hybrid(event->pmu, pebs_constraints);
struct event_constraint *c;
if (!event->attr.precise_ip)
return NULL;
if (x86_pmu.pebs_constraints) {
for_each_event_constraint(c, x86_pmu.pebs_constraints) {
if (pebs_constraints) {
for_each_event_constraint(c, pebs_constraints) {
if (constraint_match(c, event->hw.config)) {
event->hw.flags |= c->flags;
return c;
......@@ -1007,6 +1015,8 @@ void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
{
struct debug_store *ds = cpuc->ds;
int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
u64 threshold;
int reserved;
......@@ -1014,9 +1024,9 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
return;
if (x86_pmu.flags & PMU_FL_PEBS_ALL)
reserved = x86_pmu.max_pebs_events + x86_pmu.num_counters_fixed;
reserved = max_pebs_events + num_counters_fixed;
else
reserved = x86_pmu.max_pebs_events;
reserved = max_pebs_events;
if (cpuc->n_pebs == cpuc->n_large_pebs) {
threshold = ds->pebs_absolute_maximum -
......@@ -2071,6 +2081,8 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
{
short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
struct debug_store *ds = cpuc->ds;
struct perf_event *event;
void *base, *at, *top;
......@@ -2085,9 +2097,9 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
ds->pebs_index = ds->pebs_buffer_base;
mask = ((1ULL << x86_pmu.max_pebs_events) - 1) |
(((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
mask = ((1ULL << max_pebs_events) - 1) |
(((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
size = INTEL_PMC_IDX_FIXED + num_counters_fixed;
if (unlikely(base >= top)) {
intel_pmu_pebs_event_update_no_drain(cpuc, size);
......@@ -2191,7 +2203,7 @@ void __init intel_ds_init(void)
PERF_SAMPLE_TIME;
x86_pmu.flags |= PMU_FL_PEBS_ALL;
pebs_qual = "-baseline";
x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
} else {
/* Only basic record supported */
x86_pmu.large_pebs_flags &=
......@@ -2204,9 +2216,9 @@ void __init intel_ds_init(void)
}
pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
if (x86_pmu.intel_cap.pebs_output_pt_available) {
if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) {
pr_cont("PEBS-via-PT, ");
x86_get_pmu()->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
}
break;
......
......@@ -705,7 +705,7 @@ void intel_pmu_lbr_add(struct perf_event *event)
void release_lbr_buffers(void)
{
struct kmem_cache *kmem_cache = x86_get_pmu()->task_ctx_cache;
struct kmem_cache *kmem_cache;
struct cpu_hw_events *cpuc;
int cpu;
......@@ -714,6 +714,7 @@ void release_lbr_buffers(void)
for_each_possible_cpu(cpu) {
cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
if (kmem_cache && cpuc->lbr_xsave) {
kmem_cache_free(kmem_cache, cpuc->lbr_xsave);
cpuc->lbr_xsave = NULL;
......@@ -1609,7 +1610,7 @@ void intel_pmu_lbr_init_hsw(void)
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
x86_get_pmu()->task_ctx_cache = create_lbr_kmem_cache(size, 0);
x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
if (lbr_from_signext_quirk_needed())
static_branch_enable(&lbr_from_quirk_key);
......@@ -1629,7 +1630,7 @@ __init void intel_pmu_lbr_init_skl(void)
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
x86_get_pmu()->task_ctx_cache = create_lbr_kmem_cache(size, 0);
x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
/*
* SW branch filter usage:
......@@ -1726,7 +1727,7 @@ static bool is_arch_lbr_xsave_available(void)
void __init intel_pmu_arch_lbr_init(void)
{
struct pmu *pmu = x86_get_pmu();
struct pmu *pmu = x86_get_pmu(smp_processor_id());
union cpuid28_eax eax;
union cpuid28_ebx ebx;
union cpuid28_ecx ecx;
......
......@@ -947,7 +947,7 @@ static void p4_pmu_enable_pebs(u64 config)
(void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert);
}
static void p4_pmu_enable_event(struct perf_event *event)
static void __p4_pmu_enable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int thread = p4_ht_config_thread(hwc->config);
......@@ -983,6 +983,16 @@ static void p4_pmu_enable_event(struct perf_event *event)
(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
}
/*
 * Per-CPU bitmap with one bit per counter index. A bit is set when the
 * counter is enabled through p4_pmu_enable_event(); the IRQ handler tests
 * and clears it to account for in-flight interrupts.
 */
static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(X86_PMC_IDX_MAX)], p4_running);

/*
 * Mark the event's counter index in this CPU's p4_running bitmap, then
 * hand the event to __p4_pmu_enable_event() for the actual enable.
 */
static void p4_pmu_enable_event(struct perf_event *event)
{
	__set_bit(event->hw.idx, per_cpu(p4_running, smp_processor_id()));
	__p4_pmu_enable_event(event);
}
static void p4_pmu_enable_all(int added)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
......@@ -992,7 +1002,7 @@ static void p4_pmu_enable_all(int added)
struct perf_event *event = cpuc->events[idx];
if (!test_bit(idx, cpuc->active_mask))
continue;
p4_pmu_enable_event(event);
__p4_pmu_enable_event(event);
}
}
......@@ -1012,7 +1022,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
if (!test_bit(idx, cpuc->active_mask)) {
/* catch in-flight IRQs */
if (__test_and_clear_bit(idx, cpuc->running))
if (__test_and_clear_bit(idx, per_cpu(p4_running, smp_processor_id())))
handled++;
continue;
}
......
This diff is collapsed.
......@@ -42,6 +42,7 @@ struct intel_uncore_pmu;
struct intel_uncore_box;
struct uncore_event_desc;
struct freerunning_counters;
struct intel_uncore_topology;
struct intel_uncore_type {
const char *name;
......@@ -50,6 +51,7 @@ struct intel_uncore_type {
int perf_ctr_bits;
int fixed_ctr_bits;
int num_freerunning_types;
int type_id;
unsigned perf_ctr;
unsigned event_ctl;
unsigned event_mask;
......@@ -57,6 +59,7 @@ struct intel_uncore_type {
unsigned fixed_ctr;
unsigned fixed_ctl;
unsigned box_ctl;
u64 *box_ctls; /* Unit ctrl addr of the first box of each die */
union {
unsigned msr_offset;
unsigned mmio_offset;
......@@ -65,7 +68,12 @@ struct intel_uncore_type {
unsigned num_shared_regs:8;
unsigned single_fixed:1;
unsigned pair_ctr_ctl:1;
unsigned *msr_offsets;
union {
unsigned *msr_offsets;
unsigned *pci_offsets;
unsigned *mmio_offsets;
};
unsigned *box_ids;
struct event_constraint unconstrainted;
struct event_constraint *constraints;
struct intel_uncore_pmu *pmus;
......@@ -80,7 +88,7 @@ struct intel_uncore_type {
* to identify which platform component each PMON block of that type is
* supposed to monitor.
*/
u64 *topology;
struct intel_uncore_topology *topology;
/*
* Optional callbacks for managing mapping of Uncore units to PMONs
*/
......@@ -169,6 +177,11 @@ struct freerunning_counters {
unsigned *box_offsets;
};
/*
 * Per-die topology record; intel_uncore_type::topology points to an array
 * of these indexed by die.
 */
struct intel_uncore_topology {
/* Raw value filled by skx_msr_cpu_bus_read(); skx_iio_stack() shifts it by pmu_idx * BUS_NUM_STRIDE */
u64 configuration;
/* Value returned by uncore_die_to_segment() for the die; printed as the "%04x" part of the mapping */
int segment;
};
struct pci2phy_map {
struct list_head list;
int segment;
......@@ -177,6 +190,7 @@ struct pci2phy_map {
struct pci2phy_map *__find_pci2phy_map(int segment);
int uncore_pcibus_to_dieid(struct pci_bus *bus);
int uncore_die_to_segment(int die);
ssize_t uncore_event_show(struct device *dev,
struct device_attribute *attr, char *buf);
......@@ -547,6 +561,7 @@ uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event);
void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event);
u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
extern struct intel_uncore_type *empty_uncore[];
extern struct intel_uncore_type **uncore_msr_uncores;
extern struct intel_uncore_type **uncore_pci_uncores;
extern struct intel_uncore_type **uncore_mmio_uncores;
......@@ -567,6 +582,7 @@ void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
void skl_uncore_cpu_init(void);
void icl_uncore_cpu_init(void);
void adl_uncore_cpu_init(void);
void tgl_uncore_cpu_init(void);
void tgl_uncore_mmio_init(void);
void tgl_l_uncore_mmio_init(void);
......
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0-only */
/* Generic device ID of a discovery table device */
#define UNCORE_DISCOVERY_TABLE_DEVICE 0x09a7
/* Capability ID for a discovery table device */
#define UNCORE_EXT_CAP_ID_DISCOVERY 0x23
/* First DVSEC offset */
#define UNCORE_DISCOVERY_DVSEC_OFFSET 0x8
/* Mask of the supported discovery entry type */
#define UNCORE_DISCOVERY_DVSEC_ID_MASK 0xffff
/* PMON discovery entry type ID */
#define UNCORE_DISCOVERY_DVSEC_ID_PMON 0x1
/* Second DVSEC offset */
#define UNCORE_DISCOVERY_DVSEC2_OFFSET 0xc
/* Mask of the discovery table BAR offset */
#define UNCORE_DISCOVERY_DVSEC2_BIR_MASK 0x7
/* Discovery table BAR base offset */
#define UNCORE_DISCOVERY_BIR_BASE 0x10
/* Discovery table BAR step */
#define UNCORE_DISCOVERY_BIR_STEP 0x4
/* Mask of the discovery table offset */
#define UNCORE_DISCOVERY_MASK 0xf
/* Global discovery table size */
#define UNCORE_DISCOVERY_GLOBAL_MAP_SIZE 0x20
/*
 * Field extractors for a packed PCI address value:
 *   bits 28-30: PCI domain
 *   bits 20-27: PCI bus
 *   bits 12-19: PCI devfn
 *   bits  0-11: box control offset
 *
 * The argument is parenthesized so that expression arguments
 * (e.g. "a | b") are evaluated as a whole before shifting/masking.
 */
#define UNCORE_DISCOVERY_PCI_DOMAIN(data)	(((data) >> 28) & 0x7)
#define UNCORE_DISCOVERY_PCI_BUS(data)		(((data) >> 20) & 0xff)
#define UNCORE_DISCOVERY_PCI_DEVFN(data)	(((data) >> 12) & 0xff)
#define UNCORE_DISCOVERY_PCI_BOX_CTRL(data)	((data) & 0xfff)
/*
 * True when any of the three 64-bit words of a discovery entry
 * (table1, ctl, table3) is either all zeroes or all ones.
 *
 * "unit" is a struct value (not a pointer); it is parenthesized so that
 * arguments such as "*ptr" expand correctly.
 */
#define uncore_discovery_invalid_unit(unit)				\
	(!(unit).table1 || !(unit).ctl || !(unit).table3 ||		\
	 (unit).table1 == -1ULL || (unit).ctl == -1ULL ||		\
	 (unit).table3 == -1ULL)
/*
 * Bit masks for a generic PMON counter control value:
 * bits 0-7, bits 8-15, bit 18, bit 23 and bits 24-31.
 * NOTE(review): by name these are event select, umask, edge detect,
 * invert and threshold ("TRESH") - confirm against the hardware spec.
 */
#define GENERIC_PMON_CTL_EV_SEL_MASK 0x000000ff
#define GENERIC_PMON_CTL_UMASK_MASK 0x0000ff00
#define GENERIC_PMON_CTL_EDGE_DET (1 << 18)
#define GENERIC_PMON_CTL_INVERT (1 << 23)
#define GENERIC_PMON_CTL_TRESH_MASK 0xff000000
/* OR of all five control masks above */
#define GENERIC_PMON_RAW_EVENT_MASK (GENERIC_PMON_CTL_EV_SEL_MASK | \
GENERIC_PMON_CTL_UMASK_MASK | \
GENERIC_PMON_CTL_EDGE_DET | \
GENERIC_PMON_CTL_INVERT | \
GENERIC_PMON_CTL_TRESH_MASK)
/* Box control bits 0, 8 and 9; _INT is the OR of the two RST bits */
#define GENERIC_PMON_BOX_CTL_FRZ (1 << 0)
#define GENERIC_PMON_BOX_CTL_RST_CTRL (1 << 8)
#define GENERIC_PMON_BOX_CTL_RST_CTRS (1 << 9)
#define GENERIC_PMON_BOX_CTL_INT (GENERIC_PMON_BOX_CTL_RST_CTRL | \
GENERIC_PMON_BOX_CTL_RST_CTRS)
/*
 * How an uncore unit is accessed: MSR, MMIO or PCI.
 * UNCORE_ACCESS_MAX is the number of access types, not a type itself.
 */
enum uncore_access_type {
UNCORE_ACCESS_MSR = 0,
UNCORE_ACCESS_MMIO,
UNCORE_ACCESS_PCI,
UNCORE_ACCESS_MAX,
};
/*
 * Global discovery entry: three 64-bit words (table1, ctl, table3).
 * The anonymous bitfield structs overlay table1 and table3 so individual
 * fields can be read by name; each set of bitfields adds up to 64 bits.
 */
struct uncore_global_discovery {
union {
u64 table1;
struct {
u64 type : 8,
stride : 8,
max_units : 10,
__reserved_1 : 36,
access_type : 2;
};
};
u64 ctl; /* Global Control Address */
union {
u64 table3;
struct {
u64 status_offset : 8,
num_status : 16,
__reserved_2 : 40;
};
};
};
/*
 * Per-unit discovery entry: three 64-bit words (table1, ctl, table3).
 * The anonymous bitfield structs overlay table1 and table3; each set of
 * bitfields adds up to 64 bits. uncore_discovery_invalid_unit() checks
 * the three raw words.
 */
struct uncore_unit_discovery {
union {
u64 table1;
struct {
u64 num_regs : 8,
ctl_offset : 8,
bit_width : 8,
ctr_offset : 8,
status_offset : 8,
__reserved_1 : 22,
access_type : 2;
};
};
u64 ctl; /* Unit Control Address */
union {
u64 table3;
struct {
u64 box_type : 16,
box_id : 16,
__reserved_2 : 32;
};
};
};
/*
 * Description of one uncore block type; the embedded rb_node lets
 * instances be linked into an rbtree.
 */
struct intel_uncore_discovery_type {
struct rb_node node;
enum uncore_access_type access_type;
u64 box_ctrl; /* Unit ctrl addr of the first box */
u64 *box_ctrl_die; /* Unit ctrl addr of the first box of each die */
u16 type; /* Type ID of the uncore block */
u8 num_counters;
u8 counter_width;
u8 ctl_offset; /* Counter Control 0 offset */
u8 ctr_offset; /* Counter 0 offset */
u16 num_boxes; /* number of boxes for the uncore block */
unsigned int *ids; /* Box IDs */
unsigned int *box_offset; /* Box offset */
};
bool intel_uncore_has_discovery_tables(void);
void intel_uncore_clear_discovery_tables(void);
void intel_uncore_generic_uncore_cpu_init(void);
int intel_uncore_generic_uncore_pci_init(void);
void intel_uncore_generic_uncore_mmio_init(void);
......@@ -62,6 +62,8 @@
#define PCI_DEVICE_ID_INTEL_TGL_H_IMC 0x9a36
#define PCI_DEVICE_ID_INTEL_RKL_1_IMC 0x4c43
#define PCI_DEVICE_ID_INTEL_RKL_2_IMC 0x4c53
#define PCI_DEVICE_ID_INTEL_ADL_1_IMC 0x4660
#define PCI_DEVICE_ID_INTEL_ADL_2_IMC 0x4641
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
......@@ -131,12 +133,33 @@
#define ICL_UNC_ARB_PER_CTR 0x3b1
#define ICL_UNC_ARB_PERFEVTSEL 0x3b3
/* ADL uncore global control */
#define ADL_UNC_PERF_GLOBAL_CTL 0x2ff0
#define ADL_UNC_FIXED_CTR_CTRL 0x2fde
#define ADL_UNC_FIXED_CTR 0x2fdf
/* ADL Cbo register */
#define ADL_UNC_CBO_0_PER_CTR0 0x2002
#define ADL_UNC_CBO_0_PERFEVTSEL0 0x2000
#define ADL_UNC_CTL_THRESHOLD 0x3f000000
#define ADL_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
SNB_UNC_CTL_UMASK_MASK | \
SNB_UNC_CTL_EDGE_DET | \
SNB_UNC_CTL_INVERT | \
ADL_UNC_CTL_THRESHOLD)
/* ADL ARB register */
#define ADL_UNC_ARB_PER_CTR0 0x2FD2
#define ADL_UNC_ARB_PERFEVTSEL0 0x2FD0
#define ADL_UNC_ARB_MSR_OFFSET 0x8
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");
DEFINE_UNCORE_FORMAT_ATTR(threshold, threshold, "config:24-29");
/* Sandy Bridge uncore support */
static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
......@@ -422,6 +445,106 @@ void tgl_uncore_cpu_init(void)
skl_uncore_msr_ops.init_box = rkl_uncore_msr_init_box;
}
/*
 * Box init hook: only the box whose PMU index is 0 writes
 * SNB_UNC_GLOBAL_CTL_EN to ADL_UNC_PERF_GLOBAL_CTL; for every other box
 * this is a no-op.
 */
static void adl_uncore_msr_init_box(struct intel_uncore_box *box)
{
	if (box->pmu->pmu_idx != 0)
		return;

	wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN);
}
/*
 * Box enable hook: writes SNB_UNC_GLOBAL_CTL_EN to
 * ADL_UNC_PERF_GLOBAL_CTL. Unlike the init/disable/exit hooks, this write
 * is not gated on pmu_idx == 0.
 */
static void adl_uncore_msr_enable_box(struct intel_uncore_box *box)
{
wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN);
}
/*
 * Box disable hook: only the box whose PMU index is 0 clears
 * ADL_UNC_PERF_GLOBAL_CTL; for every other box this is a no-op.
 */
static void adl_uncore_msr_disable_box(struct intel_uncore_box *box)
{
	if (box->pmu->pmu_idx != 0)
		return;

	wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, 0);
}
/*
 * Box exit hook: only the box whose PMU index is 0 clears
 * ADL_UNC_PERF_GLOBAL_CTL; for every other box this is a no-op.
 */
static void adl_uncore_msr_exit_box(struct intel_uncore_box *box)
{
	if (box->pmu->pmu_idx != 0)
		return;

	wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, 0);
}
/*
 * MSR ops used by the ADL uncore types in this file: ADL-specific box
 * init/enable/disable/exit hooks, with the per-event enable/disable and
 * counter read reusing the existing SNB and generic MSR helpers.
 */
static struct intel_uncore_ops adl_uncore_msr_ops = {
.init_box = adl_uncore_msr_init_box,
.enable_box = adl_uncore_msr_enable_box,
.disable_box = adl_uncore_msr_disable_box,
.exit_box = adl_uncore_msr_exit_box,
.disable_event = snb_uncore_msr_disable_event,
.enable_event = snb_uncore_msr_enable_event,
.read_counter = uncore_msr_read_counter,
};
static struct attribute *adl_uncore_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
&format_attr_edge.attr,
&format_attr_inv.attr,
&format_attr_threshold.attr,
NULL,
};
static const struct attribute_group adl_uncore_format_group = {
.name = "format",
.attrs = adl_uncore_formats_attr,
};
/*
 * ADL "cbox" uncore type. num_boxes is not set here; it is filled in at
 * init time by adl_uncore_cpu_init() from icl_get_cbox_num().
 */
static struct intel_uncore_type adl_uncore_cbox = {
.name = "cbox",
.num_counters = 2,
.perf_ctr_bits = 44,
.perf_ctr = ADL_UNC_CBO_0_PER_CTR0,
.event_ctl = ADL_UNC_CBO_0_PERFEVTSEL0,
.event_mask = ADL_UNC_RAW_EVENT_MASK,
.msr_offset = ICL_UNC_CBO_MSR_OFFSET,
.ops = &adl_uncore_msr_ops,
.format_group = &adl_uncore_format_group,
};
static struct intel_uncore_type adl_uncore_arb = {
.name = "arb",
.num_counters = 2,
.num_boxes = 2,
.perf_ctr_bits = 44,
.perf_ctr = ADL_UNC_ARB_PER_CTR0,
.event_ctl = ADL_UNC_ARB_PERFEVTSEL0,
.event_mask = SNB_UNC_RAW_EVENT_MASK,
.msr_offset = ADL_UNC_ARB_MSR_OFFSET,
.constraints = snb_uncore_arb_constraints,
.ops = &adl_uncore_msr_ops,
.format_group = &snb_uncore_format_group,
};
static struct intel_uncore_type adl_uncore_clockbox = {
.name = "clock",
.num_counters = 1,
.num_boxes = 1,
.fixed_ctr_bits = 48,
.fixed_ctr = ADL_UNC_FIXED_CTR,
.fixed_ctl = ADL_UNC_FIXED_CTR_CTRL,
.single_fixed = 1,
.event_mask = SNB_UNC_CTL_EV_SEL_MASK,
.format_group = &icl_uncore_clock_format_group,
.ops = &adl_uncore_msr_ops,
.event_descs = icl_uncore_events,
};
static struct intel_uncore_type *adl_msr_uncores[] = {
&adl_uncore_cbox,
&adl_uncore_arb,
&adl_uncore_clockbox,
NULL,
};
/*
 * Set the cbox count from icl_get_cbox_num() and install adl_msr_uncores
 * as the active MSR uncore type list.
 */
void adl_uncore_cpu_init(void)
{
adl_uncore_cbox.num_boxes = icl_get_cbox_num();
uncore_msr_uncores = adl_msr_uncores;
}
enum {
SNB_PCI_UNCORE_IMC,
};
......@@ -1203,6 +1326,14 @@ static const struct pci_device_id tgl_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_H_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_1_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_2_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* end: all zeroes */ }
};
......
......@@ -3675,7 +3675,8 @@ static struct intel_uncore_ops skx_uncore_iio_ops = {
static inline u8 skx_iio_stack(struct intel_uncore_pmu *pmu, int die)
{
return pmu->type->topology[die] >> (pmu->pmu_idx * BUS_NUM_STRIDE);
return pmu->type->topology[die].configuration >>
(pmu->pmu_idx * BUS_NUM_STRIDE);
}
static umode_t
......@@ -3688,19 +3689,14 @@ skx_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die)
}
static ssize_t skx_iio_mapping_show(struct device *dev,
struct device_attribute *attr, char *buf)
struct device_attribute *attr, char *buf)
{
struct pci_bus *bus = pci_find_next_bus(NULL);
struct intel_uncore_pmu *uncore_pmu = dev_to_uncore_pmu(dev);
struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(dev);
struct dev_ext_attribute *ea = to_dev_ext_attribute(attr);
long die = (long)ea->var;
/*
* Current implementation is for single segment configuration hence it's
* safe to take the segment value from the first available root bus.
*/
return sprintf(buf, "%04x:%02x\n", pci_domain_nr(bus),
skx_iio_stack(uncore_pmu, die));
return sprintf(buf, "%04x:%02x\n", pmu->type->topology[die].segment,
skx_iio_stack(pmu, die));
}
static int skx_msr_cpu_bus_read(int cpu, u64 *topology)
......@@ -3737,34 +3733,32 @@ static int die_to_cpu(int die)
static int skx_iio_get_topology(struct intel_uncore_type *type)
{
int i, ret;
struct pci_bus *bus = NULL;
/*
* Verified single-segment environments only; disabled for multiple
* segment topologies for now except VMD domains.
* VMD domains start at 0x10000 to not clash with ACPI _SEG domains.
*/
while ((bus = pci_find_next_bus(bus))
&& (!pci_domain_nr(bus) || pci_domain_nr(bus) > 0xffff))
;
if (bus)
return -EPERM;
int die, ret = -EPERM;
type->topology = kcalloc(uncore_max_dies(), sizeof(u64), GFP_KERNEL);
type->topology = kcalloc(uncore_max_dies(), sizeof(*type->topology),
GFP_KERNEL);
if (!type->topology)
return -ENOMEM;
for (i = 0; i < uncore_max_dies(); i++) {
ret = skx_msr_cpu_bus_read(die_to_cpu(i), &type->topology[i]);
if (ret) {
kfree(type->topology);
type->topology = NULL;
return ret;
}
for (die = 0; die < uncore_max_dies(); die++) {
ret = skx_msr_cpu_bus_read(die_to_cpu(die),
&type->topology[die].configuration);
if (ret)
break;
ret = uncore_die_to_segment(die);
if (ret < 0)
break;
type->topology[die].segment = ret;
}
return 0;
if (ret < 0) {
kfree(type->topology);
type->topology = NULL;
}
return ret;
}
static struct attribute_group skx_iio_mapping_group = {
......@@ -3785,7 +3779,7 @@ static int skx_iio_set_mapping(struct intel_uncore_type *type)
struct dev_ext_attribute *eas = NULL;
ret = skx_iio_get_topology(type);
if (ret)
if (ret < 0)
goto clear_attr_update;
ret = -ENOMEM;
......
......@@ -100,6 +100,8 @@ static bool test_intel(int idx, void *data)
case INTEL_FAM6_TIGERLAKE_L:
case INTEL_FAM6_TIGERLAKE:
case INTEL_FAM6_ROCKETLAKE:
case INTEL_FAM6_ALDERLAKE:
case INTEL_FAM6_ALDERLAKE_L:
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
return true;
break;
......
......@@ -15,6 +15,7 @@
#include <linux/perf_event.h>
#include <asm/intel_ds.h>
#include <asm/cpu.h>
/* To enable MSR tracing please use the generic trace points. */
......@@ -228,7 +229,6 @@ struct cpu_hw_events {
*/
struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
int enabled;
int n_events; /* the # of events in the below arrays */
......@@ -327,6 +327,8 @@ struct cpu_hw_events {
int n_pair; /* Large increment events */
void *kfree_on_online[X86_PERF_KFREE_MAX];
struct pmu *pmu;
};
#define __EVENT_CONSTRAINT_RANGE(c, e, n, m, w, o, f) { \
......@@ -630,6 +632,71 @@ enum {
x86_lbr_exclusive_max,
};
/*
 * Per-PMU state for a hybrid system. The hybrid() accessor reads fields
 * from here instead of the same-named x86_pmu fields when is_hybrid() is
 * true and a pmu is supplied.
 */
struct x86_hybrid_pmu {
/* Embedded generic pmu; hybrid_pmu() maps a pointer to it back to this struct */
struct pmu pmu;
const char *name;
u8 cpu_type;
cpumask_t supported_cpus;
union perf_capabilities intel_cap;
u64 intel_ctrl;
int max_pebs_events;
int num_counters;
int num_counters_fixed;
struct event_constraint unconstrained;
u64 hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
u64 hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
struct event_constraint *event_constraints;
struct event_constraint *pebs_constraints;
struct extra_reg *extra_regs;
};
/*
 * Return the x86_hybrid_pmu that embeds @pmu as its 'pmu' member.
 * Only valid when @pmu really is embedded in a struct x86_hybrid_pmu.
 */
static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu)
{
return container_of(pmu, struct x86_hybrid_pmu, pmu);
}
extern struct static_key_false perf_is_hybrid;
/* True when the perf_is_hybrid static key is enabled */
#define is_hybrid() static_branch_unlikely(&perf_is_hybrid)
/*
 * hybrid(_pmu, _field) - lvalue accessor for a PMU capability field.
 *
 * Evaluates to x86_pmu._field by default. When is_hybrid() is true and
 * _pmu is non-NULL, it evaluates to the same-named field of the
 * x86_hybrid_pmu that contains _pmu instead.
 */
#define hybrid(_pmu, _field) \
(*({ \
typeof(&x86_pmu._field) __Fp = &x86_pmu._field; \
\
if (is_hybrid() && (_pmu)) \
__Fp = &hybrid_pmu(_pmu)->_field; \
\
__Fp; \
}))
/*
 * hybrid_var(_pmu, _var) - like hybrid(), but the default is the
 * standalone variable _var rather than a field of x86_pmu.
 *
 * When is_hybrid() is true and _pmu is non-NULL, it evaluates to the
 * field named _var in the x86_hybrid_pmu that contains _pmu.
 */
#define hybrid_var(_pmu, _var) \
(*({ \
typeof(&_var) __Fp = &_var; \
\
if (is_hybrid() && (_pmu)) \
__Fp = &hybrid_pmu(_pmu)->_var; \
\
__Fp; \
}))
/*
 * Hybrid PMU type identifiers; hybrid_big_small is the OR of both.
 * NOTE(review): the 0x40/0x20 values presumably match the hardware-reported
 * core type stored in x86_hybrid_pmu::cpu_type - confirm.
 */
enum hybrid_pmu_type {
hybrid_big = 0x40,
hybrid_small = 0x20,
hybrid_big_small = hybrid_big | hybrid_small,
};
/*
 * Index constants for the hybrid PMUs and their total count.
 * NOTE(review): presumably indices into x86_pmu.hybrid_pmu - confirm.
 */
#define X86_HYBRID_PMU_ATOM_IDX 0
#define X86_HYBRID_PMU_CORE_IDX 1
#define X86_HYBRID_NUM_PMUS 2
/*
* struct x86_pmu - generic x86 pmu
*/
......@@ -816,6 +883,19 @@ struct x86_pmu {
int (*check_period) (struct perf_event *event, u64 period);
int (*aux_output_match) (struct perf_event *event);
int (*filter_match)(struct perf_event *event);
/*
* Hybrid support
*
* Most PMU capabilities are the same among different hybrid PMUs.
* The global x86_pmu saves the architecture capabilities, which
* are available for all PMUs. The hybrid_pmu only includes the
* unique capabilities.
*/
int num_hybrid_pmus;
struct x86_hybrid_pmu *hybrid_pmu;
u8 (*get_hybrid_cpu_type) (void);
};
struct x86_perf_task_context_opt {
......@@ -905,7 +985,23 @@ static struct perf_pmu_events_ht_attr event_attr_##v = { \
.event_str_ht = ht, \
}
struct pmu *x86_get_pmu(void);
#define EVENT_ATTR_STR_HYBRID(_name, v, str, _pmu) \
static struct perf_pmu_events_hybrid_attr event_attr_##v = { \
.attr = __ATTR(_name, 0444, events_hybrid_sysfs_show, NULL),\
.id = 0, \
.event_str = str, \
.pmu_type = _pmu, \
}
/*
 * FORMAT_HYBRID_PTR(_id) - address of the innermost attribute (.attr.attr)
 * of the object format_attr_hybrid_<_id> created by FORMAT_ATTR_HYBRID().
 */
#define FORMAT_HYBRID_PTR(_id) (&format_attr_hybrid_##_id.attr.attr)
/*
 * FORMAT_ATTR_HYBRID(_name, _pmu) - define a static
 * struct perf_pmu_format_hybrid_attr named format_attr_hybrid_<_name>,
 * with .attr built by __ATTR_RO(_name) and .pmu_type set to _pmu.
 * NOTE(review): __ATTR_RO() presumably expects a matching <_name>_show()
 * to be in scope - confirm against its definition.
 */
#define FORMAT_ATTR_HYBRID(_name, _pmu) \
static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\
.attr = __ATTR_RO(_name), \
.pmu_type = _pmu, \
}
struct pmu *x86_get_pmu(unsigned int cpu);
extern struct x86_pmu x86_pmu __read_mostly;
static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
......@@ -964,6 +1060,9 @@ static inline int x86_pmu_rdpmc_index(int index)
return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
}
bool check_hw_exists(struct pmu *pmu, int num_counters,
int num_counters_fixed);
int x86_add_exclusive(unsigned int what);
void x86_del_exclusive(unsigned int what);
......@@ -1027,6 +1126,11 @@ void x86_pmu_enable_event(struct perf_event *event);
int x86_pmu_handle_irq(struct pt_regs *regs);
void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
u64 intel_ctrl);
void x86_pmu_update_cpu_context(struct pmu *pmu, int cpu);
extern struct event_constraint emptyconstraint;
extern struct event_constraint unconstrained;
......@@ -1067,10 +1171,15 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page);
ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page);
ssize_t events_hybrid_sysfs_show(struct device *dev,
struct device_attribute *attr,
char *page);
static inline bool fixed_counter_disabled(int i)
static inline bool fixed_counter_disabled(int i, struct pmu *pmu)
{
return !(x86_pmu.intel_ctrl >> (i + INTEL_PMC_IDX_FIXED));
u64 intel_ctrl = hybrid(pmu, intel_ctrl);
return !(intel_ctrl >> (i + INTEL_PMC_IDX_FIXED));
}
#ifdef CONFIG_CPU_SUP_AMD
......@@ -1154,6 +1263,8 @@ extern struct event_constraint intel_glm_pebs_event_constraints[];
extern struct event_constraint intel_glp_pebs_event_constraints[];
extern struct event_constraint intel_grt_pebs_event_constraints[];
extern struct event_constraint intel_nehalem_pebs_event_constraints[];
extern struct event_constraint intel_westmere_pebs_event_constraints[];
......
......@@ -800,6 +800,8 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx),
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr),
X86_MATCH_VENDOR_FAM(AMD, 0x17, &model_amd_fam17h),
X86_MATCH_VENDOR_FAM(HYGON, 0x18, &model_amd_fam17h),
......
......@@ -46,6 +46,7 @@ extern void switch_to_sld(unsigned long tifn);
extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
extern bool handle_guest_split_lock(unsigned long ip);
extern void handle_bus_lock(struct pt_regs *regs);
u8 get_this_hybrid_cpu_type(void);
#else
static inline void __init sld_setup(struct cpuinfo_x86 *c) {}
static inline void switch_to_sld(unsigned long tifn) {}
......@@ -60,6 +61,11 @@ static inline bool handle_guest_split_lock(unsigned long ip)
}
static inline void handle_bus_lock(struct pt_regs *regs) {}
/*
 * Stub for the #else branch of the surrounding conditional (the matching
 * #if is outside this excerpt): always reports CPU type 0.
 */
static inline u8 get_this_hybrid_cpu_type(void)
{
return 0;
}
#endif
#ifdef CONFIG_IA32_FEAT_CTL
void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
......
......@@ -379,6 +379,7 @@
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */
#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */
#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
......
......@@ -185,6 +185,9 @@
#define MSR_PEBS_DATA_CFG 0x000003f2
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
#define PERF_CAP_METRICS_IDX 15
#define PERF_CAP_PT_IDX 16
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
#define MSR_IA32_RTIT_CTL 0x00000570
......
......@@ -1268,3 +1268,19 @@ void __init sld_setup(struct cpuinfo_x86 *c)
sld_state_setup();
sld_state_show();
}
#define X86_HYBRID_CPU_TYPE_ID_SHIFT 24
/**
 * get_this_hybrid_cpu_type() - Get the type of this hybrid CPU
 *
 * Reads EAX of CPUID leaf 0x1a and returns its bits [31:24], which hold
 * the CPU type (i.e., Atom or Core) of a CPU in a hybrid processor.
 * Returns 0 if X86_FEATURE_HYBRID_CPU is not enabled.
 */
u8 get_this_hybrid_cpu_type(void)
{
	unsigned int eax;

	if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
		eax = cpuid_eax(0x0000001a);
		return eax >> X86_HYBRID_CPU_TYPE_ID_SHIFT;
	}

	return 0;
}
......@@ -29,7 +29,7 @@ static inline void signal_compat_build_tests(void)
BUILD_BUG_ON(NSIGFPE != 15);
BUILD_BUG_ON(NSIGSEGV != 9);
BUILD_BUG_ON(NSIGBUS != 5);
BUILD_BUG_ON(NSIGTRAP != 5);
BUILD_BUG_ON(NSIGTRAP != 6);
BUILD_BUG_ON(NSIGCHLD != 6);
BUILD_BUG_ON(NSIGSYS != 2);
......@@ -138,6 +138,9 @@ static inline void signal_compat_build_tests(void)
BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x20);
BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pkey) != 0x14);
BUILD_BUG_ON(offsetof(siginfo_t, si_perf) != 0x18);
BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf) != 0x10);
CHECK_CSI_OFFSET(_sigpoll);
CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int));
CHECK_SI_SIZE (_sigpoll, 4*sizeof(int));
......
......@@ -134,6 +134,10 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
#endif
new.ssi_addr_lsb = (short) kinfo->si_addr_lsb;
break;
case SIL_PERF_EVENT:
new.ssi_addr = (long) kinfo->si_addr;
new.ssi_perf = kinfo->si_perf;
break;
case SIL_CHLD:
new.ssi_pid = kinfo->si_pid;
new.ssi_uid = kinfo->si_uid;
......
......@@ -236,6 +236,8 @@ typedef struct compat_siginfo {
char _dummy_pkey[__COMPAT_ADDR_BND_PKEY_PAD];
u32 _pkey;
} _addr_pkey;
/* used when si_code=TRAP_PERF */
compat_ulong_t _perf;
};
} _sigfault;
......
......@@ -260,15 +260,16 @@ struct perf_event;
/**
* pmu::capabilities flags
*/
#define PERF_PMU_CAP_NO_INTERRUPT 0x01
#define PERF_PMU_CAP_NO_NMI 0x02
#define PERF_PMU_CAP_AUX_NO_SG 0x04
#define PERF_PMU_CAP_EXTENDED_REGS 0x08
#define PERF_PMU_CAP_EXCLUSIVE 0x10
#define PERF_PMU_CAP_ITRACE 0x20
#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40
#define PERF_PMU_CAP_NO_EXCLUDE 0x80
#define PERF_PMU_CAP_AUX_OUTPUT 0x100
#define PERF_PMU_CAP_NO_INTERRUPT 0x0001
#define PERF_PMU_CAP_NO_NMI 0x0002
#define PERF_PMU_CAP_AUX_NO_SG 0x0004
#define PERF_PMU_CAP_EXTENDED_REGS 0x0008
#define PERF_PMU_CAP_EXCLUSIVE 0x0010
#define PERF_PMU_CAP_ITRACE 0x0020
#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x0040
#define PERF_PMU_CAP_NO_EXCLUDE 0x0080
#define PERF_PMU_CAP_AUX_OUTPUT 0x0100
#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0200
struct perf_output_handle;
......@@ -607,6 +608,7 @@ struct swevent_hlist {
#define PERF_ATTACH_TASK_DATA 0x08
#define PERF_ATTACH_ITRACE 0x10
#define PERF_ATTACH_SCHED_CB 0x20
#define PERF_ATTACH_CHILD 0x40
struct perf_cgroup;
struct perf_buffer;
......@@ -734,6 +736,7 @@ struct perf_event {
int pending_wakeup;
int pending_kill;
int pending_disable;
unsigned long pending_addr; /* SIGTRAP */
struct irq_work pending;
atomic_t event_limit;
......@@ -957,7 +960,7 @@ extern void __perf_event_task_sched_in(struct task_struct *prev,
struct task_struct *task);
extern void __perf_event_task_sched_out(struct task_struct *prev,
struct task_struct *next);
extern int perf_event_init_task(struct task_struct *child);
extern int perf_event_init_task(struct task_struct *child, u64 clone_flags);
extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task);
extern void perf_event_delayed_put(struct task_struct *task);
......@@ -1176,30 +1179,24 @@ DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
* which is guaranteed by us not actually scheduling inside other swevents
* because those disable preemption.
*/
static __always_inline void
perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
static __always_inline void __perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
{
if (static_key_false(&perf_swevent_enabled[event_id])) {
struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
perf_fetch_caller_regs(regs);
___perf_sw_event(event_id, nr, regs, addr);
}
perf_fetch_caller_regs(regs);
___perf_sw_event(event_id, nr, regs, addr);
}
extern struct static_key_false perf_sched_events;
static __always_inline bool
perf_sw_migrate_enabled(void)
static __always_inline bool __perf_sw_enabled(int swevt)
{
if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
return true;
return false;
return static_key_false(&perf_swevent_enabled[swevt]);
}
static inline void perf_event_task_migrate(struct task_struct *task)
{
if (perf_sw_migrate_enabled())
if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS))
task->sched_migrated = 1;
}
......@@ -1209,11 +1206,9 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
if (static_branch_unlikely(&perf_sched_events))
__perf_event_task_sched_in(prev, task);
if (perf_sw_migrate_enabled() && task->sched_migrated) {
struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
perf_fetch_caller_regs(regs);
___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS) &&
task->sched_migrated) {
__perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
task->sched_migrated = 0;
}
}
......@@ -1221,7 +1216,15 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
static inline void perf_event_task_sched_out(struct task_struct *prev,
struct task_struct *next)
{
perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES))
__perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
#ifdef CONFIG_CGROUP_PERF
if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) &&
perf_cgroup_from_task(prev, NULL) !=
perf_cgroup_from_task(next, NULL))
__perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
#endif
if (static_branch_unlikely(&perf_sched_events))
__perf_event_task_sched_out(prev, next);
......@@ -1448,7 +1451,8 @@ perf_event_task_sched_in(struct task_struct *prev,
static inline void
perf_event_task_sched_out(struct task_struct *prev,
struct task_struct *next) { }
static inline int perf_event_init_task(struct task_struct *child) { return 0; }
static inline int perf_event_init_task(struct task_struct *child,
u64 clone_flags) { return 0; }
static inline void perf_event_exit_task(struct task_struct *child) { }
static inline void perf_event_free_task(struct task_struct *task) { }
static inline void perf_event_delayed_put(struct task_struct *task) { }
......@@ -1477,8 +1481,6 @@ static inline int perf_event_refresh(struct perf_event *event, int refresh)
static inline void
perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { }
static inline void
perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) { }
static inline void
perf_bp_event(struct perf_event *event, void *data) { }
static inline int perf_register_guest_info_callbacks
......@@ -1548,6 +1550,18 @@ struct perf_pmu_events_ht_attr {
const char *event_str_noht;
};
/*
 * Device attribute describing an event by string, tagged with a PMU type.
 */
struct perf_pmu_events_hybrid_attr {
/* Embedded device attribute. */
struct device_attribute attr;
u64 id;
/* Event description string. */
const char *event_str;
/* NOTE(review): presumably a mask of the hybrid PMU type(s) the event applies to - confirm. */
u64 pmu_type;
};
/*
 * Device attribute for a format entry, tagged with a PMU type.
 */
struct perf_pmu_format_hybrid_attr {
/* Embedded device attribute. */
struct device_attribute attr;
/* NOTE(review): presumably a mask of the hybrid PMU type(s) the format applies to - confirm. */
u64 pmu_type;
};
ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page);
......
......@@ -43,6 +43,7 @@ enum siginfo_layout {
SIL_FAULT_MCEERR,
SIL_FAULT_BNDERR,
SIL_FAULT_PKUERR,
SIL_PERF_EVENT,
SIL_CHLD,
SIL_RT,
SIL_SYS,
......
......@@ -91,6 +91,8 @@ union __sifields {
char _dummy_pkey[__ADDR_BND_PKEY_PAD];
__u32 _pkey;
} _addr_pkey;
/* used when si_code=TRAP_PERF */
unsigned long _perf;
};
} _sigfault;
......@@ -155,6 +157,7 @@ typedef struct siginfo {
#define si_lower _sifields._sigfault._addr_bnd._lower
#define si_upper _sifields._sigfault._addr_bnd._upper
#define si_pkey _sifields._sigfault._addr_pkey._pkey
#define si_perf _sifields._sigfault._perf
#define si_band _sifields._sigpoll._band
#define si_fd _sifields._sigpoll._fd
#define si_call_addr _sifields._sigsys._call_addr
......@@ -253,7 +256,8 @@ typedef struct siginfo {
#define TRAP_BRANCH 3 /* process taken branch trap */
#define TRAP_HWBKPT 4 /* hardware breakpoint/watchpoint */
#define TRAP_UNK 5 /* undiagnosed trap */
#define NSIGTRAP 5
#define TRAP_PERF 6 /* perf event with sigtrap=1 */
#define NSIGTRAP 6
/*
* There is an additional set of SIGTRAP si_codes used by ptrace
......
......@@ -37,6 +37,21 @@ enum perf_type_id {
PERF_TYPE_MAX, /* non-ABI */
};
/*
* attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
* PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA
* AA: hardware event ID
* EEEEEEEE: PMU type ID
* PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB
* BB: hardware cache ID
* CC: hardware cache op ID
* DD: hardware cache op result ID
* EEEEEEEE: PMU type ID
* If the PMU type ID is 0, the PERF_TYPE_RAW will be applied.
*/
#define PERF_PMU_TYPE_SHIFT 32
#define PERF_HW_EVENT_MASK 0xffffffff
/*
* Generalized performance event event_id types, used by the
* attr.event_id parameter of the sys_perf_event_open()
......@@ -112,6 +127,7 @@ enum perf_sw_ids {
PERF_COUNT_SW_EMULATION_FAULTS = 8,
PERF_COUNT_SW_DUMMY = 9,
PERF_COUNT_SW_BPF_OUTPUT = 10,
PERF_COUNT_SW_CGROUP_SWITCHES = 11,
PERF_COUNT_SW_MAX, /* non-ABI */
};
......@@ -311,6 +327,7 @@ enum perf_event_read_format {
#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */
#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */
#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */
#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */
/*
* Hardware event_id to monitor via a performance monitoring event:
......@@ -389,7 +406,10 @@ struct perf_event_attr {
cgroup : 1, /* include cgroup events */
text_poke : 1, /* include text poke events */
build_id : 1, /* use build id in mmap2 events */
__reserved_1 : 29;
inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */
remove_on_exec : 1, /* event is removed from task on exec */
sigtrap : 1, /* send synchronous SIGTRAP on event */
__reserved_1 : 26;
union {
__u32 wakeup_events; /* wakeup every n events */
......@@ -441,6 +461,12 @@ struct perf_event_attr {
__u16 __reserved_2;
__u32 aux_sample_size;
__u32 __reserved_3;
/*
* User provided data if sigtrap=1, passed back to user via
* siginfo_t::si_perf, e.g. to permit user to identify the event.
*/
__u64 sig_data;
};
/*
......
......@@ -39,6 +39,8 @@ struct signalfd_siginfo {
__s32 ssi_syscall;
__u64 ssi_call_addr;
__u32 ssi_arch;
__u32 __pad3;
__u64 ssi_perf;
/*
* Pad structure to 128 bytes. Remember to update the
......@@ -49,7 +51,7 @@ struct signalfd_siginfo {
* comes out of a read(2) and we really don't want to have
* a compat on read(2).
*/
__u8 __pad[28];
__u8 __pad[16];
};
......
This diff is collapsed.
......@@ -674,21 +674,26 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
if (!has_aux(event))
return -EOPNOTSUPP;
/*
* We need to start with the max_order that fits in nr_pages,
* not the other way around, hence ilog2() and not get_order.
*/
max_order = ilog2(nr_pages);
/*
* PMU requests more than one contiguous chunks of memory
* for SW double buffering
*/
if (!overwrite) {
if (!max_order)
return -EINVAL;
/*
* Watermark defaults to half the buffer, and so does the
* max_order, to aid PMU drivers in double buffering.
*/
if (!watermark)
watermark = nr_pages << (PAGE_SHIFT - 1);
max_order--;
/*
* Use aux_watermark as the basis for chunking to
* help PMU drivers honor the watermark.
*/
max_order = get_order(watermark);
} else {
/*
* We need to start with the max_order that fits in nr_pages,
* not the other way around, hence ilog2() and not get_order.
*/
max_order = ilog2(nr_pages);
watermark = 0;
}
rb->aux_pages = kcalloc_node(nr_pages, sizeof(void *), GFP_KERNEL,
......@@ -743,9 +748,6 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
rb->aux_overwrite = overwrite;
rb->aux_watermark = watermark;
if (!rb->aux_watermark && !rb->aux_overwrite)
rb->aux_watermark = nr_pages << (PAGE_SHIFT - 1);
out:
if (!ret)
rb->aux_pgoff = pgoff;
......@@ -804,7 +806,7 @@ struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
{
struct perf_buffer *rb;
unsigned long size;
int i;
int i, node;
size = sizeof(struct perf_buffer);
size += nr_pages * sizeof(void *);
......@@ -812,7 +814,8 @@ struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
if (order_base_2(size) >= PAGE_SHIFT+MAX_ORDER)
goto fail;
rb = kzalloc(size, GFP_KERNEL);
node = (cpu == -1) ? cpu : cpu_to_node(cpu);
rb = kzalloc_node(size, GFP_KERNEL, node);
if (!rb)
goto fail;
......@@ -906,11 +909,13 @@ struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
struct perf_buffer *rb;
unsigned long size;
void *all_buf;
int node;
size = sizeof(struct perf_buffer);
size += sizeof(void *);
rb = kzalloc(size, GFP_KERNEL);
node = (cpu == -1) ? cpu : cpu_to_node(cpu);
rb = kzalloc_node(size, GFP_KERNEL, node);
if (!rb)
goto fail;
......
......@@ -2084,7 +2084,7 @@ static __latent_entropy struct task_struct *copy_process(
if (retval)
goto bad_fork_cleanup_policy;
retval = perf_event_init_task(p);
retval = perf_event_init_task(p, clone_flags);
if (retval)
goto bad_fork_cleanup_policy;
retval = audit_alloc(p);
......
......@@ -1197,6 +1197,7 @@ static inline bool has_si_pid_and_uid(struct kernel_siginfo *info)
case SIL_FAULT_MCEERR:
case SIL_FAULT_BNDERR:
case SIL_FAULT_PKUERR:
case SIL_PERF_EVENT:
case SIL_SYS:
ret = false;
break;
......@@ -2529,6 +2530,7 @@ static void hide_si_addr_tag_bits(struct ksignal *ksig)
case SIL_FAULT_MCEERR:
case SIL_FAULT_BNDERR:
case SIL_FAULT_PKUERR:
case SIL_PERF_EVENT:
ksig->info.si_addr = arch_untagged_si_addr(
ksig->info.si_addr, ksig->sig, ksig->info.si_code);
break;
......@@ -3210,6 +3212,8 @@ enum siginfo_layout siginfo_layout(unsigned sig, int si_code)
else if ((sig == SIGSEGV) && (si_code == SEGV_PKUERR))
layout = SIL_FAULT_PKUERR;
#endif
else if ((sig == SIGTRAP) && (si_code == TRAP_PERF))
layout = SIL_PERF_EVENT;
}
else if (si_code <= NSIGPOLL)
layout = SIL_POLL;
......@@ -3339,6 +3343,10 @@ void copy_siginfo_to_external32(struct compat_siginfo *to,
#endif
to->si_pkey = from->si_pkey;
break;
case SIL_PERF_EVENT:
to->si_addr = ptr_to_compat(from->si_addr);
to->si_perf = from->si_perf;
break;
case SIL_CHLD:
to->si_pid = from->si_pid;
to->si_uid = from->si_uid;
......@@ -3419,6 +3427,10 @@ static int post_copy_siginfo_from_user32(kernel_siginfo_t *to,
#endif
to->si_pkey = from->si_pkey;
break;
case SIL_PERF_EVENT:
to->si_addr = compat_ptr(from->si_addr);
to->si_perf = from->si_perf;
break;
case SIL_CHLD:
to->si_pid = from->si_pid;
to->si_uid = from->si_uid;
......@@ -4599,6 +4611,7 @@ static inline void siginfo_buildtime_checks(void)
CHECK_OFFSET(si_lower);
CHECK_OFFSET(si_upper);
CHECK_OFFSET(si_pkey);
CHECK_OFFSET(si_perf);
/* sigpoll */
CHECK_OFFSET(si_band);
......
......@@ -776,6 +776,12 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
}
}
if (!opts->auxtrace_snapshot_mode && !opts->auxtrace_sample_mode) {
u32 aux_watermark = opts->auxtrace_mmap_pages * page_size / 4;
intel_pt_evsel->core.attr.aux_watermark = aux_watermark;
}
intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
"tsc", &tsc_bit);
......
# SPDX-License-Identifier: GPL-2.0-only
sigtrap_threads
remove_on_exec
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -Wl,-no-as-needed -Wall -I../../../../usr/include
LDFLAGS += -lpthread
TEST_GEN_PROGS := sigtrap_threads remove_on_exec
include ../lib.mk
// SPDX-License-Identifier: GPL-2.0
/*
* Test for remove_on_exec.
*
* Copyright (C) 2021, Google LLC.
*/
#define _GNU_SOURCE
/* We need the latest siginfo from the kernel repo. */
#include <sys/types.h>
#include <asm/siginfo.h>
#define __have_siginfo_t 1
#define __have_sigval_t 1
#define __have_sigevent_t 1
#define __siginfo_t_defined
#define __sigval_t_defined
#define __sigevent_t_defined
#define _BITS_SIGINFO_CONSTS_H 1
#define _BITS_SIGEVENT_CONSTS_H 1
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <linux/perf_event.h>
#include <pthread.h>
#include <signal.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include "../kselftest_harness.h"
static volatile int signal_count;
/*
 * Build the perf_event_attr used by every test in this file: a hardware
 * instruction counter sampling every 1000 events, user space only,
 * created disabled, inherited by children, removed on exec and delivering
 * SIGTRAP on overflow.
 */
static struct perf_event_attr make_event_attr(void)
{
	struct perf_event_attr attr = {};

	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.sample_period = 1000;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;
	attr.disabled = 1;
	attr.inherit = 1;
	/*
	 * Children normally retain their inherited event on exec; with
	 * remove_on_exec, we'll remove their event, but the parent and
	 * any other non-exec'd children will keep their events.
	 */
	attr.remove_on_exec = 1;
	attr.sigtrap = 1;

	return attr;
}
/*
 * SIGTRAP handler: count signals whose si_code is TRAP_PERF; report any
 * other si_code on stderr without counting it.
 */
static void sigtrap_handler(int signum, siginfo_t *info, void *ucontext)
{
	if (info->si_code == TRAP_PERF) {
		signal_count++;
		return;
	}

	fprintf(stderr, "%s: unexpected si_code %d\n", __func__, info->si_code);
}
/* Per-test state for the remove_on_exec fixture. */
FIXTURE(remove_on_exec)
{
/* SIGTRAP disposition saved by the fixture setup and restored by the teardown. */
struct sigaction oldact;
/* perf event file descriptor opened by the fixture setup and closed by the teardown. */
int fd;
};
FIXTURE_SETUP(remove_on_exec)
{
struct perf_event_attr attr = make_event_attr();
struct sigaction action = {};
signal_count = 0;
/* Initialize sigtrap handler. */
action.sa_flags = SA_SIGINFO | SA_NODEFER;
action.sa_sigaction = sigtrap_handler;
sigemptyset(&action.sa_mask);
ASSERT_EQ(sigaction(SIGTRAP, &action, &self->oldact), 0);
/* Initialize perf event. */
self->fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
ASSERT_NE(self->fd, -1);
}
/* Undo the fixture setup: close the perf event fd, then restore the saved SIGTRAP action. */
FIXTURE_TEARDOWN(remove_on_exec)
{
/* NOTE(review): the event is presumably closed first so it cannot fire once the test's handler is gone - confirm. */
close(self->fd);
sigaction(SIGTRAP, &self->oldact, NULL);
}
/*
 * Verify event propagates to fork'd child.
 *
 * The child enables the event through the inherited fd and exits with
 * status 42 once it has received a SIGTRAP; the parent waits for its own
 * SIGTRAP and then checks the child's exit status.
 */
TEST_F(remove_on_exec, fork_only)
{
	int status;
	pid_t pid = fork();

	/*
	 * If fork() failed there is no child to enable the event, and the
	 * busy-wait below would never terminate.
	 */
	ASSERT_NE(pid, -1);

	if (pid == 0) {
		ASSERT_EQ(signal_count, 0);
		ASSERT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
		while (!signal_count);
		_exit(42);
	}

	while (!signal_count); /* Child enables event. */
	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(WEXITSTATUS(status), 42);
}
/*
 * Verify that event does _not_ propagate to fork+exec'd child; event enabled
 * after fork+exec.
 */
TEST_F(remove_on_exec, fork_exec_then_enable)
{
	pid_t pid_exec, pid_only_fork;
	int pipefd[2];
	int tmp;

	/*
	 * Non-exec child, to ensure exec does not affect inherited events of
	 * other children.
	 */
	pid_only_fork = fork();
	/* A pid of -1 must never reach waitpid()/kill() below. */
	ASSERT_NE(pid_only_fork, -1);
	if (pid_only_fork == 0) {
		/* Block until parent enables event. */
		while (!signal_count);
		_exit(42);
	}

	ASSERT_NE(pipe(pipefd), -1);
	pid_exec = fork();
	/*
	 * kill(-1, SIGKILL) would signal every process we are allowed to
	 * signal, so stop here if fork() failed. Take the spinning first
	 * child down with us rather than leaving it behind.
	 */
	if (pid_exec == -1)
		kill(pid_only_fork, SIGKILL);
	ASSERT_NE(pid_exec, -1);
	if (pid_exec == 0) {
		ASSERT_NE(dup2(pipefd[1], STDOUT_FILENO), -1);
		close(pipefd[0]);
		execl("/proc/self/exe", "exec_child", NULL);
		_exit((perror("exec failed"), 1));
	}
	close(pipefd[1]);

	ASSERT_EQ(waitpid(pid_exec, &tmp, WNOHANG), 0); /* Child is running. */
	/* Wait for exec'd child to start spinning. */
	EXPECT_EQ(read(pipefd[0], &tmp, sizeof(int)), sizeof(int));
	EXPECT_EQ(tmp, 42);
	close(pipefd[0]);
	/* Now we can enable the event, knowing the child is doing work. */
	EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
	/* If the event propagated to the exec'd child, it will exit normally... */
	usleep(100000); /* ... give time for event to trigger (in case of bug). */
	EXPECT_EQ(waitpid(pid_exec, &tmp, WNOHANG), 0); /* Should still be running. */
	EXPECT_EQ(kill(pid_exec, SIGKILL), 0);

	/* Verify removal from child did not affect this task's event. */
	tmp = signal_count;
	while (signal_count == tmp); /* Should not hang! */
	/* Nor should it have affected the first child. */
	EXPECT_EQ(waitpid(pid_only_fork, &tmp, 0), pid_only_fork);
	EXPECT_EQ(WEXITSTATUS(tmp), 42);
}
/*
 * Verify that event does _not_ propagate to fork+exec'd child; event enabled
 * before fork+exec.
 */
TEST_F(remove_on_exec, enable_then_fork_exec)
{
	pid_t pid_exec;
	int tmp;

	EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);

	pid_exec = fork();
	/*
	 * kill(-1, SIGKILL) would signal every process we are allowed to
	 * signal, so a failed fork() must stop the test here.
	 */
	ASSERT_NE(pid_exec, -1);
	if (pid_exec == 0) {
		execl("/proc/self/exe", "exec_child", NULL);
		_exit((perror("exec failed"), 1));
	}

	/*
	 * The child may exit abnormally at any time if the event propagated and
	 * a SIGTRAP is sent before the handler was set up.
	 */
	usleep(100000); /* ... give time for event to trigger (in case of bug). */
	EXPECT_EQ(waitpid(pid_exec, &tmp, WNOHANG), 0); /* Should still be running. */
	EXPECT_EQ(kill(pid_exec, SIGKILL), 0);

	/* Verify removal from child did not affect this task's event. */
	tmp = signal_count;
	while (signal_count == tmp); /* Should not hang! */
}
/*
 * Fork+exec a batch of children, the first ones while the event is still
 * disabled and the rest after it has been enabled. None of them may be
 * affected by the event, and the parent's event must keep firing.
 */
TEST_F(remove_on_exec, exec_stress)
{
	pid_t pids[30];
	int i, j, tmp;

	for (i = 0; i < sizeof(pids) / sizeof(pids[0]); i++) {
		pids[i] = fork();
		if (pids[i] == 0) {
			execl("/proc/self/exe", "exec_child", NULL);
			_exit((perror("exec failed"), 1));
		}

		/*
		 * A pid of -1 must never reach the kill() loop below:
		 * kill(-1, SIGKILL) signals every process we are allowed to
		 * signal. Kill the children spawned so far, then fail.
		 */
		if (pids[i] == -1) {
			for (j = 0; j < i; j++)
				kill(pids[j], SIGKILL);
		}
		ASSERT_NE(pids[i], -1);

		/* Some forked with event disabled, rest with enabled. */
		if (i > 10)
			EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
	}

	usleep(100000); /* ... give time for event to trigger (in case of bug). */

	for (i = 0; i < sizeof(pids) / sizeof(pids[0]); i++) {
		/* All children should still be running. */
		EXPECT_EQ(waitpid(pids[i], &tmp, WNOHANG), 0);
		EXPECT_EQ(kill(pids[i], SIGKILL), 0);
	}

	/* Verify event is still alive. */
	tmp = signal_count;
	while (signal_count == tmp);
}
/*
 * Body of the exec'd child: install the SIGTRAP handler, write the value
 * 42 to stdout to tell the parent we are about to spin, then busy-wait
 * until a SIGTRAP is counted. Exits with status 1 if sigaction() or
 * write() fails.
 */
static void exec_child(void)
{
	const int ready_token = 42;
	struct sigaction action = {};

	/* Set up sigtrap handler in case we erroneously receive a trap. */
	sigemptyset(&action.sa_mask);
	action.sa_sigaction = sigtrap_handler;
	action.sa_flags = SA_SIGINFO | SA_NODEFER;
	if (sigaction(SIGTRAP, &action, NULL) != 0) {
		perror("sigaction failed");
		_exit(1);
	}

	/* Signal parent that we're starting to spin. */
	if (write(STDOUT_FILENO, &ready_token, sizeof(int)) == -1) {
		perror("write failed");
		_exit(1);
	}

	/* Should hang here until killed. */
	while (signal_count == 0)
		;
}
#define main test_main
TEST_HARNESS_MAIN
#undef main
/*
 * Program entry point.
 *
 * When re-executed with argv[0] == "exec_child", run the child body and
 * return 1 if it ever comes back (it is expected to spin until killed).
 * Otherwise hand over to the test harness main.
 */
int main(int argc, char *argv[])
{
	/*
	 * A process can be exec'd with an empty argument vector, in which
	 * case argv[0] is NULL; never pass that to strcmp().
	 */
	if (argc > 0 && argv[0] && !strcmp(argv[0], "exec_child")) {
		exec_child();
		return 1;
	}

	return test_main(argc, argv);
}
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment