Commit 42e7ddba authored by Will Deacon's avatar Will Deacon

Merge branch 'for-next/perf' into for-next/core

* for-next/perf: (41 commits)
  arm64: Add USER_STACKTRACE support
  drivers/perf: hisi: hns3: Actually use devm_add_action_or_reset()
  drivers/perf: hisi: hns3: Fix out-of-bound access when valid event group
  drivers/perf: hisi_pcie: Fix out-of-bound access when valid event group
  perf/arm-spe: Assign parents for event_source device
  perf/arm-smmuv3: Assign parents for event_source device
  perf/arm-dsu: Assign parents for event_source device
  perf/arm-dmc620: Assign parents for event_source device
  perf/arm-ccn: Assign parents for event_source device
  perf/arm-cci: Assign parents for event_source device
  perf/alibaba_uncore: Assign parents for event_source device
  perf/arm_pmu: Assign parents for event_source devices
  perf/imx_ddr: Assign parents for event_source devices
  perf/qcom: Assign parents for event_source devices
  Documentation: qcom-pmu: Use /sys/bus/event_source/devices paths
  perf/riscv: Assign parents for event_source devices
  perf/thunderx2: Assign parents for event_source devices
  Documentation: thunderx2-pmu: Use /sys/bus/event_source/devices paths
  perf/xgene: Assign parents for event_source devices
  Documentation: xgene-pmu: Use /sys/bus/event_source/devices paths
  ...
parents a5a5ce57 410e471f
......@@ -20,7 +20,6 @@ interrupt, and the PMU driver shall register perf PMU drivers like L3C,
HHA and DDRC etc. The available events and configuration options shall
be described in the sysfs, see:
/sys/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>/, or
/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>.
The "perf list" command shall list the available events from sysfs.
......
......@@ -16,7 +16,7 @@ HNS3 PMU driver
The HNS3 PMU driver registers a perf PMU with the name of its sicl id.::
/sys/devices/hns3_pmu_sicl_<sicl_id>
/sys/bus/event_source/devices/hns3_pmu_sicl_<sicl_id>
PMU driver provides description of available events, filter modes, format,
identifier and cpumask in sysfs.
......@@ -40,9 +40,9 @@ device.
Example usage of checking event code and subevent code::
$# cat /sys/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_time
$# cat /sys/bus/event_source/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_time
config=0x00204
$# cat /sys/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_packet_num
$# cat /sys/bus/event_source/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_packet_num
config=0x10204
Each performance statistic has a pair of events to get two values to
......@@ -60,7 +60,7 @@ computation to calculate real performance data is:::
Example usage of checking supported filter mode::
$# cat /sys/devices/hns3_pmu_sicl_0/filtermode/bw_ssu_rpu_byte_num
$# cat /sys/bus/event_source/devices/hns3_pmu_sicl_0/filtermode/bw_ssu_rpu_byte_num
filter mode supported: global/port/port-tc/func/func-queue/
Example usage of perf::
......
......@@ -10,7 +10,7 @@ There is one logical L2 PMU exposed, which aggregates the results from
the physical PMUs.
The driver provides a description of its available events and configuration
options in sysfs, see /sys/devices/l2cache_0.
options in sysfs, see /sys/bus/event_source/devices/l2cache_0.
The "format" directory describes the format of the events.
......
......@@ -9,7 +9,7 @@ PMU with device name l3cache_<socket>_<instance>. User space is responsible
for aggregating across slices.
The driver provides a description of its available events and configuration
options in sysfs, see /sys/devices/l3cache*. Given that these are uncore PMUs
options in sysfs, see /sys/bus/event_source/devices/l3cache*. Given that these are uncore PMUs
the driver also exposes a "cpumask" sysfs attribute which contains a mask
consisting of one CPU per socket which will be used to handle all the PMU
events on that socket.
......
......@@ -22,7 +22,7 @@ The thunderx2_pmu driver registers per-socket perf PMUs for the DMC and
L3C devices. Each PMU can be used to count up to 4 (DMC/L3C) or up to 8
(CCPI2) events simultaneously. The PMUs provide a description of their
available events and configuration options under sysfs, see
/sys/devices/uncore_<l3c_S/dmc_S/ccpi2_S/>; S is the socket id.
/sys/bus/event_source/devices/uncore_<l3c_S/dmc_S/ccpi2_S/>; S is the socket id.
The driver does not support sampling, therefore "perf record" will not
work. Per-task perf sessions are also not supported.
......
......@@ -13,7 +13,7 @@ PMU (perf) driver
The xgene-pmu driver registers several perf PMU drivers. Each of the perf
driver provides description of its available events and configuration options
in sysfs, see /sys/devices/<l3cX/iobX/mcbX/mcX>/.
in sysfs, see /sys/bus/event_source/devices/<l3cX/iobX/mcbX/mcX>/.
The "format" directory describes format of the config (event ID),
config1 (agent ID) fields of the perf_event_attr structure. The "events"
......
......@@ -259,6 +259,7 @@ config ARM64
select TRACE_IRQFLAGS_SUPPORT
select TRACE_IRQFLAGS_NMI_SUPPORT
select HAVE_SOFTIRQ_ON_OWN_STACK
select USER_STACKTRACE_SUPPORT
help
ARM 64-bit (AArch64) Linux support.
......
......@@ -476,9 +476,10 @@ alternative_endif
*/
.macro reset_pmuserenr_el0, tmpreg
mrs \tmpreg, id_aa64dfr0_el1
sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
cmp \tmpreg, #1 // Skip if no PMU present
b.lt 9000f
ubfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
cmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_NI
ccmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne
b.eq 9000f // Skip if no PMU present or IMP_DEF
msr pmuserenr_el0, xzr // Disable PMU access from EL0
9000:
.endm
......
......@@ -59,13 +59,14 @@
.macro __init_el2_debug
mrs x1, id_aa64dfr0_el1
sbfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
cmp x0, #1
b.lt .Lskip_pmu_\@ // Skip if no PMU present
ubfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
cmp x0, #ID_AA64DFR0_EL1_PMUVer_NI
ccmp x0, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne
b.eq .Lskip_pmu_\@ // Skip if no PMU present or IMP_DEF
mrs x0, pmcr_el0 // Disable debug access traps
ubfx x0, x0, #11, #5 // to EL2 and allow access to
.Lskip_pmu_\@:
csel x2, xzr, x0, lt // all PMU counters from EL1
csel x2, xzr, x0, eq // all PMU counters from EL1
/* Statistical profiling */
ubfx x0, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4
......
......@@ -10,94 +10,12 @@
#include <asm/pointer_auth.h>
struct frame_tail {
struct frame_tail __user *fp;
unsigned long lr;
} __attribute__((packed));
/*
* Get the return address for a single stackframe and return a pointer to the
* next frame tail.
*/
static struct frame_tail __user *
user_backtrace(struct frame_tail __user *tail,
struct perf_callchain_entry_ctx *entry)
{
struct frame_tail buftail;
unsigned long err;
unsigned long lr;
/* Also check accessibility of one struct frame_tail beyond */
if (!access_ok(tail, sizeof(buftail)))
return NULL;
pagefault_disable();
err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
pagefault_enable();
if (err)
return NULL;
lr = ptrauth_strip_user_insn_pac(buftail.lr);
perf_callchain_store(entry, lr);
/*
* Frame pointers should strictly progress back up the stack
* (towards higher addresses).
*/
if (tail >= buftail.fp)
return NULL;
return buftail.fp;
}
#ifdef CONFIG_COMPAT
/*
* The registers we're interested in are at the end of the variable
* length saved register structure. The fp points at the end of this
* structure so the address of this struct is:
* (struct compat_frame_tail *)(xxx->fp)-1
*
* This code has been adapted from the ARM OProfile support.
*/
struct compat_frame_tail {
compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */
u32 sp;
u32 lr;
} __attribute__((packed));
static struct compat_frame_tail __user *
compat_user_backtrace(struct compat_frame_tail __user *tail,
struct perf_callchain_entry_ctx *entry)
static bool callchain_trace(void *data, unsigned long pc)
{
struct compat_frame_tail buftail;
unsigned long err;
/* Also check accessibility of one struct frame_tail beyond */
if (!access_ok(tail, sizeof(buftail)))
return NULL;
pagefault_disable();
err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
pagefault_enable();
if (err)
return NULL;
perf_callchain_store(entry, buftail.lr);
/*
* Frame pointers should strictly progress back up the stack
* (towards higher addresses).
*/
if (tail + 1 >= (struct compat_frame_tail __user *)
compat_ptr(buftail.fp))
return NULL;
struct perf_callchain_entry_ctx *entry = data;
return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1;
return perf_callchain_store(entry, pc) == 0;
}
#endif /* CONFIG_COMPAT */
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
......@@ -107,35 +25,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
return;
}
perf_callchain_store(entry, regs->pc);
if (!compat_user_mode(regs)) {
/* AARCH64 mode */
struct frame_tail __user *tail;
tail = (struct frame_tail __user *)regs->regs[29];
while (entry->nr < entry->max_stack &&
tail && !((unsigned long)tail & 0x7))
tail = user_backtrace(tail, entry);
} else {
#ifdef CONFIG_COMPAT
/* AARCH32 compat mode */
struct compat_frame_tail __user *tail;
tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
while ((entry->nr < entry->max_stack) &&
tail && !((unsigned long)tail & 0x3))
tail = compat_user_backtrace(tail, entry);
#endif
}
}
static bool callchain_trace(void *data, unsigned long pc)
{
struct perf_callchain_entry_ctx *entry = data;
return perf_callchain_store(entry, pc) == 0;
arch_stack_walk_user(callchain_trace, entry, regs);
}
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
......
......@@ -324,3 +324,123 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
dump_backtrace(NULL, tsk, loglvl);
barrier();
}
/*
* The struct defined for userspace stack frame in AARCH64 mode.
*/
struct frame_tail {
struct frame_tail __user *fp;
unsigned long lr;
} __attribute__((packed));
/*
* Get the return address for a single stackframe and return a pointer to the
* next frame tail.
*/
static struct frame_tail __user *
unwind_user_frame(struct frame_tail __user *tail, void *cookie,
stack_trace_consume_fn consume_entry)
{
struct frame_tail buftail;
unsigned long err;
unsigned long lr;
/* Also check accessibility of one struct frame_tail beyond */
if (!access_ok(tail, sizeof(buftail)))
return NULL;
pagefault_disable();
err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
pagefault_enable();
if (err)
return NULL;
lr = ptrauth_strip_user_insn_pac(buftail.lr);
if (!consume_entry(cookie, lr))
return NULL;
/*
* Frame pointers should strictly progress back up the stack
* (towards higher addresses).
*/
if (tail >= buftail.fp)
return NULL;
return buftail.fp;
}
#ifdef CONFIG_COMPAT
/*
* The registers we're interested in are at the end of the variable
* length saved register structure. The fp points at the end of this
* structure so the address of this struct is:
* (struct compat_frame_tail *)(xxx->fp)-1
*
* This code has been adapted from the ARM OProfile support.
*/
struct compat_frame_tail {
compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */
u32 sp;
u32 lr;
} __attribute__((packed));
static struct compat_frame_tail __user *
unwind_compat_user_frame(struct compat_frame_tail __user *tail, void *cookie,
stack_trace_consume_fn consume_entry)
{
struct compat_frame_tail buftail;
unsigned long err;
/* Also check accessibility of one struct frame_tail beyond */
if (!access_ok(tail, sizeof(buftail)))
return NULL;
pagefault_disable();
err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
pagefault_enable();
if (err)
return NULL;
if (!consume_entry(cookie, buftail.lr))
return NULL;
/*
* Frame pointers should strictly progress back up the stack
* (towards higher addresses).
*/
if (tail + 1 >= (struct compat_frame_tail __user *)
compat_ptr(buftail.fp))
return NULL;
return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1;
}
#endif /* CONFIG_COMPAT */
void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
const struct pt_regs *regs)
{
if (!consume_entry(cookie, regs->pc))
return;
if (!compat_user_mode(regs)) {
/* AARCH64 mode */
struct frame_tail __user *tail;
tail = (struct frame_tail __user *)regs->regs[29];
while (tail && !((unsigned long)tail & 0x7))
tail = unwind_user_frame(tail, cookie, consume_entry);
} else {
#ifdef CONFIG_COMPAT
/* AARCH32 compat mode */
struct compat_frame_tail __user *tail;
tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
while (tail && !((unsigned long)tail & 0x3))
tail = unwind_compat_user_frame(tail, cookie, consume_entry);
#endif
}
}
......@@ -709,6 +709,7 @@ static int ali_drw_pmu_probe(struct platform_device *pdev)
drw_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
.parent = &pdev->dev,
.task_ctx_nr = perf_invalid_context,
.event_init = ali_drw_pmu_event_init,
.add = ali_drw_pmu_add,
......@@ -746,18 +747,14 @@ static int ali_drw_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
struct ali_drw_pmu_irq *irq;
struct ali_drw_pmu *drw_pmu;
unsigned int target;
int ret;
cpumask_t node_online_cpus;
irq = hlist_entry_safe(node, struct ali_drw_pmu_irq, node);
if (cpu != irq->cpu)
return 0;
ret = cpumask_and(&node_online_cpus,
cpumask_of_node(cpu_to_node(cpu)), cpu_online_mask);
if (ret)
target = cpumask_any_but(&node_online_cpus, cpu);
else
target = cpumask_any_and_but(cpumask_of_node(cpu_to_node(cpu)),
cpu_online_mask, cpu);
if (target >= nr_cpu_ids)
target = cpumask_any_but(cpu_online_mask, cpu);
if (target >= nr_cpu_ids)
......
......@@ -492,6 +492,7 @@ int meson_ddr_pmu_create(struct platform_device *pdev)
*pmu = (struct ddr_pmu) {
.pmu = {
.module = THIS_MODULE,
.parent = &pdev->dev,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
.task_ctx_nr = perf_invalid_context,
.attr_groups = attr_groups,
......
......@@ -1409,6 +1409,7 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
cci_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
.parent = &pdev->dev,
.name = cci_pmu->model->name,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = cci_pmu_enable,
......
......@@ -1265,6 +1265,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn)
/* Perf driver registration */
ccn->dt.pmu = (struct pmu) {
.module = THIS_MODULE,
.parent = ccn->dev,
.attr_groups = arm_ccn_pmu_attr_groups,
.task_ctx_nr = perf_invalid_context,
.event_init = arm_ccn_pmu_event_init,
......
......@@ -1950,20 +1950,20 @@ static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_no
struct arm_cmn *cmn;
unsigned int target;
int node;
cpumask_t mask;
cmn = hlist_entry_safe(cpuhp_node, struct arm_cmn, cpuhp_node);
if (cpu != cmn->cpu)
return 0;
node = dev_to_node(cmn->dev);
if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) &&
cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
target = cpumask_any(&mask);
else
target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu);
if (target >= nr_cpu_ids)
target = cpumask_any_but(cpu_online_mask, cpu);
if (target < nr_cpu_ids)
arm_cmn_migrate(cmn, target);
return 0;
}
......@@ -2482,6 +2482,7 @@ static int arm_cmn_probe(struct platform_device *pdev)
cmn->cpu = cpumask_local_spread(0, dev_to_node(cmn->dev));
cmn->pmu = (struct pmu) {
.module = THIS_MODULE,
.parent = cmn->dev,
.attr_groups = arm_cmn_attr_groups,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
.task_ctx_nr = perf_invalid_context,
......
......@@ -1206,6 +1206,7 @@ static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
cspmu->pmu = (struct pmu){
.task_ctx_nr = perf_invalid_context,
.module = cspmu->impl.module,
.parent = cspmu->dev,
.pmu_enable = arm_cspmu_enable,
.pmu_disable = arm_cspmu_disable,
.event_init = arm_cspmu_event_init,
......@@ -1322,8 +1323,7 @@ static int arm_cspmu_cpu_online(unsigned int cpu, struct hlist_node *node)
static int arm_cspmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
{
int dst;
struct cpumask online_supported;
unsigned int dst;
struct arm_cspmu *cspmu =
hlist_entry_safe(node, struct arm_cspmu, cpuhp_node);
......@@ -1333,9 +1333,8 @@ static int arm_cspmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
return 0;
/* Choose a new CPU to migrate ownership of the PMU to */
cpumask_and(&online_supported, &cspmu->associated_cpus,
cpu_online_mask);
dst = cpumask_any_but(&online_supported, cpu);
dst = cpumask_any_and_but(&cspmu->associated_cpus,
cpu_online_mask, cpu);
if (dst >= nr_cpu_ids)
return 0;
......
......@@ -673,6 +673,7 @@ static int dmc620_pmu_device_probe(struct platform_device *pdev)
dmc620_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
.parent = &pdev->dev,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
.task_ctx_nr = perf_invalid_context,
.event_init = dmc620_pmu_event_init,
......
......@@ -230,15 +230,6 @@ static const struct attribute_group *dsu_pmu_attr_groups[] = {
NULL,
};
static int dsu_pmu_get_online_cpu_any_but(struct dsu_pmu *dsu_pmu, int cpu)
{
struct cpumask online_supported;
cpumask_and(&online_supported,
&dsu_pmu->associated_cpus, cpu_online_mask);
return cpumask_any_but(&online_supported, cpu);
}
static inline bool dsu_pmu_counter_valid(struct dsu_pmu *dsu_pmu, u32 idx)
{
return (idx < dsu_pmu->num_counters) ||
......@@ -751,6 +742,7 @@ static int dsu_pmu_device_probe(struct platform_device *pdev)
dsu_pmu->pmu = (struct pmu) {
.task_ctx_nr = perf_invalid_context,
.parent = &pdev->dev,
.module = THIS_MODULE,
.pmu_enable = dsu_pmu_enable,
.pmu_disable = dsu_pmu_disable,
......@@ -827,14 +819,16 @@ static int dsu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
static int dsu_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
{
int dst;
struct dsu_pmu *dsu_pmu = hlist_entry_safe(node, struct dsu_pmu,
cpuhp_node);
struct dsu_pmu *dsu_pmu;
unsigned int dst;
dsu_pmu = hlist_entry_safe(node, struct dsu_pmu, cpuhp_node);
if (!cpumask_test_and_clear_cpu(cpu, &dsu_pmu->active_cpu))
return 0;
dst = dsu_pmu_get_online_cpu_any_but(dsu_pmu, cpu);
dst = cpumask_any_and_but(&dsu_pmu->associated_cpus,
cpu_online_mask, cpu);
/* If there are no active CPUs in the DSU, leave IRQ disabled */
if (dst >= nr_cpu_ids)
return 0;
......
......@@ -196,6 +196,7 @@ int arm_pmu_device_probe(struct platform_device *pdev,
if (!pmu)
return -ENOMEM;
pmu->pmu.parent = &pdev->dev;
pmu->plat_device = pdev;
ret = pmu_parse_irqs(pmu);
......
......@@ -860,6 +860,7 @@ static int smmu_pmu_probe(struct platform_device *pdev)
smmu_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
.parent = &pdev->dev,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = smmu_pmu_enable,
.pmu_disable = smmu_pmu_disable,
......
......@@ -932,6 +932,7 @@ static int arm_spe_pmu_perf_init(struct arm_spe_pmu *spe_pmu)
spe_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
.parent = &spe_pmu->pdev->dev,
.capabilities = PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE,
.attr_groups = arm_spe_pmu_attr_groups,
/*
......
......@@ -690,9 +690,8 @@ static int dwc_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_n
{
struct dwc_pcie_pmu *pcie_pmu;
struct pci_dev *pdev;
int node;
cpumask_t mask;
unsigned int target;
int node;
pcie_pmu = hlist_entry_safe(cpuhp_node, struct dwc_pcie_pmu, cpuhp_node);
/* Nothing to do if this CPU doesn't own the PMU */
......@@ -702,10 +701,9 @@ static int dwc_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_n
pcie_pmu->on_cpu = -1;
pdev = pcie_pmu->pdev;
node = dev_to_node(&pdev->dev);
if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) &&
cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
target = cpumask_any(&mask);
else
target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu);
if (target >= nr_cpu_ids)
target = cpumask_any_but(cpu_online_mask, cpu);
if (target >= nr_cpu_ids) {
......
......@@ -651,6 +651,7 @@ static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
*pmu = (struct ddr_pmu) {
.pmu = (struct pmu) {
.module = THIS_MODULE,
.parent = dev,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
.task_ctx_nr = perf_invalid_context,
.attr_groups = attr_groups,
......
......@@ -350,15 +350,27 @@ static bool hisi_pcie_pmu_validate_event_group(struct perf_event *event)
return false;
for (num = 0; num < counters; num++) {
/*
* If we find a related event, then it's a valid group
* since we don't need to allocate a new counter for it.
*/
if (hisi_pcie_pmu_cmp_event(event_group[num], sibling))
break;
}
/*
* Otherwise it's a new event but if there's no available counter,
* fail the check since we cannot schedule all the events in
* the group simultaneously.
*/
if (num == HISI_PCIE_MAX_COUNTERS)
return false;
if (num == counters)
event_group[counters++] = sibling;
}
return counters <= HISI_PCIE_MAX_COUNTERS;
return true;
}
static int hisi_pcie_pmu_event_init(struct perf_event *event)
......@@ -673,7 +685,6 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node);
unsigned int target;
cpumask_t mask;
int numa_node;
/* Nothing to do if this CPU doesn't own the PMU */
......@@ -684,10 +695,10 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
/* Choose a local CPU from all online cpus. */
numa_node = dev_to_node(&pcie_pmu->pdev->dev);
if (cpumask_and(&mask, cpumask_of_node(numa_node), cpu_online_mask) &&
cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
target = cpumask_any(&mask);
else
target = cpumask_any_and_but(cpumask_of_node(numa_node),
cpu_online_mask, cpu);
if (target >= nr_cpu_ids)
target = cpumask_any_but(cpu_online_mask, cpu);
if (target >= nr_cpu_ids) {
......@@ -807,6 +818,7 @@ static int hisi_pcie_alloc_pmu(struct pci_dev *pdev, struct hisi_pcie_pmu *pcie_
pcie_pmu->pmu = (struct pmu) {
.name = name,
.module = THIS_MODULE,
.parent = &pdev->dev,
.event_init = hisi_pcie_pmu_event_init,
.pmu_enable = hisi_pcie_pmu_enable,
.pmu_disable = hisi_pcie_pmu_disable,
......
......@@ -504,7 +504,6 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu,
node);
cpumask_t pmu_online_cpus;
unsigned int target;
if (!cpumask_test_and_clear_cpu(cpu, &hisi_pmu->associated_cpus))
......@@ -518,9 +517,8 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
hisi_pmu->on_cpu = -1;
/* Choose a new CPU to migrate ownership of the PMU to */
cpumask_and(&pmu_online_cpus, &hisi_pmu->associated_cpus,
cpu_online_mask);
target = cpumask_any_but(&pmu_online_cpus, cpu);
target = cpumask_any_and_but(&hisi_pmu->associated_cpus,
cpu_online_mask, cpu);
if (target >= nr_cpu_ids)
return 0;
......@@ -538,6 +536,7 @@ void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module)
struct pmu *pmu = &hisi_pmu->pmu;
pmu->module = module;
pmu->parent = hisi_pmu->dev;
pmu->task_ctx_nr = perf_invalid_context;
pmu->event_init = hisi_uncore_pmu_event_init;
pmu->pmu_enable = hisi_uncore_pmu_enable;
......
......@@ -1085,15 +1085,27 @@ static bool hns3_pmu_validate_event_group(struct perf_event *event)
return false;
for (num = 0; num < counters; num++) {
/*
* If we find a related event, then it's a valid group
* since we don't need to allocate a new counter for it.
*/
if (hns3_pmu_cmp_event(event_group[num], sibling))
break;
}
/*
* Otherwise it's a new event but if there's no available counter,
* fail the check since we cannot schedule all the events in
* the group simultaneously.
*/
if (num == HNS3_PMU_MAX_HW_EVENTS)
return false;
if (num == counters)
event_group[counters++] = sibling;
}
return counters <= HNS3_PMU_MAX_HW_EVENTS;
return true;
}
static u32 hns3_pmu_get_filter_condition(struct perf_event *event)
......@@ -1419,6 +1431,7 @@ static int hns3_pmu_alloc_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu)
hns3_pmu->pmu = (struct pmu) {
.name = name,
.module = THIS_MODULE,
.parent = &pdev->dev,
.event_init = hns3_pmu_event_init,
.pmu_enable = hns3_pmu_enable,
.pmu_disable = hns3_pmu_disable,
......@@ -1515,7 +1528,7 @@ static int hns3_pmu_irq_register(struct pci_dev *pdev,
return ret;
}
ret = devm_add_action(&pdev->dev, hns3_pmu_free_irq, pdev);
ret = devm_add_action_or_reset(&pdev->dev, hns3_pmu_free_irq, pdev);
if (ret) {
pci_err(pdev, "failed to add free irq action, ret = %d.\n", ret);
return ret;
......
......@@ -801,9 +801,8 @@ static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
struct cluster_pmu *cluster;
struct l2cache_pmu *l2cache_pmu;
cpumask_t cluster_online_cpus;
struct cluster_pmu *cluster;
unsigned int target;
l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node);
......@@ -820,9 +819,8 @@ static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
cluster->on_cpu = -1;
/* Any other CPU for this cluster which is still online */
cpumask_and(&cluster_online_cpus, &cluster->cluster_cpus,
cpu_online_mask);
target = cpumask_any_but(&cluster_online_cpus, cpu);
target = cpumask_any_and_but(&cluster->cluster_cpus,
cpu_online_mask, cpu);
if (target >= nr_cpu_ids) {
disable_irq(cluster->irq);
return 0;
......@@ -904,6 +902,7 @@ static int l2_cache_pmu_probe(struct platform_device *pdev)
l2cache_pmu->pmu = (struct pmu) {
/* suffix is instance id for future use with multiple sockets */
.name = "l2cache_0",
.parent = &pdev->dev,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = l2_cache_pmu_enable,
.pmu_disable = l2_cache_pmu_disable,
......
......@@ -748,6 +748,7 @@ static int qcom_l3_cache_pmu_probe(struct platform_device *pdev)
return -ENOMEM;
l3pmu->pmu = (struct pmu) {
.parent = &pdev->dev,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = qcom_l3_cache__pmu_enable,
......
......@@ -136,6 +136,7 @@ static int pmu_legacy_device_probe(struct platform_device *pdev)
pmu = riscv_pmu_alloc();
if (!pmu)
return -ENOMEM;
pmu->pmu.parent = &pdev->dev;
pmu_legacy_init(pmu);
return 0;
......
......@@ -1043,7 +1043,6 @@ static struct ctl_table sbi_pmu_sysctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
{ }
};
static int pmu_sbi_device_probe(struct platform_device *pdev)
......@@ -1081,6 +1080,7 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
}
pmu->pmu.attr_groups = riscv_pmu_attr_groups;
pmu->pmu.parent = &pdev->dev;
pmu->cmask = cmask;
pmu->ctr_start = pmu_sbi_ctr_start;
pmu->ctr_stop = pmu_sbi_ctr_stop;
......
......@@ -504,24 +504,19 @@ static void tx2_uncore_event_update(struct perf_event *event)
static enum tx2_uncore_type get_tx2_pmu_type(struct acpi_device *adev)
{
int i = 0;
struct acpi_tx2_pmu_device {
__u8 id[ACPI_ID_LEN];
enum tx2_uncore_type type;
} devices[] = {
struct acpi_device_id devices[] = {
{"CAV901D", PMU_TYPE_L3C},
{"CAV901F", PMU_TYPE_DMC},
{"CAV901E", PMU_TYPE_CCPI2},
{"", PMU_TYPE_INVALID}
{}
};
const struct acpi_device_id *id;
while (devices[i].type != PMU_TYPE_INVALID) {
if (!strcmp(acpi_device_hid(adev), devices[i].id))
break;
i++;
}
id = acpi_match_acpi_device(devices, adev);
if (!id)
return PMU_TYPE_INVALID;
return devices[i].type;
return (enum tx2_uncore_type)id->driver_data;
}
static bool tx2_uncore_validate_event(struct pmu *pmu,
......@@ -729,6 +724,7 @@ static int tx2_uncore_pmu_register(
/* Perf event registration */
tx2_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
.parent = tx2_pmu->dev,
.attr_groups = tx2_pmu->attr_groups,
.task_ctx_nr = perf_invalid_context,
.event_init = tx2_uncore_event_init,
......@@ -932,9 +928,8 @@ static int tx2_uncore_pmu_online_cpu(unsigned int cpu,
static int tx2_uncore_pmu_offline_cpu(unsigned int cpu,
struct hlist_node *hpnode)
{
int new_cpu;
struct tx2_uncore_pmu *tx2_pmu;
struct cpumask cpu_online_mask_temp;
unsigned int new_cpu;
tx2_pmu = hlist_entry_safe(hpnode,
struct tx2_uncore_pmu, hpnode);
......@@ -945,11 +940,8 @@ static int tx2_uncore_pmu_offline_cpu(unsigned int cpu,
if (tx2_pmu->hrtimer_callback)
hrtimer_cancel(&tx2_pmu->hrtimer);
cpumask_copy(&cpu_online_mask_temp, cpu_online_mask);
cpumask_clear_cpu(cpu, &cpu_online_mask_temp);
new_cpu = cpumask_any_and(
cpumask_of_node(tx2_pmu->node),
&cpu_online_mask_temp);
new_cpu = cpumask_any_and_but(cpumask_of_node(tx2_pmu->node),
cpu_online_mask, cpu);
tx2_pmu->cpu = new_cpu;
if (new_cpu >= nr_cpu_ids)
......
......@@ -1102,6 +1102,7 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name)
/* Perf driver registration */
pmu_dev->pmu = (struct pmu) {
.parent = pmu_dev->parent->dev,
.attr_groups = pmu_dev->attr_groups,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = xgene_perf_pmu_enable,
......
......@@ -388,6 +388,29 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
return i;
}
/**
* cpumask_any_and_but - pick a "random" cpu from *mask1 & *mask2, but not this one.
* @mask1: the first input cpumask
* @mask2: the second input cpumask
* @cpu: the cpu to ignore
*
* Returns >= nr_cpu_ids if no cpus set.
*/
static inline
unsigned int cpumask_any_and_but(const struct cpumask *mask1,
const struct cpumask *mask2,
unsigned int cpu)
{
unsigned int i;
cpumask_check(cpu);
i = cpumask_first_and(mask1, mask2);
if (i != cpu)
return i;
return cpumask_next_and(cpu, mask1, mask2);
}
/**
* cpumask_nth - get the Nth cpu in a cpumask
* @srcp: the cpumask pointer
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment