Commit e525c37f authored by Alexey Brodkin, committed by Vineet Gupta

ARCv2: perf: SMP support

* split off pmu info into singleton and per-cpu bits
* setup PMU on all cores
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
parent e6b1d126
...@@ -21,10 +21,22 @@ ...@@ -21,10 +21,22 @@
struct arc_pmu { struct arc_pmu {
struct pmu pmu; struct pmu pmu;
unsigned int irq;
int n_counters; int n_counters;
unsigned long used_mask[BITS_TO_LONGS(ARC_PERF_MAX_COUNTERS)];
u64 max_period; u64 max_period;
int ev_hw_idx[PERF_COUNT_ARC_HW_MAX]; int ev_hw_idx[PERF_COUNT_ARC_HW_MAX];
};
struct arc_pmu_cpu {
/*
* A 1 bit for an index indicates that the counter is being used for
* an event. A 0 means that the counter can be used.
*/
unsigned long used_mask[BITS_TO_LONGS(ARC_PERF_MAX_COUNTERS)];
/*
* The events that are active on the PMU for the given index.
*/
struct perf_event *act_counter[ARC_PERF_MAX_COUNTERS]; struct perf_event *act_counter[ARC_PERF_MAX_COUNTERS];
}; };
...@@ -67,6 +79,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) ...@@ -67,6 +79,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
} }
static struct arc_pmu *arc_pmu; static struct arc_pmu *arc_pmu;
static DEFINE_PER_CPU(struct arc_pmu_cpu, arc_pmu_cpu);
/* read counter #idx; note that counter# != event# on ARC! */ /* read counter #idx; note that counter# != event# on ARC! */
static uint64_t arc_pmu_read_counter(int idx) static uint64_t arc_pmu_read_counter(int idx)
...@@ -304,10 +317,12 @@ static void arc_pmu_stop(struct perf_event *event, int flags) ...@@ -304,10 +317,12 @@ static void arc_pmu_stop(struct perf_event *event, int flags)
static void arc_pmu_del(struct perf_event *event, int flags) static void arc_pmu_del(struct perf_event *event, int flags)
{ {
struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
arc_pmu_stop(event, PERF_EF_UPDATE); arc_pmu_stop(event, PERF_EF_UPDATE);
__clear_bit(event->hw.idx, arc_pmu->used_mask); __clear_bit(event->hw.idx, pmu_cpu->used_mask);
arc_pmu->act_counter[event->hw.idx] = 0; pmu_cpu->act_counter[event->hw.idx] = 0;
perf_event_update_userpage(event); perf_event_update_userpage(event);
} }
...@@ -315,22 +330,23 @@ static void arc_pmu_del(struct perf_event *event, int flags) ...@@ -315,22 +330,23 @@ static void arc_pmu_del(struct perf_event *event, int flags)
/* allocate hardware counter and optionally start counting */ /* allocate hardware counter and optionally start counting */
static int arc_pmu_add(struct perf_event *event, int flags) static int arc_pmu_add(struct perf_event *event, int flags)
{ {
struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
struct hw_perf_event *hwc = &event->hw; struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx; int idx = hwc->idx;
if (__test_and_set_bit(idx, arc_pmu->used_mask)) { if (__test_and_set_bit(idx, pmu_cpu->used_mask)) {
idx = find_first_zero_bit(arc_pmu->used_mask, idx = find_first_zero_bit(pmu_cpu->used_mask,
arc_pmu->n_counters); arc_pmu->n_counters);
if (idx == arc_pmu->n_counters) if (idx == arc_pmu->n_counters)
return -EAGAIN; return -EAGAIN;
__set_bit(idx, arc_pmu->used_mask); __set_bit(idx, pmu_cpu->used_mask);
hwc->idx = idx; hwc->idx = idx;
} }
write_aux_reg(ARC_REG_PCT_INDEX, idx); write_aux_reg(ARC_REG_PCT_INDEX, idx);
arc_pmu->act_counter[idx] = event; pmu_cpu->act_counter[idx] = event;
if (is_sampling_event(event)) { if (is_sampling_event(event)) {
/* Mimic full counter overflow as other arches do */ /* Mimic full counter overflow as other arches do */
...@@ -357,7 +373,7 @@ static int arc_pmu_add(struct perf_event *event, int flags) ...@@ -357,7 +373,7 @@ static int arc_pmu_add(struct perf_event *event, int flags)
static irqreturn_t arc_pmu_intr(int irq, void *dev) static irqreturn_t arc_pmu_intr(int irq, void *dev)
{ {
struct perf_sample_data data; struct perf_sample_data data;
struct arc_pmu *arc_pmu = (struct arc_pmu *)dev; struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
struct pt_regs *regs; struct pt_regs *regs;
int active_ints; int active_ints;
int idx; int idx;
...@@ -369,7 +385,7 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev) ...@@ -369,7 +385,7 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
regs = get_irq_regs(); regs = get_irq_regs();
for (idx = 0; idx < arc_pmu->n_counters; idx++) { for (idx = 0; idx < arc_pmu->n_counters; idx++) {
struct perf_event *event = arc_pmu->act_counter[idx]; struct perf_event *event = pmu_cpu->act_counter[idx];
struct hw_perf_event *hwc; struct hw_perf_event *hwc;
if (!(active_ints & (1 << idx))) if (!(active_ints & (1 << idx)))
...@@ -412,6 +428,17 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev) ...@@ -412,6 +428,17 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
#endif /* CONFIG_ISA_ARCV2 */ #endif /* CONFIG_ISA_ARCV2 */
/*
 * Per-CPU PMU interrupt setup; must run on the CPU being initialised.
 *
 * Requests the PMU interrupt (arc_pmu->irq) for the executing CPU via
 * arc_request_percpu_irq() with arc_pmu_intr() as the handler, handing over
 * this CPU's struct arc_pmu_cpu as the last argument (presumably the handler's
 * dev cookie -- confirm against arc_request_percpu_irq()). Afterwards it
 * writes all-ones to ARC_REG_PCT_INT_ACT to drop stale pending flags.
 *
 * The probe routine invokes this directly on the probing CPU with local
 * interrupts disabled, then on the remaining CPUs through
 * smp_call_function().
 *
 * NOTE(review): the smp_call_function() call site casts this function to
 * smp_call_func_t although it takes no parameter; if smp_call_func_t expects
 * a void * argument, the signature should be aligned (together with the
 * direct caller) instead of relying on the cast.
 */
void arc_cpu_pmu_irq_init(void)
{
/* Both this_cpu_ptr() and smp_processor_id() refer to the executing CPU. */
struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
arc_request_percpu_irq(arc_pmu->irq, smp_processor_id(), arc_pmu_intr,
"ARC perf counters", pmu_cpu);
/* Clear all pending interrupt flags */
write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff);
}
static int arc_pmu_device_probe(struct platform_device *pdev) static int arc_pmu_device_probe(struct platform_device *pdev)
{ {
struct arc_reg_pct_build pct_bcr; struct arc_reg_pct_build pct_bcr;
...@@ -488,18 +515,30 @@ static int arc_pmu_device_probe(struct platform_device *pdev) ...@@ -488,18 +515,30 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
if (has_interrupts) { if (has_interrupts) {
int irq = platform_get_irq(pdev, 0); int irq = platform_get_irq(pdev, 0);
unsigned long flags;
if (irq < 0) { if (irq < 0) {
pr_err("Cannot get IRQ number for the platform\n"); pr_err("Cannot get IRQ number for the platform\n");
return -ENODEV; return -ENODEV;
} }
ret = devm_request_irq(&pdev->dev, irq, arc_pmu_intr, 0, arc_pmu->irq = irq;
"arc-pmu", arc_pmu);
if (ret) { /*
pr_err("could not allocate PMU IRQ\n"); * arc_cpu_pmu_irq_init() needs to be called on all cores for
return ret; * their respective local PMU.
} * However we use opencoded on_each_cpu() to ensure it is called
* on core0 first, so that arc_request_percpu_irq() sets up
* AUTOEN etc. Otherwise enable_percpu_irq() fails to enable
* perf IRQ on non master cores.
* see arc_request_percpu_irq()
*/
preempt_disable();
local_irq_save(flags);
arc_cpu_pmu_irq_init();
local_irq_restore(flags);
smp_call_function((smp_call_func_t)arc_cpu_pmu_irq_init, 0, 1);
preempt_enable();
/* Clean all pending interrupt flags */ /* Clean all pending interrupt flags */
write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff); write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment