Commit d5049617 authored by Linus Torvalds's avatar Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc

Pull sparc fixes from David Miller:
 "Some perf bug fixes from David Ahern, and the fix for that nasty
  memmove() bug"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc:
  sparc64: Fix several bugs in memmove().
  sparc: Touch NMI watchdog when walking cpus and calling printk
  sparc: perf: Add support M7 processor
  sparc: perf: Make counting mode actually work
  sparc: perf: Remove redundant perf_pmu_{en|dis}able calls
parents bc465aa9 2077cef4
......@@ -2957,6 +2957,17 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num,
unsigned long reg_val);
#endif
#define HV_FAST_M7_GET_PERFREG 0x43
#define HV_FAST_M7_SET_PERFREG 0x44
#ifndef __ASSEMBLY__
unsigned long sun4v_m7_get_perfreg(unsigned long reg_num,
unsigned long *reg_val);
unsigned long sun4v_m7_set_perfreg(unsigned long reg_num,
unsigned long reg_val);
#endif
/* Function numbers for HV_CORE_TRAP. */
#define HV_CORE_SET_VER 0x00
#define HV_CORE_PUTCHAR 0x01
......@@ -2981,6 +2992,7 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num,
#define HV_GRP_SDIO 0x0108
#define HV_GRP_SDIO_ERR 0x0109
#define HV_GRP_REBOOT_DATA 0x0110
#define HV_GRP_M7_PERF 0x0114
#define HV_GRP_NIAG_PERF 0x0200
#define HV_GRP_FIRE_PERF 0x0201
#define HV_GRP_N2_CPU 0x0202
......
......@@ -48,6 +48,7 @@ static struct api_info api_table[] = {
{ .group = HV_GRP_VT_CPU, },
{ .group = HV_GRP_T5_CPU, },
{ .group = HV_GRP_DIAG, .flags = FLAG_PRE_API },
{ .group = HV_GRP_M7_PERF, },
};
static DEFINE_SPINLOCK(hvapi_lock);
......
......@@ -837,3 +837,19 @@ ENTRY(sun4v_t5_set_perfreg)
retl
nop
ENDPROC(sun4v_t5_set_perfreg)
ENTRY(sun4v_m7_get_perfreg)
mov %o1, %o4
mov HV_FAST_M7_GET_PERFREG, %o5
ta HV_FAST_TRAP
stx %o1, [%o4]
retl
nop
ENDPROC(sun4v_m7_get_perfreg)
ENTRY(sun4v_m7_set_perfreg)
mov HV_FAST_M7_SET_PERFREG, %o5
ta HV_FAST_TRAP
retl
nop
ENDPROC(sun4v_m7_set_perfreg)
......@@ -217,6 +217,31 @@ static const struct pcr_ops n5_pcr_ops = {
.pcr_nmi_disable = PCR_N4_PICNPT,
};
static u64 m7_pcr_read(unsigned long reg_num)
{
unsigned long val;
(void) sun4v_m7_get_perfreg(reg_num, &val);
return val;
}
static void m7_pcr_write(unsigned long reg_num, u64 val)
{
(void) sun4v_m7_set_perfreg(reg_num, val);
}
static const struct pcr_ops m7_pcr_ops = {
.read_pcr = m7_pcr_read,
.write_pcr = m7_pcr_write,
.read_pic = n4_pic_read,
.write_pic = n4_pic_write,
.nmi_picl_value = n4_picl_value,
.pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE |
PCR_N4_UTRACE | PCR_N4_TOE |
(26 << PCR_N4_SL_SHIFT)),
.pcr_nmi_disable = PCR_N4_PICNPT,
};
static unsigned long perf_hsvc_group;
static unsigned long perf_hsvc_major;
......@@ -248,6 +273,10 @@ static int __init register_perf_hsvc(void)
perf_hsvc_group = HV_GRP_T5_CPU;
break;
case SUN4V_CHIP_SPARC_M7:
perf_hsvc_group = HV_GRP_M7_PERF;
break;
default:
return -ENODEV;
}
......@@ -293,6 +322,10 @@ static int __init setup_sun4v_pcr_ops(void)
pcr_ops = &n5_pcr_ops;
break;
case SUN4V_CHIP_SPARC_M7:
pcr_ops = &m7_pcr_ops;
break;
default:
ret = -ENODEV;
break;
......
......@@ -792,6 +792,42 @@ static const struct sparc_pmu niagara4_pmu = {
.num_pic_regs = 4,
};
static void sparc_m7_write_pmc(int idx, u64 val)
{
u64 pcr;
pcr = pcr_ops->read_pcr(idx);
/* ensure ov and ntc are reset */
pcr &= ~(PCR_N4_OV | PCR_N4_NTC);
pcr_ops->write_pic(idx, val & 0xffffffff);
pcr_ops->write_pcr(idx, pcr);
}
static const struct sparc_pmu sparc_m7_pmu = {
.event_map = niagara4_event_map,
.cache_map = &niagara4_cache_map,
.max_events = ARRAY_SIZE(niagara4_perfmon_event_map),
.read_pmc = sparc_vt_read_pmc,
.write_pmc = sparc_m7_write_pmc,
.upper_shift = 5,
.lower_shift = 5,
.event_mask = 0x7ff,
.user_bit = PCR_N4_UTRACE,
.priv_bit = PCR_N4_STRACE,
/* We explicitly don't support hypervisor tracing. */
.hv_bit = 0,
.irq_bit = PCR_N4_TOE,
.upper_nop = 0,
.lower_nop = 0,
.flags = 0,
.max_hw_events = 4,
.num_pcrs = 4,
.num_pic_regs = 4,
};
static const struct sparc_pmu *sparc_pmu __read_mostly;
static u64 event_encoding(u64 event_id, int idx)
......@@ -960,6 +996,8 @@ static void calculate_single_pcr(struct cpu_hw_events *cpuc)
cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
}
static void sparc_pmu_start(struct perf_event *event, int flags);
/* On this PMU each PIC has it's own PCR control register. */
static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
{
......@@ -972,20 +1010,13 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
struct perf_event *cp = cpuc->event[i];
struct hw_perf_event *hwc = &cp->hw;
int idx = hwc->idx;
u64 enc;
if (cpuc->current_idx[i] != PIC_NO_INDEX)
continue;
sparc_perf_event_set_period(cp, hwc, idx);
cpuc->current_idx[i] = idx;
enc = perf_event_get_enc(cpuc->events[i]);
cpuc->pcr[idx] &= ~mask_for_index(idx);
if (hwc->state & PERF_HES_STOPPED)
cpuc->pcr[idx] |= nop_for_index(idx);
else
cpuc->pcr[idx] |= event_encoding(enc, idx);
sparc_pmu_start(cp, PERF_EF_RELOAD);
}
out:
for (i = 0; i < cpuc->n_events; i++) {
......@@ -1101,7 +1132,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags)
int i;
local_irq_save(flags);
perf_pmu_disable(event->pmu);
for (i = 0; i < cpuc->n_events; i++) {
if (event == cpuc->event[i]) {
......@@ -1127,7 +1157,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags)
}
}
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
}
......@@ -1361,7 +1390,6 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
unsigned long flags;
local_irq_save(flags);
perf_pmu_disable(event->pmu);
n0 = cpuc->n_events;
if (n0 >= sparc_pmu->max_hw_events)
......@@ -1394,7 +1422,6 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
ret = 0;
out:
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
return ret;
}
......@@ -1667,6 +1694,10 @@ static bool __init supported_pmu(void)
sparc_pmu = &niagara4_pmu;
return true;
}
if (!strcmp(sparc_pmu_type, "sparc-m7")) {
sparc_pmu = &sparc_m7_pmu;
return true;
}
return false;
}
......
......@@ -287,6 +287,8 @@ void arch_trigger_all_cpu_backtrace(bool include_self)
printk(" TPC[%lx] O7[%lx] I7[%lx] RPC[%lx]\n",
gp->tpc, gp->o7, gp->i7, gp->rpc);
}
touch_nmi_watchdog();
}
memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
......@@ -362,6 +364,8 @@ static void pmu_snapshot_all_cpus(void)
(cpu == this_cpu ? '*' : ' '), cpu,
pp->pcr[0], pp->pcr[1], pp->pcr[2], pp->pcr[3],
pp->pic[0], pp->pic[1], pp->pic[2], pp->pic[3]);
touch_nmi_watchdog();
}
memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
......
......@@ -8,9 +8,11 @@
.text
ENTRY(memmove) /* o0=dst o1=src o2=len */
mov %o0, %g1
brz,pn %o2, 99f
mov %o0, %g1
cmp %o0, %o1
bleu,pt %xcc, memcpy
bleu,pt %xcc, 2f
add %o1, %o2, %g7
cmp %g7, %o0
bleu,pt %xcc, memcpy
......@@ -24,7 +26,34 @@ ENTRY(memmove) /* o0=dst o1=src o2=len */
stb %g7, [%o0]
bne,pt %icc, 1b
sub %o0, 1, %o0
99:
retl
mov %g1, %o0
/* We can't just call memcpy for these memmove cases. On some
* chips the memcpy uses cache initializing stores and when dst
* and src are close enough, those can clobber the source data
* before we've loaded it in.
*/
2: or %o0, %o1, %g7
or %o2, %g7, %g7
andcc %g7, 0x7, %g0
bne,pn %xcc, 4f
nop
3: ldx [%o1], %g7
add %o1, 8, %o1
subcc %o2, 8, %o2
add %o0, 8, %o0
bne,pt %icc, 3b
stx %g7, [%o0 - 0x8]
ba,a,pt %xcc, 99b
4: ldub [%o1], %g7
add %o1, 1, %o1
subcc %o2, 1, %o2
add %o0, 1, %o0
bne,pt %icc, 4b
stb %g7, [%o0 - 0x1]
ba,a,pt %xcc, 99b
ENDPROC(memmove)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment