Commit 0ffa798d authored by Ingo Molnar's avatar Ingo Molnar

Merge branches 'perf/powerpc' and 'perf/bench' into perf/core

Merge reason: Both 'perf bench' and the pending PowerPC changes
              are now ready for the next merge window.
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
......@@ -46,7 +46,7 @@ config DEBUG_STACK_USAGE
config HCALL_STATS
bool "Hypervisor call instrumentation"
depends on PPC_PSERIES && DEBUG_FS
depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS
help
Adds code to keep track of the number of hypervisor calls made and
the amount of time spent in hypervisor calls. Wall time spent in
......
......@@ -1683,7 +1683,7 @@ CONFIG_HAVE_ARCH_KGDB=y
CONFIG_DEBUG_STACKOVERFLOW=y
# CONFIG_DEBUG_STACK_USAGE is not set
# CONFIG_DEBUG_PAGEALLOC is not set
CONFIG_HCALL_STATS=y
# CONFIG_HCALL_STATS is not set
# CONFIG_CODE_PATCHING_SELFTEST is not set
# CONFIG_FTR_FIXUP_SELFTEST is not set
# CONFIG_MSI_BITMAP_SELFTEST is not set
......
......@@ -19,6 +19,7 @@
#define _ASM_POWERPC_EMULATED_OPS_H
#include <asm/atomic.h>
#include <linux/perf_event.h>
#ifdef CONFIG_PPC_EMULATED_STATS
......@@ -57,7 +58,7 @@ extern u32 ppc_warn_emulated;
extern void ppc_warn_emulated_print(const char *type);
#define PPC_WARN_EMULATED(type) \
#define __PPC_WARN_EMULATED(type) \
do { \
atomic_inc(&ppc_emulated.type.val); \
if (ppc_warn_emulated) \
......@@ -66,8 +67,22 @@ extern void ppc_warn_emulated_print(const char *type);
#else /* !CONFIG_PPC_EMULATED_STATS */
#define PPC_WARN_EMULATED(type) do { } while (0)
#define __PPC_WARN_EMULATED(type) do { } while (0)
#endif /* !CONFIG_PPC_EMULATED_STATS */
#define PPC_WARN_EMULATED(type, regs) \
do { \
perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, \
1, 0, regs, 0); \
__PPC_WARN_EMULATED(type); \
} while (0)
#define PPC_WARN_ALIGNMENT(type, regs) \
do { \
perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, \
1, 0, regs, regs->dar); \
__PPC_WARN_EMULATED(type); \
} while (0)
#endif /* _ASM_POWERPC_EMULATED_OPS_H */
......@@ -274,6 +274,8 @@ struct hcall_stats {
unsigned long num_calls; /* number of calls (on this CPU) */
unsigned long tb_total; /* total wall time (mftb) of calls. */
unsigned long purr_total; /* total cpu time (PURR) of calls. */
unsigned long tb_start;
unsigned long purr_start;
};
#define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1)
......
......@@ -489,6 +489,8 @@
#define SPRN_MMCR1 798
#define SPRN_MMCRA 0x312
#define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */
#define MMCRA_SDAR_DCACHE_MISS 0x40000000UL
#define MMCRA_SDAR_ERAT_MISS 0x20000000UL
#define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */
#define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */
#define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */
......
#undef TRACE_SYSTEM
#define TRACE_SYSTEM powerpc
#if !defined(_TRACE_POWERPC_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_POWERPC_H
#include <linux/tracepoint.h>
struct pt_regs;
TRACE_EVENT(irq_entry,
TP_PROTO(struct pt_regs *regs),
TP_ARGS(regs),
TP_STRUCT__entry(
__field(struct pt_regs *, regs)
),
TP_fast_assign(
__entry->regs = regs;
),
TP_printk("pt_regs=%p", __entry->regs)
);
TRACE_EVENT(irq_exit,
TP_PROTO(struct pt_regs *regs),
TP_ARGS(regs),
TP_STRUCT__entry(
__field(struct pt_regs *, regs)
),
TP_fast_assign(
__entry->regs = regs;
),
TP_printk("pt_regs=%p", __entry->regs)
);
TRACE_EVENT(timer_interrupt_entry,
TP_PROTO(struct pt_regs *regs),
TP_ARGS(regs),
TP_STRUCT__entry(
__field(struct pt_regs *, regs)
),
TP_fast_assign(
__entry->regs = regs;
),
TP_printk("pt_regs=%p", __entry->regs)
);
TRACE_EVENT(timer_interrupt_exit,
TP_PROTO(struct pt_regs *regs),
TP_ARGS(regs),
TP_STRUCT__entry(
__field(struct pt_regs *, regs)
),
TP_fast_assign(
__entry->regs = regs;
),
TP_printk("pt_regs=%p", __entry->regs)
);
#ifdef CONFIG_PPC_PSERIES
extern void hcall_tracepoint_regfunc(void);
extern void hcall_tracepoint_unregfunc(void);
TRACE_EVENT_FN(hcall_entry,
TP_PROTO(unsigned long opcode, unsigned long *args),
TP_ARGS(opcode, args),
TP_STRUCT__entry(
__field(unsigned long, opcode)
),
TP_fast_assign(
__entry->opcode = opcode;
),
TP_printk("opcode=%lu", __entry->opcode),
hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
);
TRACE_EVENT_FN(hcall_exit,
TP_PROTO(unsigned long opcode, unsigned long retval,
unsigned long *retbuf),
TP_ARGS(opcode, retval, retbuf),
TP_STRUCT__entry(
__field(unsigned long, opcode)
__field(unsigned long, retval)
),
TP_fast_assign(
__entry->opcode = opcode;
__entry->retval = retval;
),
TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval),
hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
);
#endif
#endif /* _TRACE_POWERPC_H */
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_PATH asm
#define TRACE_INCLUDE_FILE trace
#include <trace/define_trace.h>
......@@ -732,7 +732,7 @@ int fix_alignment(struct pt_regs *regs)
#ifdef CONFIG_SPE
if ((instr >> 26) == 0x4) {
PPC_WARN_EMULATED(spe);
PPC_WARN_ALIGNMENT(spe, regs);
return emulate_spe(regs, reg, instr);
}
#endif
......@@ -786,7 +786,7 @@ int fix_alignment(struct pt_regs *regs)
flags |= SPLT;
nb = 8;
}
PPC_WARN_EMULATED(vsx);
PPC_WARN_ALIGNMENT(vsx, regs);
return emulate_vsx(addr, reg, areg, regs, flags, nb);
}
#endif
......@@ -794,7 +794,7 @@ int fix_alignment(struct pt_regs *regs)
* the exception of DCBZ which is handled as a special case here
*/
if (instr == DCBZ) {
PPC_WARN_EMULATED(dcbz);
PPC_WARN_ALIGNMENT(dcbz, regs);
return emulate_dcbz(regs, addr);
}
if (unlikely(nb == 0))
......@@ -804,7 +804,7 @@ int fix_alignment(struct pt_regs *regs)
* function
*/
if (flags & M) {
PPC_WARN_EMULATED(multiple);
PPC_WARN_ALIGNMENT(multiple, regs);
return emulate_multiple(regs, addr, reg, nb,
flags, instr, swiz);
}
......@@ -825,11 +825,11 @@ int fix_alignment(struct pt_regs *regs)
/* Special case for 16-byte FP loads and stores */
if (nb == 16) {
PPC_WARN_EMULATED(fp_pair);
PPC_WARN_ALIGNMENT(fp_pair, regs);
return emulate_fp_pair(addr, reg, flags);
}
PPC_WARN_EMULATED(unaligned);
PPC_WARN_ALIGNMENT(unaligned, regs);
/* If we are loading, get the data from user space, else
* get it from register values
......
......@@ -551,7 +551,7 @@ restore:
BEGIN_FW_FTR_SECTION
ld r5,SOFTE(r1)
FW_FTR_SECTION_ELSE
b iseries_check_pending_irqs
b .Liseries_check_pending_irqs
ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
2:
TRACE_AND_RESTORE_IRQ(r5);
......@@ -623,7 +623,7 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
#endif /* CONFIG_PPC_BOOK3E */
iseries_check_pending_irqs:
.Liseries_check_pending_irqs:
#ifdef CONFIG_PPC_ISERIES
ld r5,SOFTE(r1)
cmpdi 0,r5,0
......
......@@ -185,12 +185,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
* prolog code of the PerformanceMonitor one. A little
* trickery is thus necessary
*/
performance_monitor_pSeries_1:
. = 0xf00
b performance_monitor_pSeries
altivec_unavailable_pSeries_1:
. = 0xf20
b altivec_unavailable_pSeries
vsx_unavailable_pSeries_1:
. = 0xf40
b vsx_unavailable_pSeries
......
......@@ -70,6 +70,8 @@
#include <asm/firmware.h>
#include <asm/lv1call.h>
#endif
#define CREATE_TRACE_POINTS
#include <asm/trace.h>
int __irq_offset_value;
static int ppc_spurious_interrupts;
......@@ -325,6 +327,8 @@ void do_IRQ(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
unsigned int irq;
trace_irq_entry(regs);
irq_enter();
check_stack_overflow();
......@@ -348,6 +352,8 @@ void do_IRQ(struct pt_regs *regs)
timer_interrupt(regs);
}
#endif
trace_irq_exit(regs);
}
void __init init_IRQ(void)
......
......@@ -1165,7 +1165,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
*/
if (record) {
struct perf_sample_data data = {
.addr = 0,
.addr = ~0ULL,
.period = event->hw.last_period,
};
......
......@@ -72,10 +72,6 @@
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK 0x7f
/*
* Bits in MMCRA
*/
/*
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
......
......@@ -72,10 +72,6 @@
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK 0x7f
/*
* Bits in MMCRA
*/
/*
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
......@@ -390,7 +386,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], unsigned long mmcr[])
{
unsigned long mmcr1 = 0;
unsigned long mmcra = 0;
unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
unsigned int pmc, unit, byte, psel;
unsigned int ttm, grp;
int i, isbus, bit, grsel;
......
......@@ -178,7 +178,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], unsigned long mmcr[])
{
unsigned long mmcr1 = 0;
unsigned long mmcra = 0;
unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
int i;
unsigned int pmc, ev, b, u, s, psel;
unsigned int ttmset = 0;
......
......@@ -50,10 +50,6 @@
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK 0xff
/*
* Bits in MMCRA
*/
/*
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
......@@ -230,7 +226,7 @@ static int power7_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], unsigned long mmcr[])
{
unsigned long mmcr1 = 0;
unsigned long mmcra = 0;
unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
unsigned int pmc, unit, combine, l2sel, psel;
unsigned int pmc_inuse = 0;
int i;
......
......@@ -83,10 +83,6 @@ static short mmcr1_adder_bits[8] = {
MMCR1_PMC8_ADDER_SEL_SH
};
/*
* Bits in MMCRA
*/
/*
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
......
......@@ -660,6 +660,7 @@ late_initcall(check_cache_coherency);
#ifdef CONFIG_DEBUG_FS
struct dentry *powerpc_debugfs_root;
EXPORT_SYMBOL(powerpc_debugfs_root);
static int powerpc_debugfs_init(void)
{
......
......@@ -54,6 +54,7 @@
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/perf_event.h>
#include <asm/trace.h>
#include <asm/io.h>
#include <asm/processor.h>
......@@ -571,6 +572,8 @@ void timer_interrupt(struct pt_regs * regs)
struct clock_event_device *evt = &decrementer->event;
u64 now;
trace_timer_interrupt_entry(regs);
/* Ensure a positive value is written to the decrementer, or else
* some CPUs will continuue to take decrementer exceptions */
set_dec(DECREMENTER_MAX);
......@@ -590,6 +593,7 @@ void timer_interrupt(struct pt_regs * regs)
now = decrementer->next_tb - now;
if (now <= DECREMENTER_MAX)
set_dec((int)now);
trace_timer_interrupt_exit(regs);
return;
}
old_regs = set_irq_regs(regs);
......@@ -620,6 +624,8 @@ void timer_interrupt(struct pt_regs * regs)
irq_exit();
set_irq_regs(old_regs);
trace_timer_interrupt_exit(regs);
}
void wakeup_decrementer(void)
......
......@@ -759,7 +759,7 @@ static int emulate_instruction(struct pt_regs *regs)
/* Emulate the mfspr rD, PVR. */
if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) {
PPC_WARN_EMULATED(mfpvr);
PPC_WARN_EMULATED(mfpvr, regs);
rd = (instword >> 21) & 0x1f;
regs->gpr[rd] = mfspr(SPRN_PVR);
return 0;
......@@ -767,7 +767,7 @@ static int emulate_instruction(struct pt_regs *regs)
/* Emulating the dcba insn is just a no-op. */
if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) {
PPC_WARN_EMULATED(dcba);
PPC_WARN_EMULATED(dcba, regs);
return 0;
}
......@@ -776,7 +776,7 @@ static int emulate_instruction(struct pt_regs *regs)
int shift = (instword >> 21) & 0x1c;
unsigned long msk = 0xf0000000UL >> shift;
PPC_WARN_EMULATED(mcrxr);
PPC_WARN_EMULATED(mcrxr, regs);
regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk);
regs->xer &= ~0xf0000000UL;
return 0;
......@@ -784,19 +784,19 @@ static int emulate_instruction(struct pt_regs *regs)
/* Emulate load/store string insn. */
if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
PPC_WARN_EMULATED(string);
PPC_WARN_EMULATED(string, regs);
return emulate_string_inst(regs, instword);
}
/* Emulate the popcntb (Population Count Bytes) instruction. */
if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) {
PPC_WARN_EMULATED(popcntb);
PPC_WARN_EMULATED(popcntb, regs);
return emulate_popcntb_inst(regs, instword);
}
/* Emulate isel (Integer Select) instruction */
if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) {
PPC_WARN_EMULATED(isel);
PPC_WARN_EMULATED(isel, regs);
return emulate_isel(regs, instword);
}
......@@ -995,7 +995,7 @@ void SoftwareEmulation(struct pt_regs *regs)
#ifdef CONFIG_MATH_EMULATION
errcode = do_mathemu(regs);
if (errcode >= 0)
PPC_WARN_EMULATED(math);
PPC_WARN_EMULATED(math, regs);
switch (errcode) {
case 0:
......@@ -1018,7 +1018,7 @@ void SoftwareEmulation(struct pt_regs *regs)
#elif defined(CONFIG_8XX_MINIMAL_FPEMU)
errcode = Soft_emulate_8xx(regs);
if (errcode >= 0)
PPC_WARN_EMULATED(8xx);
PPC_WARN_EMULATED(8xx, regs);
switch (errcode) {
case 0:
......@@ -1129,7 +1129,7 @@ void altivec_assist_exception(struct pt_regs *regs)
flush_altivec_to_thread(current);
PPC_WARN_EMULATED(altivec);
PPC_WARN_EMULATED(altivec, regs);
err = emulate_altivec(regs);
if (err == 0) {
regs->nip += 4; /* skip emulated instruction */
......
......@@ -26,11 +26,11 @@ BEGIN_FTR_SECTION
srd r8,r5,r11
mtctr r8
setup:
.Lsetup:
dcbt r9,r4
dcbz r9,r3
add r9,r9,r12
bdnz setup
bdnz .Lsetup
END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
addi r3,r3,-8
srdi r8,r5,7 /* page is copied in 128 byte strides */
......
......@@ -14,68 +14,94 @@
#define STK_PARM(i) (48 + ((i)-3)*8)
#ifdef CONFIG_HCALL_STATS
#ifdef CONFIG_TRACEPOINTS
.section ".toc","aw"
.globl hcall_tracepoint_refcount
hcall_tracepoint_refcount:
.llong 0
.section ".text"
/*
* precall must preserve all registers. use unused STK_PARM()
* areas to save snapshots and opcode.
* areas to save snapshots and opcode. We branch around this
* in early init (eg when populating the MMU hashtable) by using an
* unconditional cpu feature.
*/
#define HCALL_INST_PRECALL \
std r3,STK_PARM(r3)(r1); /* save opcode */ \
mftb r0; /* get timebase and */ \
std r0,STK_PARM(r5)(r1); /* save for later */ \
#define HCALL_INST_PRECALL(FIRST_REG) \
BEGIN_FTR_SECTION; \
mfspr r0,SPRN_PURR; /* get PURR and */ \
std r0,STK_PARM(r6)(r1); /* save for later */ \
END_FTR_SECTION_IFSET(CPU_FTR_PURR);
b 1f; \
END_FTR_SECTION(0, 1); \
ld r12,hcall_tracepoint_refcount@toc(r2); \
cmpdi r12,0; \
beq+ 1f; \
mflr r0; \
std r3,STK_PARM(r3)(r1); \
std r4,STK_PARM(r4)(r1); \
std r5,STK_PARM(r5)(r1); \
std r6,STK_PARM(r6)(r1); \
std r7,STK_PARM(r7)(r1); \
std r8,STK_PARM(r8)(r1); \
std r9,STK_PARM(r9)(r1); \
std r10,STK_PARM(r10)(r1); \
std r0,16(r1); \
addi r4,r1,STK_PARM(FIRST_REG); \
stdu r1,-STACK_FRAME_OVERHEAD(r1); \
bl .__trace_hcall_entry; \
addi r1,r1,STACK_FRAME_OVERHEAD; \
ld r0,16(r1); \
ld r3,STK_PARM(r3)(r1); \
ld r4,STK_PARM(r4)(r1); \
ld r5,STK_PARM(r5)(r1); \
ld r6,STK_PARM(r6)(r1); \
ld r7,STK_PARM(r7)(r1); \
ld r8,STK_PARM(r8)(r1); \
ld r9,STK_PARM(r9)(r1); \
ld r10,STK_PARM(r10)(r1); \
mtlr r0; \
1:
/*
* postcall is performed immediately before function return which
* allows liberal use of volatile registers. We branch around this
* in early init (eg when populating the MMU hashtable) by using an
* unconditional cpu feature.
*/
#define HCALL_INST_POSTCALL \
#define __HCALL_INST_POSTCALL \
BEGIN_FTR_SECTION; \
b 1f; \
END_FTR_SECTION(0, 1); \
ld r4,STK_PARM(r3)(r1); /* validate opcode */ \
cmpldi cr7,r4,MAX_HCALL_OPCODE; \
bgt- cr7,1f; \
\
/* get time and PURR snapshots after hcall */ \
mftb r7; /* timebase after */ \
BEGIN_FTR_SECTION; \
mfspr r8,SPRN_PURR; /* PURR after */ \
ld r6,STK_PARM(r6)(r1); /* PURR before */ \
subf r6,r6,r8; /* delta */ \
END_FTR_SECTION_IFSET(CPU_FTR_PURR); \
ld r5,STK_PARM(r5)(r1); /* timebase before */ \
subf r5,r5,r7; /* time delta */ \
\
/* calculate address of stat structure r4 = opcode */ \
srdi r4,r4,2; /* index into array */ \
mulli r4,r4,HCALL_STAT_SIZE; \
LOAD_REG_ADDR(r7, per_cpu__hcall_stats); \
add r4,r4,r7; \
ld r7,PACA_DATA_OFFSET(r13); /* per cpu offset */ \
add r4,r4,r7; \
\
/* update stats */ \
ld r7,HCALL_STAT_CALLS(r4); /* count */ \
addi r7,r7,1; \
std r7,HCALL_STAT_CALLS(r4); \
ld r7,HCALL_STAT_TB(r4); /* timebase */ \
add r7,r7,r5; \
std r7,HCALL_STAT_TB(r4); \
BEGIN_FTR_SECTION; \
ld r7,HCALL_STAT_PURR(r4); /* PURR */ \
add r7,r7,r6; \
std r7,HCALL_STAT_PURR(r4); \
END_FTR_SECTION_IFSET(CPU_FTR_PURR); \
ld r12,hcall_tracepoint_refcount@toc(r2); \
cmpdi r12,0; \
beq+ 1f; \
mflr r0; \
ld r6,STK_PARM(r3)(r1); \
std r3,STK_PARM(r3)(r1); \
mr r4,r3; \
mr r3,r6; \
std r0,16(r1); \
stdu r1,-STACK_FRAME_OVERHEAD(r1); \
bl .__trace_hcall_exit; \
addi r1,r1,STACK_FRAME_OVERHEAD; \
ld r0,16(r1); \
ld r3,STK_PARM(r3)(r1); \
mtlr r0; \
1:
#define HCALL_INST_POSTCALL_NORETS \
li r5,0; \
__HCALL_INST_POSTCALL
#define HCALL_INST_POSTCALL(BUFREG) \
mr r5,BUFREG; \
__HCALL_INST_POSTCALL
#else
#define HCALL_INST_PRECALL
#define HCALL_INST_POSTCALL
#define HCALL_INST_PRECALL(FIRST_ARG)
#define HCALL_INST_POSTCALL_NORETS
#define HCALL_INST_POSTCALL(BUFREG)
#endif
.text
......@@ -86,11 +112,11 @@ _GLOBAL(plpar_hcall_norets)
mfcr r0
stw r0,8(r1)
HCALL_INST_PRECALL
HCALL_INST_PRECALL(r4)
HVSC /* invoke the hypervisor */
HCALL_INST_POSTCALL
HCALL_INST_POSTCALL_NORETS
lwz r0,8(r1)
mtcrf 0xff,r0
......@@ -102,7 +128,7 @@ _GLOBAL(plpar_hcall)
mfcr r0
stw r0,8(r1)
HCALL_INST_PRECALL
HCALL_INST_PRECALL(r5)
std r4,STK_PARM(r4)(r1) /* Save ret buffer */
......@@ -121,7 +147,7 @@ _GLOBAL(plpar_hcall)
std r6, 16(r12)
std r7, 24(r12)
HCALL_INST_POSTCALL
HCALL_INST_POSTCALL(r12)
lwz r0,8(r1)
mtcrf 0xff,r0
......@@ -168,7 +194,7 @@ _GLOBAL(plpar_hcall9)
mfcr r0
stw r0,8(r1)
HCALL_INST_PRECALL
HCALL_INST_PRECALL(r5)
std r4,STK_PARM(r4)(r1) /* Save ret buffer */
......@@ -196,7 +222,7 @@ _GLOBAL(plpar_hcall9)
std r11,56(r12)
std r0, 64(r12)
HCALL_INST_POSTCALL
HCALL_INST_POSTCALL(r12)
lwz r0,8(r1)
mtcrf 0xff,r0
......
......@@ -26,6 +26,7 @@
#include <asm/hvcall.h>
#include <asm/firmware.h>
#include <asm/cputable.h>
#include <asm/trace.h>
DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
......@@ -100,6 +101,35 @@ static const struct file_operations hcall_inst_seq_fops = {
#define HCALL_ROOT_DIR "hcall_inst"
#define CPU_NAME_BUF_SIZE 32
static void probe_hcall_entry(unsigned long opcode, unsigned long *args)
{
struct hcall_stats *h;
if (opcode > MAX_HCALL_OPCODE)
return;
h = &get_cpu_var(hcall_stats)[opcode / 4];
h->tb_start = mftb();
h->purr_start = mfspr(SPRN_PURR);
}
static void probe_hcall_exit(unsigned long opcode, unsigned long retval,
unsigned long *retbuf)
{
struct hcall_stats *h;
if (opcode > MAX_HCALL_OPCODE)
return;
h = &__get_cpu_var(hcall_stats)[opcode / 4];
h->num_calls++;
h->tb_total = mftb() - h->tb_start;
h->purr_total = mfspr(SPRN_PURR) - h->purr_start;
put_cpu_var(hcall_stats);
}
static int __init hcall_inst_init(void)
{
struct dentry *hcall_root;
......@@ -110,6 +140,14 @@ static int __init hcall_inst_init(void)
if (!firmware_has_feature(FW_FEATURE_LPAR))
return 0;
if (register_trace_hcall_entry(probe_hcall_entry))
return -EINVAL;
if (register_trace_hcall_exit(probe_hcall_exit)) {
unregister_trace_hcall_entry(probe_hcall_entry);
return -EINVAL;
}
hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
if (!hcall_root)
return -ENOMEM;
......
......@@ -39,6 +39,7 @@
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/smp.h>
#include <asm/trace.h>
#include "plpar_wrappers.h"
#include "pseries.h"
......@@ -661,3 +662,35 @@ void arch_free_page(struct page *page, int order)
EXPORT_SYMBOL(arch_free_page);
#endif
#ifdef CONFIG_TRACEPOINTS
/*
* We optimise our hcall path by placing hcall_tracepoint_refcount
* directly in the TOC so we can check if the hcall tracepoints are
* enabled via a single load.
*/
/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
extern long hcall_tracepoint_refcount;
void hcall_tracepoint_regfunc(void)
{
hcall_tracepoint_refcount++;
}
void hcall_tracepoint_unregfunc(void)
{
hcall_tracepoint_refcount--;
}
void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
{
trace_hcall_entry(opcode, args);
}
void __trace_hcall_exit(long opcode, unsigned long retval,
unsigned long *retbuf)
{
trace_hcall_exit(opcode, retval, retbuf);
}
#endif
......@@ -106,6 +106,8 @@ enum perf_sw_ids {
PERF_COUNT_SW_CPU_MIGRATIONS = 4,
PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
PERF_COUNT_SW_EMULATION_FAULTS = 8,
PERF_COUNT_SW_MAX, /* non-ABI */
};
......
......@@ -102,6 +102,8 @@ enum perf_sw_ids {
PERF_COUNT_SW_CPU_MIGRATIONS = 4,
PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
PERF_COUNT_SW_EMULATION_FAULTS = 8,
PERF_COUNT_SW_MAX, /* non-ABI */
};
......
......@@ -4274,6 +4274,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
case PERF_COUNT_SW_CONTEXT_SWITCHES:
case PERF_COUNT_SW_CPU_MIGRATIONS:
case PERF_COUNT_SW_ALIGNMENT_FAULTS:
case PERF_COUNT_SW_EMULATION_FAULTS:
if (!event->parent) {
atomic_inc(&perf_swevent_enabled[event_id]);
event->destroy = sw_perf_event_destroy;
......
perf-bench(1)
============
NAME
----
perf-bench - General framework for benchmark suites
SYNOPSIS
--------
[verse]
'perf bench' [<common options>] <subsystem> <suite> [<options>]
DESCRIPTION
-----------
This 'perf bench' command is general framework for benchmark suites.
COMMON OPTIONS
--------------
-f::
--format=::
Specify format style.
Current available format styles are,
'default'::
Default style. This is mainly for human reading.
---------------------
% perf bench sched pipe # with no style specify
(executing 1000000 pipe operations between two tasks)
Total time:5.855 sec
5.855061 usecs/op
170792 ops/sec
---------------------
'simple'::
This simple style is friendly for automated
processing by scripts.
---------------------
% perf bench --format=simple sched pipe # specified simple
5.988
---------------------
SUBSYSTEM
---------
'sched'::
Scheduler and IPC mechanisms.
SUITES FOR 'sched'
~~~~~~~~~~~~~~~~~~
*messaging*::
Suite for evaluating performance of scheduler and IPC mechanisms.
Based on hackbench by Rusty Russell.
Options of *pipe*
^^^^^^^^^^^^^^^^^
-p::
--pipe::
Use pipe() instead of socketpair()
-t::
--thread::
Be multi thread instead of multi process
-g::
--group=::
Specify number of groups
-l::
--loop=::
Specify number of loops
Example of *messaging*
^^^^^^^^^^^^^^^^^^^^^^
---------------------
% perf bench sched messaging # run with default
options (20 sender and receiver processes per group)
(10 groups == 400 processes run)
Total time:0.308 sec
% perf bench sched messaging -t -g 20 # be multi-thread,with 20 groups
(20 sender and receiver threads per group)
(20 groups == 800 threads run)
Total time:0.582 sec
---------------------
*pipe*::
Suite for pipe() system call.
Based on pipe-test-1m.c by Ingo Molnar.
Options of *pipe*
^^^^^^^^^^^^^^^^^
-l::
--loop=::
Specify number of loops.
Example of *pipe*
^^^^^^^^^^^^^^^^^
---------------------
% perf bench sched pipe
(executing 1000000 pipe operations between two tasks)
Total time:8.091 sec
8.091833 usecs/op
123581 ops/sec
% perf bench sched pipe -l 1000 # loop 1000
(executing 1000 pipe operations between two tasks)
Total time:0.016 sec
16.948000 usecs/op
59004 ops/sec
---------------------
SEE ALSO
--------
linkperf:perf[1]
......@@ -421,6 +421,13 @@ LIB_OBJS += util/hist.o
LIB_OBJS += util/data_map.o
BUILTIN_OBJS += builtin-annotate.o
BUILTIN_OBJS += builtin-bench.o
# Benchmark modules
BUILTIN_OBJS += bench/sched-messaging.o
BUILTIN_OBJS += bench/sched-pipe.o
BUILTIN_OBJS += builtin-help.o
BUILTIN_OBJS += builtin-sched.o
BUILTIN_OBJS += builtin-list.o
......
#ifndef BENCH_H
#define BENCH_H
extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
#define BENCH_FORMAT_SIMPLE_STR "simple"
#define BENCH_FORMAT_SIMPLE 1
#define BENCH_FORMAT_UNKNOWN -1
extern int bench_format;
#endif
/*
*
* builtin-bench-messaging.c
*
* messaging: Benchmark for scheduler and IPC mechanisms
*
* Based on hackbench by Rusty Russell <rusty@rustcorp.com.au>
* Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
*
*/
#include "../perf.h"
#include "../util/util.h"
#include "../util/parse-options.h"
#include "../builtin.h"
#include "bench.h"
/* Test groups of 20 processes spraying to 20 receivers */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/poll.h>
#include <limits.h>
#define DATASIZE 100
static int use_pipes = 0;
static unsigned int loops = 100;
static unsigned int thread_mode = 0;
static unsigned int num_groups = 10;
struct sender_context {
unsigned int num_fds;
int ready_out;
int wakefd;
int out_fds[0];
};
struct receiver_context {
unsigned int num_packets;
int in_fds[2];
int ready_out;
int wakefd;
};
static void barf(const char *msg)
{
fprintf(stderr, "%s (error: %s)\n", msg, strerror(errno));
exit(1);
}
static void fdpair(int fds[2])
{
if (use_pipes) {
if (pipe(fds) == 0)
return;
} else {
if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0)
return;
}
barf(use_pipes ? "pipe()" : "socketpair()");
}
/* Block until we're ready to go */
static void ready(int ready_out, int wakefd)
{
char dummy;
struct pollfd pollfd = { .fd = wakefd, .events = POLLIN };
/* Tell them we're ready. */
if (write(ready_out, &dummy, 1) != 1)
barf("CLIENT: ready write");
/* Wait for "GO" signal */
if (poll(&pollfd, 1, -1) != 1)
barf("poll");
}
/* Sender sprays loops messages down each file descriptor */
static void *sender(struct sender_context *ctx)
{
char data[DATASIZE];
unsigned int i, j;
ready(ctx->ready_out, ctx->wakefd);
/* Now pump to every receiver. */
for (i = 0; i < loops; i++) {
for (j = 0; j < ctx->num_fds; j++) {
int ret, done = 0;
again:
ret = write(ctx->out_fds[j], data + done,
sizeof(data)-done);
if (ret < 0)
barf("SENDER: write");
done += ret;
if (done < DATASIZE)
goto again;
}
}
return NULL;
}
/* One receiver per fd */
static void *receiver(struct receiver_context* ctx)
{
unsigned int i;
if (!thread_mode)
close(ctx->in_fds[1]);
/* Wait for start... */
ready(ctx->ready_out, ctx->wakefd);
/* Receive them all */
for (i = 0; i < ctx->num_packets; i++) {
char data[DATASIZE];
int ret, done = 0;
again:
ret = read(ctx->in_fds[0], data + done, DATASIZE - done);
if (ret < 0)
barf("SERVER: read");
done += ret;
if (done < DATASIZE)
goto again;
}
return NULL;
}
static pthread_t create_worker(void *ctx, void *(*func)(void *))
{
pthread_attr_t attr;
pthread_t childid;
int err;
if (!thread_mode) {
/* process mode */
/* Fork the receiver. */
switch (fork()) {
case -1:
barf("fork()");
break;
case 0:
(*func) (ctx);
exit(0);
break;
default:
break;
}
return (pthread_t)0;
}
if (pthread_attr_init(&attr) != 0)
barf("pthread_attr_init:");
#ifndef __ia64__
if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
barf("pthread_attr_setstacksize");
#endif
err = pthread_create(&childid, &attr, func, ctx);
if (err != 0) {
fprintf(stderr, "pthread_create failed: %s (%d)\n",
strerror(err), err);
exit(-1);
}
return childid;
}
static void reap_worker(pthread_t id)
{
int proc_status;
void *thread_status;
if (!thread_mode) {
/* process mode */
wait(&proc_status);
if (!WIFEXITED(proc_status))
exit(1);
} else {
pthread_join(id, &thread_status);
}
}
/* One group of senders and receivers */
static unsigned int group(pthread_t *pth,
unsigned int num_fds,
int ready_out,
int wakefd)
{
unsigned int i;
struct sender_context *snd_ctx = malloc(sizeof(struct sender_context)
+ num_fds * sizeof(int));
if (!snd_ctx)
barf("malloc()");
for (i = 0; i < num_fds; i++) {
int fds[2];
struct receiver_context *ctx = malloc(sizeof(*ctx));
if (!ctx)
barf("malloc()");
/* Create the pipe between client and server */
fdpair(fds);
ctx->num_packets = num_fds * loops;
ctx->in_fds[0] = fds[0];
ctx->in_fds[1] = fds[1];
ctx->ready_out = ready_out;
ctx->wakefd = wakefd;
pth[i] = create_worker(ctx, (void *)receiver);
snd_ctx->out_fds[i] = fds[1];
if (!thread_mode)
close(fds[0]);
}
/* Now we have all the fds, fork the senders */
for (i = 0; i < num_fds; i++) {
snd_ctx->ready_out = ready_out;
snd_ctx->wakefd = wakefd;
snd_ctx->num_fds = num_fds;
pth[num_fds+i] = create_worker(snd_ctx, (void *)sender);
}
/* Close the fds we have left */
if (!thread_mode)
for (i = 0; i < num_fds; i++)
close(snd_ctx->out_fds[i]);
/* Return number of children to reap */
return num_fds * 2;
}
static const struct option options[] = {
OPT_BOOLEAN('p', "pipe", &use_pipes,
"Use pipe() instead of socketpair()"),
OPT_BOOLEAN('t', "thread", &thread_mode,
"Be multi thread instead of multi process"),
OPT_INTEGER('g', "group", &num_groups,
"Specify number of groups"),
OPT_INTEGER('l', "loop", &loops,
"Specify number of loops"),
OPT_END()
};
static const char * const bench_sched_message_usage[] = {
"perf bench sched messaging <options>",
NULL
};
int bench_sched_messaging(int argc, const char **argv,
const char *prefix __used)
{
unsigned int i, total_children;
struct timeval start, stop, diff;
unsigned int num_fds = 20;
int readyfds[2], wakefds[2];
char dummy;
pthread_t *pth_tab;
argc = parse_options(argc, argv, options,
bench_sched_message_usage, 0);
pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t));
if (!pth_tab)
barf("main:malloc()");
fdpair(readyfds);
fdpair(wakefds);
total_children = 0;
for (i = 0; i < num_groups; i++)
total_children += group(pth_tab+total_children, num_fds,
readyfds[1], wakefds[0]);
/* Wait for everyone to be ready */
for (i = 0; i < total_children; i++)
if (read(readyfds[0], &dummy, 1) != 1)
barf("Reading for readyfds");
gettimeofday(&start, NULL);
/* Kick them off */
if (write(wakefds[1], &dummy, 1) != 1)
barf("Writing to start them");
/* Reap them all */
for (i = 0; i < total_children; i++)
reap_worker(pth_tab[i]);
gettimeofday(&stop, NULL);
timersub(&stop, &start, &diff);
switch (bench_format) {
case BENCH_FORMAT_DEFAULT:
printf("# %d sender and receiver %s per group\n",
num_fds, thread_mode ? "threads" : "processes");
printf("# %d groups == %d %s run\n\n",
num_groups, num_groups * 2 * num_fds,
thread_mode ? "threads" : "processes");
printf(" %14s: %lu.%03lu [sec]\n", "Total time",
diff.tv_sec, diff.tv_usec/1000);
break;
case BENCH_FORMAT_SIMPLE:
printf("%lu.%03lu\n", diff.tv_sec, diff.tv_usec/1000);
break;
default:
/* reaching here is something disaster */
fprintf(stderr, "Unknown format:%d\n", bench_format);
exit(1);
break;
}
return 0;
}
/*
*
* builtin-bench-pipe.c
*
* pipe: Benchmark for pipe()
*
* Based on pipe-test-1m.c by Ingo Molnar <mingo@redhat.com>
* http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c
* Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
*
*/
#include "../perf.h"
#include "../util/util.h"
#include "../util/parse-options.h"
#include "../builtin.h"
#include "bench.h"
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <sys/wait.h>
#include <linux/unistd.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <sys/time.h>
#include <sys/types.h>
#define LOOPS_DEFAULT 1000000
static int loops = LOOPS_DEFAULT;
static const struct option options[] = {
OPT_INTEGER('l', "loop", &loops,
"Specify number of loops"),
OPT_END()
};
static const char * const bench_sched_pipe_usage[] = {
"perf bench sched pipe <options>",
NULL
};
int bench_sched_pipe(int argc, const char **argv,
const char *prefix __used)
{
int pipe_1[2], pipe_2[2];
int m = 0, i;
struct timeval start, stop, diff;
unsigned long long result_usec = 0;
/*
* why does "ret" exist?
* discarding returned value of read(), write()
* causes error in building environment for perf
*/
int ret, wait_stat;
pid_t pid, retpid;
argc = parse_options(argc, argv, options,
bench_sched_pipe_usage, 0);
assert(!pipe(pipe_1));
assert(!pipe(pipe_2));
pid = fork();
assert(pid >= 0);
gettimeofday(&start, NULL);
if (!pid) {
for (i = 0; i < loops; i++) {
ret = read(pipe_1[0], &m, sizeof(int));
ret = write(pipe_2[1], &m, sizeof(int));
}
} else {
for (i = 0; i < loops; i++) {
ret = write(pipe_1[1], &m, sizeof(int));
ret = read(pipe_2[0], &m, sizeof(int));
}
}
gettimeofday(&stop, NULL);
timersub(&stop, &start, &diff);
if (pid) {
retpid = waitpid(pid, &wait_stat, 0);
assert((retpid == pid) && WIFEXITED(wait_stat));
return 0;
}
switch (bench_format) {
case BENCH_FORMAT_DEFAULT:
printf("# Extecuted %d pipe operations between two tasks\n\n",
loops);
result_usec = diff.tv_sec * 1000000;
result_usec += diff.tv_usec;
printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
diff.tv_sec, diff.tv_usec/1000);
printf(" %14lf usecs/op\n",
(double)result_usec / (double)loops);
printf(" %14d ops/sec\n",
(int)((double)loops /
((double)result_usec / (double)1000000)));
break;
case BENCH_FORMAT_SIMPLE:
printf("%lu.%03lu\n",
diff.tv_sec, diff.tv_usec / 1000);
break;
default:
/* reaching here is something disaster */
fprintf(stderr, "Unknown format:%d\n", bench_format);
exit(1);
break;
}
return 0;
}
/*
*
* builtin-bench.c
*
* General benchmarking subsystem provided by perf
*
* Copyright (C) 2009, Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
*
*/
/*
*
* Available subsystem list:
* sched ... scheduler and IPC mechanism
*
*/
#include "perf.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "builtin.h"
#include "bench/bench.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct bench_suite {
const char *name;
const char *summary;
int (*fn)(int, const char **, const char *);
};
static struct bench_suite sched_suites[] = {
{ "messaging",
"Benchmark for scheduler and IPC mechanisms",
bench_sched_messaging },
{ "pipe",
"Flood of communication over pipe() between two processes",
bench_sched_pipe },
{ NULL,
NULL,
NULL }
};
struct bench_subsys {
const char *name;
const char *summary;
struct bench_suite *suites;
};
static struct bench_subsys subsystems[] = {
{ "sched",
"scheduler and IPC mechanism",
sched_suites },
{ NULL,
NULL,
NULL }
};
static void dump_suites(int subsys_index)
{
int i;
printf("List of available suites for %s...\n\n",
subsystems[subsys_index].name);
for (i = 0; subsystems[subsys_index].suites[i].name; i++)
printf("\t%s: %s\n",
subsystems[subsys_index].suites[i].name,
subsystems[subsys_index].suites[i].summary);
printf("\n");
return;
}
static char *bench_format_str;
int bench_format = BENCH_FORMAT_DEFAULT;
static const struct option bench_options[] = {
OPT_STRING('f', "format", &bench_format_str, "default",
"Specify format style"),
OPT_END()
};
static const char * const bench_usage[] = {
"perf bench [<common options>] <subsystem> <suite> [<options>]",
NULL
};
static void print_usage(void)
{
int i;
printf("Usage: \n");
for (i = 0; bench_usage[i]; i++)
printf("\t%s\n", bench_usage[i]);
printf("\n");
printf("List of available subsystems...\n\n");
for (i = 0; subsystems[i].name; i++)
printf("\t%s: %s\n",
subsystems[i].name, subsystems[i].summary);
printf("\n");
}
static int bench_str2int(char *str)
{
if (!str)
return BENCH_FORMAT_DEFAULT;
if (!strcmp(str, BENCH_FORMAT_DEFAULT_STR))
return BENCH_FORMAT_DEFAULT;
else if (!strcmp(str, BENCH_FORMAT_SIMPLE_STR))
return BENCH_FORMAT_SIMPLE;
return BENCH_FORMAT_UNKNOWN;
}
int cmd_bench(int argc, const char **argv, const char *prefix __used)
{
int i, j, status = 0;
if (argc < 2) {
/* No subsystem specified. */
print_usage();
goto end;
}
argc = parse_options(argc, argv, bench_options, bench_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
bench_format = bench_str2int(bench_format_str);
if (bench_format == BENCH_FORMAT_UNKNOWN) {
printf("Unknown format descriptor:%s\n", bench_format_str);
goto end;
}
if (argc < 1) {
print_usage();
goto end;
}
for (i = 0; subsystems[i].name; i++) {
if (strcmp(subsystems[i].name, argv[0]))
continue;
if (argc < 2) {
/* No suite specified. */
dump_suites(i);
goto end;
}
for (j = 0; subsystems[i].suites[j].name; j++) {
if (strcmp(subsystems[i].suites[j].name, argv[1]))
continue;
if (bench_format == BENCH_FORMAT_DEFAULT)
printf("# Running %s/%s benchmark...\n",
subsystems[i].name,
subsystems[i].suites[j].name);
status = subsystems[i].suites[j].fn(argc - 1,
argv + 1, prefix);
goto end;
}
if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
dump_suites(i);
goto end;
}
printf("Unknown suite:%s for %s\n", argv[1], argv[0]);
status = 1;
goto end;
}
printf("Unknown subsystem:%s\n", argv[0]);
status = 1;
end:
return status;
}
......@@ -15,6 +15,7 @@ extern int read_line_with_nul(char *buf, int size, FILE *file);
extern int check_pager_config(const char *cmd);
extern int cmd_annotate(int argc, const char **argv, const char *prefix);
extern int cmd_bench(int argc, const char **argv, const char *prefix);
extern int cmd_help(int argc, const char **argv, const char *prefix);
extern int cmd_sched(int argc, const char **argv, const char *prefix);
extern int cmd_list(int argc, const char **argv, const char *prefix);
......
......@@ -3,6 +3,7 @@
# command name category [deprecated] [common]
#
perf-annotate mainporcelain common
perf-bench mainporcelain common
perf-list mainporcelain common
perf-sched mainporcelain common
perf-record mainporcelain common
......
......@@ -137,6 +137,8 @@ enum sw_event_ids {
PERF_COUNT_SW_CPU_MIGRATIONS = 4,
PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
PERF_COUNT_SW_EMULATION_FAULTS = 8,
};
Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
......
......@@ -289,6 +289,7 @@ static void handle_internal_command(int argc, const char **argv)
{ "list", cmd_list, 0 },
{ "record", cmd_record, 0 },
{ "report", cmd_report, 0 },
{ "bench", cmd_bench, 0 },
{ "stat", cmd_stat, 0 },
{ "timechart", cmd_timechart, 0 },
{ "top", cmd_top, 0 },
......
......@@ -48,6 +48,8 @@ static struct event_symbol event_symbols[] = {
{ CSW(PAGE_FAULTS_MAJ), "major-faults", "" },
{ CSW(CONTEXT_SWITCHES), "context-switches", "cs" },
{ CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" },
{ CSW(ALIGNMENT_FAULTS), "alignment-faults", "" },
{ CSW(EMULATION_FAULTS), "emulation-faults", "" },
};
#define __PERF_EVENT_FIELD(config, name) \
......@@ -76,6 +78,8 @@ static const char *sw_event_names[] = {
"CPU-migrations",
"minor-faults",
"major-faults",
"alignment-faults",
"emulation-faults",
};
#define MAX_ALIASES 8
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment