Commit b847d050 authored by Ingo Molnar

Merge tag 'perf-core-for-mingo' of...

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

  * Revert "perf sched: Handle PERF_RECORD_EXIT events" to get 'perf sched lat'
    back working.

  * We don't use Newt anymore, just plain libslang.

  * Kill a bunch of die() calls, from Namhyung Kim.

  * Add --no-demangle to report/top, from Namhyung Kim.

  * Fix dependency of the python binding wrt libtraceevent, from Naohiro Aota.

  * Introduce per core aggregation in 'perf stat', from Stephane Eranian.

  * Add memory profiling via PEBS, from Stephane Eranian.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents 0a119538 d06f7911
...@@ -71,6 +71,7 @@ ...@@ -71,6 +71,7 @@
#define MSR_IA32_PEBS_ENABLE 0x000003f1 #define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345 #define MSR_IA32_PERF_CAPABILITIES 0x00000345
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
#define MSR_MTRRfix64K_00000 0x00000250 #define MSR_MTRRfix64K_00000 0x00000250
#define MSR_MTRRfix16K_80000 0x00000258 #define MSR_MTRRfix16K_80000 0x00000258
......
...@@ -1316,9 +1316,16 @@ static struct attribute_group x86_pmu_format_group = { ...@@ -1316,9 +1316,16 @@ static struct attribute_group x86_pmu_format_group = {
*/ */
static void __init filter_events(struct attribute **attrs) static void __init filter_events(struct attribute **attrs)
{ {
struct device_attribute *d;
struct perf_pmu_events_attr *pmu_attr;
int i, j; int i, j;
for (i = 0; attrs[i]; i++) { for (i = 0; attrs[i]; i++) {
d = (struct device_attribute *)attrs[i];
pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
/* str trumps id */
if (pmu_attr->event_str)
continue;
if (x86_pmu.event_map(i)) if (x86_pmu.event_map(i))
continue; continue;
...@@ -1330,22 +1337,45 @@ static void __init filter_events(struct attribute **attrs) ...@@ -1330,22 +1337,45 @@ static void __init filter_events(struct attribute **attrs)
} }
} }
static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, /* Merge two pointer arrays */
/*
 * Concatenate two NULL-terminated attribute pointer arrays.
 *
 * @a: first array, copied first; walked up to its NULL terminator
 * @b: second array, appended after the entries of @a
 *
 * Returns a newly kmalloc'ed, NULL-terminated array containing the
 * entries of @a followed by the entries of @b, or NULL when the
 * allocation fails.  Only the pointers are copied; the inputs are
 * left untouched.
 */
static __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
{
	struct attribute **merged, **out;
	int na = 0, nb = 0;

	while (a[na])
		na++;
	while (b[nb])
		nb++;

	/* one extra slot for the terminating NULL */
	merged = kmalloc(sizeof(struct attribute *) * (na + nb + 1), GFP_KERNEL);
	if (!merged)
		return NULL;

	out = merged;
	while (*a)
		*out++ = *a++;
	while (*b)
		*out++ = *b++;
	*out = NULL;

	return merged;
}
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page) char *page)
{ {
struct perf_pmu_events_attr *pmu_attr = \ struct perf_pmu_events_attr *pmu_attr = \
container_of(attr, struct perf_pmu_events_attr, attr); container_of(attr, struct perf_pmu_events_attr, attr);
u64 config = x86_pmu.event_map(pmu_attr->id); u64 config = x86_pmu.event_map(pmu_attr->id);
return x86_pmu.events_sysfs_show(page, config);
}
#define EVENT_VAR(_id) event_attr_##_id /* string trumps id */
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr if (pmu_attr->event_str)
return sprintf(page, "%s", pmu_attr->event_str);
#define EVENT_ATTR(_name, _id) \ return x86_pmu.events_sysfs_show(page, config);
PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \ }
events_sysfs_show)
EVENT_ATTR(cpu-cycles, CPU_CYCLES ); EVENT_ATTR(cpu-cycles, CPU_CYCLES );
EVENT_ATTR(instructions, INSTRUCTIONS ); EVENT_ATTR(instructions, INSTRUCTIONS );
...@@ -1459,16 +1489,27 @@ static int __init init_hw_perf_events(void) ...@@ -1459,16 +1489,27 @@ static int __init init_hw_perf_events(void)
unconstrained = (struct event_constraint) unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
0, x86_pmu.num_counters, 0); 0, x86_pmu.num_counters, 0, 0);
x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
x86_pmu_format_group.attrs = x86_pmu.format_attrs; x86_pmu_format_group.attrs = x86_pmu.format_attrs;
if (x86_pmu.event_attrs)
x86_pmu_events_group.attrs = x86_pmu.event_attrs;
if (!x86_pmu.events_sysfs_show) if (!x86_pmu.events_sysfs_show)
x86_pmu_events_group.attrs = &empty_attrs; x86_pmu_events_group.attrs = &empty_attrs;
else else
filter_events(x86_pmu_events_group.attrs); filter_events(x86_pmu_events_group.attrs);
if (x86_pmu.cpu_events) {
struct attribute **tmp;
tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
if (!WARN_ON(!tmp))
x86_pmu_events_group.attrs = tmp;
}
pr_info("... version: %d\n", x86_pmu.version); pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits); pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
pr_info("... generic registers: %d\n", x86_pmu.num_counters); pr_info("... generic registers: %d\n", x86_pmu.num_counters);
......
...@@ -46,6 +46,7 @@ enum extra_reg_type { ...@@ -46,6 +46,7 @@ enum extra_reg_type {
EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
EXTRA_REG_LBR = 2, /* lbr_select */ EXTRA_REG_LBR = 2, /* lbr_select */
EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */
EXTRA_REG_MAX /* number of entries needed */ EXTRA_REG_MAX /* number of entries needed */
}; };
...@@ -59,7 +60,13 @@ struct event_constraint { ...@@ -59,7 +60,13 @@ struct event_constraint {
u64 cmask; u64 cmask;
int weight; int weight;
int overlap; int overlap;
int flags;
}; };
/*
* struct event_constraint flags
*/
#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */
#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */
struct amd_nb { struct amd_nb {
int nb_id; /* NorthBridge id */ int nb_id; /* NorthBridge id */
...@@ -170,16 +177,17 @@ struct cpu_hw_events { ...@@ -170,16 +177,17 @@ struct cpu_hw_events {
void *kfree_on_online; void *kfree_on_online;
}; };
#define __EVENT_CONSTRAINT(c, n, m, w, o) {\ #define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
{ .idxmsk64 = (n) }, \ { .idxmsk64 = (n) }, \
.code = (c), \ .code = (c), \
.cmask = (m), \ .cmask = (m), \
.weight = (w), \ .weight = (w), \
.overlap = (o), \ .overlap = (o), \
.flags = f, \
} }
#define EVENT_CONSTRAINT(c, n, m) \ #define EVENT_CONSTRAINT(c, n, m) \
__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0) __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
/* /*
* The overlap flag marks event constraints with overlapping counter * The overlap flag marks event constraints with overlapping counter
...@@ -203,7 +211,7 @@ struct cpu_hw_events { ...@@ -203,7 +211,7 @@ struct cpu_hw_events {
* and its counter masks must be kept at a minimum. * and its counter masks must be kept at a minimum.
*/ */
#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \ #define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1) __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
/* /*
* Constraint on the Event code. * Constraint on the Event code.
...@@ -231,6 +239,14 @@ struct cpu_hw_events { ...@@ -231,6 +239,14 @@ struct cpu_hw_events {
#define INTEL_UEVENT_CONSTRAINT(c, n) \ #define INTEL_UEVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
#define INTEL_PLD_CONSTRAINT(c, n) \
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
#define INTEL_PST_CONSTRAINT(c, n) \
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
#define EVENT_CONSTRAINT_END \ #define EVENT_CONSTRAINT_END \
EVENT_CONSTRAINT(0, 0, 0) EVENT_CONSTRAINT(0, 0, 0)
...@@ -260,12 +276,22 @@ struct extra_reg { ...@@ -260,12 +276,22 @@ struct extra_reg {
.msr = (ms), \ .msr = (ms), \
.config_mask = (m), \ .config_mask = (m), \
.valid_mask = (vm), \ .valid_mask = (vm), \
.idx = EXTRA_REG_##i \ .idx = EXTRA_REG_##i, \
} }
#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx) EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
INTEL_UEVENT_EXTRA_REG(c, \
MSR_PEBS_LD_LAT_THRESHOLD, \
0xffff, \
LDLAT)
#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0) #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
union perf_capabilities { union perf_capabilities {
...@@ -355,8 +381,10 @@ struct x86_pmu { ...@@ -355,8 +381,10 @@ struct x86_pmu {
*/ */
int attr_rdpmc; int attr_rdpmc;
struct attribute **format_attrs; struct attribute **format_attrs;
struct attribute **event_attrs;
ssize_t (*events_sysfs_show)(char *page, u64 config); ssize_t (*events_sysfs_show)(char *page, u64 config);
struct attribute **cpu_events;
/* /*
* CPU Hotplug hooks * CPU Hotplug hooks
...@@ -421,6 +449,23 @@ do { \ ...@@ -421,6 +449,23 @@ do { \
#define ERF_NO_HT_SHARING 1 #define ERF_NO_HT_SHARING 1
#define ERF_HAS_RSP_1 2 #define ERF_HAS_RSP_1 2
#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
#define EVENT_ATTR(_name, _id) \
static struct perf_pmu_events_attr EVENT_VAR(_id) = { \
.attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
.id = PERF_COUNT_HW_##_id, \
.event_str = NULL, \
};
#define EVENT_ATTR_STR(_name, v, str) \
static struct perf_pmu_events_attr event_attr_##v = { \
.attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
.id = 0, \
.event_str = str, \
};
extern struct x86_pmu x86_pmu __read_mostly; extern struct x86_pmu x86_pmu __read_mostly;
DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events); DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
...@@ -628,6 +673,9 @@ int p6_pmu_init(void); ...@@ -628,6 +673,9 @@ int p6_pmu_init(void);
int knc_pmu_init(void); int knc_pmu_init(void);
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page);
#else /* CONFIG_CPU_SUP_INTEL */ #else /* CONFIG_CPU_SUP_INTEL */
static inline void reserve_ds_buffers(void) static inline void reserve_ds_buffers(void)
......
...@@ -81,6 +81,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = ...@@ -81,6 +81,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
{ {
INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
EVENT_EXTRA_END EVENT_EXTRA_END
}; };
...@@ -136,6 +137,7 @@ static struct extra_reg intel_westmere_extra_regs[] __read_mostly = ...@@ -136,6 +137,7 @@ static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
{ {
INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1), INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
EVENT_EXTRA_END EVENT_EXTRA_END
}; };
...@@ -155,9 +157,25 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly = ...@@ -155,9 +157,25 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
static struct extra_reg intel_snb_extra_regs[] __read_mostly = { static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
EVENT_EXTRA_END EVENT_EXTRA_END
}; };
EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
struct attribute *nhm_events_attrs[] = {
EVENT_PTR(mem_ld_nhm),
NULL,
};
struct attribute *snb_events_attrs[] = {
EVENT_PTR(mem_ld_snb),
EVENT_PTR(mem_st_snb),
NULL,
};
static u64 intel_pmu_event_map(int hw_event) static u64 intel_pmu_event_map(int hw_event)
{ {
return intel_perfmon_event_map[hw_event]; return intel_perfmon_event_map[hw_event];
...@@ -1392,10 +1410,13 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) ...@@ -1392,10 +1410,13 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
if (x86_pmu.event_constraints) { if (x86_pmu.event_constraints) {
for_each_event_constraint(c, x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) {
if ((event->hw.config & c->cmask) == c->code) if ((event->hw.config & c->cmask) == c->code) {
/* hw.flags zeroed at initialization */
event->hw.flags |= c->flags;
return c; return c;
} }
} }
}
return &unconstrained; return &unconstrained;
} }
...@@ -1438,6 +1459,7 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, ...@@ -1438,6 +1459,7 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
static void intel_put_event_constraints(struct cpu_hw_events *cpuc, static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event) struct perf_event *event)
{ {
event->hw.flags = 0;
intel_put_shared_regs_event_constraints(cpuc, event); intel_put_shared_regs_event_constraints(cpuc, event);
} }
...@@ -1761,6 +1783,8 @@ static void intel_pmu_flush_branch_stack(void) ...@@ -1761,6 +1783,8 @@ static void intel_pmu_flush_branch_stack(void)
PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
PMU_FORMAT_ATTR(ldlat, "config1:0-15");
static struct attribute *intel_arch3_formats_attr[] = { static struct attribute *intel_arch3_formats_attr[] = {
&format_attr_event.attr, &format_attr_event.attr,
&format_attr_umask.attr, &format_attr_umask.attr,
...@@ -1771,6 +1795,7 @@ static struct attribute *intel_arch3_formats_attr[] = { ...@@ -1771,6 +1795,7 @@ static struct attribute *intel_arch3_formats_attr[] = {
&format_attr_cmask.attr, &format_attr_cmask.attr,
&format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */ &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
&format_attr_ldlat.attr, /* PEBS load latency */
NULL, NULL,
}; };
...@@ -2031,6 +2056,8 @@ __init int intel_pmu_init(void) ...@@ -2031,6 +2056,8 @@ __init int intel_pmu_init(void)
x86_pmu.enable_all = intel_pmu_nhm_enable_all; x86_pmu.enable_all = intel_pmu_nhm_enable_all;
x86_pmu.extra_regs = intel_nehalem_extra_regs; x86_pmu.extra_regs = intel_nehalem_extra_regs;
x86_pmu.cpu_events = nhm_events_attrs;
/* UOPS_ISSUED.STALLED_CYCLES */ /* UOPS_ISSUED.STALLED_CYCLES */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
...@@ -2074,6 +2101,8 @@ __init int intel_pmu_init(void) ...@@ -2074,6 +2101,8 @@ __init int intel_pmu_init(void)
x86_pmu.extra_regs = intel_westmere_extra_regs; x86_pmu.extra_regs = intel_westmere_extra_regs;
x86_pmu.er_flags |= ERF_HAS_RSP_1; x86_pmu.er_flags |= ERF_HAS_RSP_1;
x86_pmu.cpu_events = nhm_events_attrs;
/* UOPS_ISSUED.STALLED_CYCLES */ /* UOPS_ISSUED.STALLED_CYCLES */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
...@@ -2102,6 +2131,8 @@ __init int intel_pmu_init(void) ...@@ -2102,6 +2131,8 @@ __init int intel_pmu_init(void)
x86_pmu.er_flags |= ERF_HAS_RSP_1; x86_pmu.er_flags |= ERF_HAS_RSP_1;
x86_pmu.er_flags |= ERF_NO_HT_SHARING; x86_pmu.er_flags |= ERF_NO_HT_SHARING;
x86_pmu.cpu_events = snb_events_attrs;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
...@@ -2128,6 +2159,8 @@ __init int intel_pmu_init(void) ...@@ -2128,6 +2159,8 @@ __init int intel_pmu_init(void)
x86_pmu.er_flags |= ERF_HAS_RSP_1; x86_pmu.er_flags |= ERF_HAS_RSP_1;
x86_pmu.er_flags |= ERF_NO_HT_SHARING; x86_pmu.er_flags |= ERF_NO_HT_SHARING;
x86_pmu.cpu_events = snb_events_attrs;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
......
...@@ -24,6 +24,130 @@ struct pebs_record_32 { ...@@ -24,6 +24,130 @@ struct pebs_record_32 {
*/ */
/*
 * Overlay used to decode the data-source status word of a PEBS record.
 *
 * 'val' holds the raw 64-bit value.  The two anonymous structs give a
 * load view and a store view of the same storage; each view declares
 * 32 bits of fields in total (4+1+1+26 and 1+3+1+1+26).
 *
 * NOTE(review): the bit numbers quoted below follow the comments in
 * load_latency_data()/precise_store_data() and assume bit-fields are
 * allocated starting at the least significant bit - confirm for the
 * target ABI.
 */
union intel_x86_pebs_dse {
u64 val;
/* load view, consumed by load_latency_data() */
struct {
unsigned int ld_dse:4; /* bits 0-3: index into pebs_data_source[] */
unsigned int ld_stlb_miss:1; /* bit 4: load missed the 2nd level TLB */
unsigned int ld_locked:1; /* bit 5: locked prefix */
unsigned int ld_reserved:26;
};
/* store view, consumed by precise_store_data() */
struct {
unsigned int st_l1d_hit:1; /* bit 0: store hit the L1 data cache */
unsigned int st_reserved1:3;
unsigned int st_stlb_miss:1; /* bit 4: store missed the 2nd level TLB */
unsigned int st_locked:1; /* bit 5: locked prefix */
unsigned int st_reserved2:26;
};
};
/*
* Map PEBS Load Latency Data Source encodings to generic
* memory data source information
*/
#define P(a, b) PERF_MEM_S(a, b)
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
/*
 * Generic memory data source value for each PEBS load-latency data
 * source encoding.  The table is indexed by the 4-bit ld_dse field of
 * union intel_x86_pebs_dse, hence the 16 entries 0x00..0x0f.
 */
static const u64 pebs_data_source[] = {
	/* unknown L3 */
	[0x00] = P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),
	/* L1 local */
	[0x01] = OP_LH | P(LVL, L1) | P(SNOOP, NONE),
	/* LFB hit */
	[0x02] = OP_LH | P(LVL, LFB) | P(SNOOP, NONE),
	/* L2 hit */
	[0x03] = OP_LH | P(LVL, L2) | P(SNOOP, NONE),
	/* L3 hit */
	[0x04] = OP_LH | P(LVL, L3) | P(SNOOP, NONE),
	/* L3 hit, snoop miss */
	[0x05] = OP_LH | P(LVL, L3) | P(SNOOP, MISS),
	/* L3 hit, snoop hit */
	[0x06] = OP_LH | P(LVL, L3) | P(SNOOP, HIT),
	/* L3 hit, snoop hitm */
	[0x07] = OP_LH | P(LVL, L3) | P(SNOOP, HITM),
	/* L3 miss, snoop hit */
	[0x08] = OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT),
	/* L3 miss, snoop hitm */
	[0x09] = OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM),
	/* L3 miss, shared (local RAM) */
	[0x0a] = OP_LH | P(LVL, LOC_RAM) | P(SNOOP, HIT),
	/* L3 miss, shared (remote RAM) */
	[0x0b] = OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT),
	/* L3 miss, exclusive (local RAM) */
	[0x0c] = OP_LH | P(LVL, LOC_RAM) | SNOOP_NONE_MISS,
	/* L3 miss, exclusive (remote RAM) */
	[0x0d] = OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,
	/* I/O */
	[0x0e] = OP_LH | P(LVL, IO) | P(SNOOP, NONE),
	/* uncached */
	[0x0f] = OP_LH | P(LVL, UNC) | P(SNOOP, NONE),
};
/*
 * Convert the status word of a precise-store PEBS record into the
 * generic PERF_MEM_* data source encoding.
 *
 * @status: raw data-source value taken from the PEBS record
 *
 * Returns a value that always carries OP=STORE, SNOOP=NA, LVL=L1 and
 * TLB=L2, refined by the TLB, L1D and lock bits of @status.
 */
static u64 precise_store_data(u64 status)
{
	union intel_x86_pebs_dse dse = { .val = status };
	u64 src;

	src = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);

	/*
	 * bit 4: TLB access.  When set the store missed the 2nd level
	 * TLB, so it either hit the walker or the OS; when clear it hit
	 * the 2nd level TLB.
	 */
	src |= dse.st_stlb_miss ? P(TLB, MISS) : P(TLB, HIT);

	/*
	 * bit 0: hit in the L1 data cache.  When clear, all that is
	 * known is that the store missed L1D.
	 */
	src |= dse.st_l1d_hit ? P(LVL, HIT) : P(LVL, MISS);

	/* bit 5: locked prefix */
	if (dse.st_locked)
		src |= P(LOCK, LOCKED);

	return src;
}
/*
 * Convert the status word of a PEBS load-latency record into the
 * generic PERF_MEM_* data source encoding.
 *
 * @status: raw data-source value taken from the PEBS record
 *
 * Returns the pebs_data_source[] entry selected by bits 0-3 of @status,
 * combined with TLB and lock information, or with the TLB/LOCK "NA"
 * markers on the Nehalem models listed below.
 */
static u64 load_latency_data(u64 status)
{
	union intel_x86_pebs_dse dse = { .val = status };
	const int family = boot_cpu_data.x86;
	const int model = boot_cpu_data.x86_model;
	u64 src;

	/* bits 0-3 pick the base encoding from the mapping table */
	src = pebs_data_source[dse.ld_dse];

	/* Nehalem models do not support TLB and lock information */
	if (family == 0x6) {
		switch (model) {
		case 26:
		case 30:
		case 31:
		case 46:
			return src | P(TLB, NA) | P(LOCK, NA);
		default:
			break;
		}
	}

	/*
	 * bit 4: TLB access
	 *   0 = did not miss the 2nd level TLB
	 *   1 = missed the 2nd level TLB
	 */
	if (!dse.ld_stlb_miss)
		src |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
	else
		src |= P(TLB, MISS) | P(TLB, L2);

	/* bit 5: locked prefix */
	if (dse.ld_locked)
		src |= P(LOCK, LOCKED);

	return src;
}
struct pebs_record_core { struct pebs_record_core {
u64 flags, ip; u64 flags, ip;
u64 ax, bx, cx, dx; u64 ax, bx, cx, dx;
...@@ -364,7 +488,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { ...@@ -364,7 +488,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
}; };
struct event_constraint intel_nehalem_pebs_event_constraints[] = { struct event_constraint intel_nehalem_pebs_event_constraints[] = {
INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */ INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
...@@ -379,7 +503,7 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = { ...@@ -379,7 +503,7 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = {
}; };
struct event_constraint intel_westmere_pebs_event_constraints[] = { struct event_constraint intel_westmere_pebs_event_constraints[] = {
INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
...@@ -399,7 +523,8 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { ...@@ -399,7 +523,8 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
...@@ -413,7 +538,8 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = { ...@@ -413,7 +538,8 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
...@@ -430,10 +556,12 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event) ...@@ -430,10 +556,12 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
if (x86_pmu.pebs_constraints) { if (x86_pmu.pebs_constraints) {
for_each_event_constraint(c, x86_pmu.pebs_constraints) { for_each_event_constraint(c, x86_pmu.pebs_constraints) {
if ((event->hw.config & c->cmask) == c->code) if ((event->hw.config & c->cmask) == c->code) {
event->hw.flags |= c->flags;
return c; return c;
} }
} }
}
return &emptyconstraint; return &emptyconstraint;
} }
...@@ -446,6 +574,11 @@ void intel_pmu_pebs_enable(struct perf_event *event) ...@@ -446,6 +574,11 @@ void intel_pmu_pebs_enable(struct perf_event *event)
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
cpuc->pebs_enabled |= 1ULL << hwc->idx; cpuc->pebs_enabled |= 1ULL << hwc->idx;
if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
cpuc->pebs_enabled |= 1ULL << 63;
} }
void intel_pmu_pebs_disable(struct perf_event *event) void intel_pmu_pebs_disable(struct perf_event *event)
...@@ -558,20 +691,51 @@ static void __intel_pmu_pebs_event(struct perf_event *event, ...@@ -558,20 +691,51 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
struct pt_regs *iregs, void *__pebs) struct pt_regs *iregs, void *__pebs)
{ {
/* /*
* We cast to pebs_record_core since that is a subset of * We cast to pebs_record_nhm to get the load latency data
* both formats and we don't use the other fields in this * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
* routine.
*/ */
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct pebs_record_core *pebs = __pebs; struct pebs_record_nhm *pebs = __pebs;
struct perf_sample_data data; struct perf_sample_data data;
struct pt_regs regs; struct pt_regs regs;
u64 sample_type;
int fll, fst;
if (!intel_pmu_save_and_restart(event)) if (!intel_pmu_save_and_restart(event))
return; return;
fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
perf_sample_data_init(&data, 0, event->hw.last_period); perf_sample_data_init(&data, 0, event->hw.last_period);
data.period = event->hw.last_period;
sample_type = event->attr.sample_type;
/*
* if PEBS-LL or PreciseStore
*/
if (fll || fst) {
if (sample_type & PERF_SAMPLE_ADDR)
data.addr = pebs->dla;
/*
* Use latency for weight (only avail with PEBS-LL)
*/
if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
data.weight = pebs->lat;
/*
* data.data_src encodes the data source
*/
if (sample_type & PERF_SAMPLE_DATA_SRC) {
if (fll)
data.data_src.val = load_latency_data(pebs->dse);
else
data.data_src.val = precise_store_data(pebs->dse);
}
}
/* /*
* We use the interrupt regs as a base because the PEBS record * We use the interrupt regs as a base because the PEBS record
* does not contain a full regs set, specifically it seems to * does not contain a full regs set, specifically it seems to
......
...@@ -2438,7 +2438,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type) ...@@ -2438,7 +2438,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
type->unconstrainted = (struct event_constraint) type->unconstrainted = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
0, type->num_counters, 0); 0, type->num_counters, 0, 0);
for (i = 0; i < type->num_boxes; i++) { for (i = 0; i < type->num_boxes; i++) {
pmus[i].func_id = -1; pmus[i].func_id = -1;
......
...@@ -127,6 +127,7 @@ struct hw_perf_event { ...@@ -127,6 +127,7 @@ struct hw_perf_event {
int event_base_rdpmc; int event_base_rdpmc;
int idx; int idx;
int last_cpu; int last_cpu;
int flags;
struct hw_perf_event_extra extra_reg; struct hw_perf_event_extra extra_reg;
struct hw_perf_event_extra branch_reg; struct hw_perf_event_extra branch_reg;
...@@ -567,11 +568,13 @@ struct perf_sample_data { ...@@ -567,11 +568,13 @@ struct perf_sample_data {
u32 reserved; u32 reserved;
} cpu_entry; } cpu_entry;
u64 period; u64 period;
union perf_mem_data_src data_src;
struct perf_callchain_entry *callchain; struct perf_callchain_entry *callchain;
struct perf_raw_record *raw; struct perf_raw_record *raw;
struct perf_branch_stack *br_stack; struct perf_branch_stack *br_stack;
struct perf_regs_user regs_user; struct perf_regs_user regs_user;
u64 stack_user_size; u64 stack_user_size;
u64 weight;
}; };
static inline void perf_sample_data_init(struct perf_sample_data *data, static inline void perf_sample_data_init(struct perf_sample_data *data,
...@@ -585,6 +588,8 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, ...@@ -585,6 +588,8 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE; data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
data->regs_user.regs = NULL; data->regs_user.regs = NULL;
data->stack_user_size = 0; data->stack_user_size = 0;
data->weight = 0;
data->data_src.val = 0;
} }
extern void perf_output_sample(struct perf_output_handle *handle, extern void perf_output_sample(struct perf_output_handle *handle,
...@@ -809,6 +814,7 @@ do { \ ...@@ -809,6 +814,7 @@ do { \
struct perf_pmu_events_attr { struct perf_pmu_events_attr {
struct device_attribute attr; struct device_attribute attr;
u64 id; u64 id;
const char *event_str;
}; };
#define PMU_EVENT_ATTR(_name, _var, _id, _show) \ #define PMU_EVENT_ATTR(_name, _var, _id, _show) \
......
...@@ -132,8 +132,10 @@ enum perf_event_sample_format { ...@@ -132,8 +132,10 @@ enum perf_event_sample_format {
PERF_SAMPLE_BRANCH_STACK = 1U << 11, PERF_SAMPLE_BRANCH_STACK = 1U << 11,
PERF_SAMPLE_REGS_USER = 1U << 12, PERF_SAMPLE_REGS_USER = 1U << 12,
PERF_SAMPLE_STACK_USER = 1U << 13, PERF_SAMPLE_STACK_USER = 1U << 13,
PERF_SAMPLE_WEIGHT = 1U << 14,
PERF_SAMPLE_DATA_SRC = 1U << 15,
PERF_SAMPLE_MAX = 1U << 14, /* non-ABI */ PERF_SAMPLE_MAX = 1U << 16, /* non-ABI */
}; };
/* /*
...@@ -443,6 +445,7 @@ struct perf_event_mmap_page { ...@@ -443,6 +445,7 @@ struct perf_event_mmap_page {
#define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0) #define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0)
#define PERF_RECORD_MISC_GUEST_USER (5 << 0) #define PERF_RECORD_MISC_GUEST_USER (5 << 0)
#define PERF_RECORD_MISC_MMAP_DATA (1 << 13)
/* /*
* Indicates that the content of PERF_SAMPLE_IP points to * Indicates that the content of PERF_SAMPLE_IP points to
* the actual instruction that triggered the event. See also * the actual instruction that triggered the event. See also
...@@ -588,6 +591,9 @@ enum perf_event_type { ...@@ -588,6 +591,9 @@ enum perf_event_type {
* { u64 size; * { u64 size;
* char data[size]; * char data[size];
* u64 dyn_size; } && PERF_SAMPLE_STACK_USER * u64 dyn_size; } && PERF_SAMPLE_STACK_USER
*
* { u64 weight; } && PERF_SAMPLE_WEIGHT
* { u64 data_src; } && PERF_SAMPLE_DATA_SRC
* }; * };
*/ */
PERF_RECORD_SAMPLE = 9, PERF_RECORD_SAMPLE = 9,
...@@ -613,4 +619,67 @@ enum perf_callchain_context { ...@@ -613,4 +619,67 @@ enum perf_callchain_context {
#define PERF_FLAG_FD_OUTPUT (1U << 1) #define PERF_FLAG_FD_OUTPUT (1U << 1)
#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */ #define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */
/*
 * Description of a sampled memory access, reported as the u64 data_src
 * word of a sample when PERF_SAMPLE_DATA_SRC is requested.
 *
 * 'val' is the raw 64-bit word; the anonymous struct splits it into
 * bit-fields whose widths add up to 64 (5+14+5+2+7+31).  Each field is
 * a bit mask built from the matching PERF_MEM_* constants.
 */
union perf_mem_data_src {
__u64 val;
struct {
__u64 mem_op:5, /* type of opcode */
mem_lvl:14, /* memory hierarchy level */
mem_snoop:5, /* snoop mode */
mem_lock:2, /* lock instr */
mem_dtlb:7, /* tlb access */
mem_rsvd:31; /* remaining bits, not assigned a meaning here */
};
};
/* type of opcode (load/store/prefetch,code) */
#define PERF_MEM_OP_NA 0x01 /* not available */
#define PERF_MEM_OP_LOAD 0x02 /* load instruction */
#define PERF_MEM_OP_STORE 0x04 /* store instruction */
#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */
#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
#define PERF_MEM_OP_SHIFT 0
/* memory hierarchy (memory level, hit or miss) */
#define PERF_MEM_LVL_NA 0x01 /* not available */
#define PERF_MEM_LVL_HIT 0x02 /* hit level */
#define PERF_MEM_LVL_MISS 0x04 /* miss level */
#define PERF_MEM_LVL_L1 0x08 /* L1 */
#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */
#define PERF_MEM_LVL_L2 0x20 /* L2 hit */
#define PERF_MEM_LVL_L3 0x40 /* L3 hit */
#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */
#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */
#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */
#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */
#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */
#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
#define PERF_MEM_LVL_SHIFT 5
/* snoop mode */
#define PERF_MEM_SNOOP_NA 0x01 /* not available */
#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */
#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */
#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
#define PERF_MEM_SNOOP_SHIFT 19
/* locked instruction */
#define PERF_MEM_LOCK_NA 0x01 /* not available */
#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
#define PERF_MEM_LOCK_SHIFT 24
/* TLB access */
#define PERF_MEM_TLB_NA 0x01 /* not available */
#define PERF_MEM_TLB_HIT 0x02 /* hit level */
#define PERF_MEM_TLB_MISS 0x04 /* miss level */
#define PERF_MEM_TLB_L1 0x08 /* L1 */
#define PERF_MEM_TLB_L2 0x10 /* L2 */
#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/
#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
#define PERF_MEM_TLB_SHIFT 26
/*
 * Build the perf_mem_data_src.val bits for one flag: PERF_MEM_S(LVL, HIT)
 * expands to PERF_MEM_LVL_HIT shifted left by PERF_MEM_LVL_SHIFT.
 *
 * The cast uses __u64, the same exported type as union perf_mem_data_src,
 * so the macro also compiles where the kernel-internal u64 is not defined.
 */
#define PERF_MEM_S(a, s) \
	(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
#endif /* _UAPI_LINUX_PERF_EVENT_H */ #endif /* _UAPI_LINUX_PERF_EVENT_H */
...@@ -976,9 +976,15 @@ static void perf_event__header_size(struct perf_event *event) ...@@ -976,9 +976,15 @@ static void perf_event__header_size(struct perf_event *event)
if (sample_type & PERF_SAMPLE_PERIOD) if (sample_type & PERF_SAMPLE_PERIOD)
size += sizeof(data->period); size += sizeof(data->period);
if (sample_type & PERF_SAMPLE_WEIGHT)
size += sizeof(data->weight);
if (sample_type & PERF_SAMPLE_READ) if (sample_type & PERF_SAMPLE_READ)
size += event->read_size; size += event->read_size;
if (sample_type & PERF_SAMPLE_DATA_SRC)
size += sizeof(data->data_src.val);
event->header_size = size; event->header_size = size;
} }
...@@ -4193,6 +4199,12 @@ void perf_output_sample(struct perf_output_handle *handle, ...@@ -4193,6 +4199,12 @@ void perf_output_sample(struct perf_output_handle *handle,
perf_output_sample_ustack(handle, perf_output_sample_ustack(handle,
data->stack_user_size, data->stack_user_size,
data->regs_user.regs); data->regs_user.regs);
if (sample_type & PERF_SAMPLE_WEIGHT)
perf_output_put(handle, data->weight);
if (sample_type & PERF_SAMPLE_DATA_SRC)
perf_output_put(handle, data->data_src.val);
} }
void perf_prepare_sample(struct perf_event_header *header, void perf_prepare_sample(struct perf_event_header *header,
...@@ -4779,6 +4791,9 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) ...@@ -4779,6 +4791,9 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
mmap_event->file_name = name; mmap_event->file_name = name;
mmap_event->file_size = size; mmap_event->file_size = size;
if (!(vma->vm_flags & VM_EXEC))
mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
rcu_read_lock(); rcu_read_lock();
......
perf-mem(1)
===========
NAME
----
perf-mem - Profile memory accesses
SYNOPSIS
--------
[verse]
'perf mem' [<options>] (record [<command>] | report)
DESCRIPTION
-----------
"perf mem -t <TYPE> record" runs a command and gathers memory operation data
from it, into perf.data. Perf record options are accepted and are passed through.
"perf mem -t <TYPE> report" displays the result. It invokes perf report with the
right set of options to display a memory access profile.
OPTIONS
-------
<command>...::
Any command you can specify in a shell.
-t::
--type=::
Select the memory operation type: load or store (default: load)
-D::
--dump-raw-samples=::
Dump the raw decoded samples on the screen in a format that is easy to parse with
one sample per line.
-x::
--field-separator::
Specify the field separator used when dumping raw samples (-D option). By default,
the separator is the space character.
-C::
--cpu-list::
Restrict the dump of raw samples to those from the CPUs provided via this option.
Note that the same option can be passed in record mode. It will be interpreted the
same way as in perf record.
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1]
...@@ -182,6 +182,12 @@ is enabled for all the sampling events. The sampled branch type is the same for ...@@ -182,6 +182,12 @@ is enabled for all the sampling events. The sampled branch type is the same for
The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
Note that this feature may not be available on all processors. Note that this feature may not be available on all processors.
-W::
--weight::
Enable weighted sampling. An additional weight is recorded per sample and can be
displayed with the weight and local_weight sort keys. This currently works for TSX
abort events and some memory events in precise mode on modern Intel CPUs.
SEE ALSO SEE ALSO
-------- --------
linkperf:perf-stat[1], linkperf:perf-list[1] linkperf:perf-stat[1], linkperf:perf-list[1]
...@@ -59,7 +59,7 @@ OPTIONS ...@@ -59,7 +59,7 @@ OPTIONS
--sort=:: --sort=::
Sort histogram entries by given key(s) - multiple keys can be specified Sort histogram entries by given key(s) - multiple keys can be specified
in CSV format. Following sort keys are available: in CSV format. Following sort keys are available:
pid, comm, dso, symbol, parent, cpu, srcline. pid, comm, dso, symbol, parent, cpu, srcline, weight, local_weight.
Each key has following meaning: Each key has following meaning:
...@@ -206,6 +206,10 @@ OPTIONS ...@@ -206,6 +206,10 @@ OPTIONS
--group:: --group::
Show event group information together. Show event group information together.
--demangle::
Demangle symbol names to human readable form. It's enabled by default,
disable with --no-demangle.
SEE ALSO SEE ALSO
-------- --------
linkperf:perf-stat[1], linkperf:perf-annotate[1] linkperf:perf-stat[1], linkperf:perf-annotate[1]
...@@ -119,13 +119,19 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m ...@@ -119,13 +119,19 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m
Print count deltas every N milliseconds (minimum: 100ms) Print count deltas every N milliseconds (minimum: 100ms)
example: perf stat -I 1000 -e cycles -a sleep 5 example: perf stat -I 1000 -e cycles -a sleep 5
--aggr-socket:: --per-socket::
Aggregate counts per processor socket for system-wide mode measurements. This Aggregate counts per processor socket for system-wide mode measurements. This
is a useful mode to detect imbalance between sockets. To enable this mode, is a useful mode to detect imbalance between sockets. To enable this mode,
use --aggr-socket in addition to -a. (system-wide). The output includes the use --per-socket in addition to -a. (system-wide). The output includes the
socket number and the number of online processors on that socket. This is socket number and the number of online processors on that socket. This is
useful to gauge the amount of aggregation. useful to gauge the amount of aggregation.
--per-core::
Aggregate counts per physical processor for system-wide mode measurements. This
is a useful mode to detect imbalance between physical cores. To enable this mode,
use --per-core in addition to -a. (system-wide). The output includes the
core number and the number of online logical processors on that physical processor.
EXAMPLES EXAMPLES
-------- --------
......
...@@ -112,7 +112,7 @@ Default is to monitor all CPUS. ...@@ -112,7 +112,7 @@ Default is to monitor all CPUS.
-s:: -s::
--sort:: --sort::
Sort by key(s): pid, comm, dso, symbol, parent, srcline. Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight, local_weight.
-n:: -n::
--show-nr-samples:: --show-nr-samples::
......
...@@ -35,7 +35,9 @@ include config/utilities.mak ...@@ -35,7 +35,9 @@ include config/utilities.mak
# #
# Define WERROR=0 to disable treating any warnings as errors. # Define WERROR=0 to disable treating any warnings as errors.
# #
# Define NO_NEWT if you do not want TUI support. # Define NO_NEWT if you do not want TUI support. (deprecated)
#
# Define NO_SLANG if you do not want TUI support.
# #
# Define NO_GTK2 if you do not want GTK+ GUI support. # Define NO_GTK2 if you do not want GTK+ GUI support.
# #
...@@ -104,6 +106,10 @@ ifdef PARSER_DEBUG ...@@ -104,6 +106,10 @@ ifdef PARSER_DEBUG
PARSER_DEBUG_CFLAGS := -DPARSER_DEBUG PARSER_DEBUG_CFLAGS := -DPARSER_DEBUG
endif endif
ifdef NO_NEWT
NO_SLANG=1
endif
CFLAGS = -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS) CFLAGS = -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS)
EXTLIBS = -lpthread -lrt -lelf -lm EXTLIBS = -lpthread -lrt -lelf -lm
ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
...@@ -272,7 +278,7 @@ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP ...@@ -272,7 +278,7 @@ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT)
$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
$(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \ $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \
...@@ -547,6 +553,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-lock.o ...@@ -547,6 +553,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o
BUILTIN_OBJS += $(OUTPUT)builtin-inject.o BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o
BUILTIN_OBJS += $(OUTPUT)builtin-mem.o
PERFLIBS = $(LIB_FILE) $(LIBLK) $(LIBTRACEEVENT) PERFLIBS = $(LIB_FILE) $(LIBLK) $(LIBTRACEEVENT)
...@@ -679,15 +686,15 @@ ifndef NO_LIBAUDIT ...@@ -679,15 +686,15 @@ ifndef NO_LIBAUDIT
endif endif
endif endif
ifndef NO_NEWT ifndef NO_SLANG
FLAGS_NEWT=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -lnewt FLAGS_SLANG=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -I/usr/include/slang -lslang
ifneq ($(call try-cc,$(SOURCE_NEWT),$(FLAGS_NEWT),libnewt),y) ifneq ($(call try-cc,$(SOURCE_SLANG),$(FLAGS_SLANG),libslang),y)
msg := $(warning newt not found, disables TUI support. Please install newt-devel or libnewt-dev); msg := $(warning slang not found, disables TUI support. Please install slang-devel or libslang-dev);
else else
# Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h
BASIC_CFLAGS += -I/usr/include/slang BASIC_CFLAGS += -I/usr/include/slang
BASIC_CFLAGS += -DNEWT_SUPPORT BASIC_CFLAGS += -DSLANG_SUPPORT
EXTLIBS += -lnewt -lslang EXTLIBS += -lslang
LIB_OBJS += $(OUTPUT)ui/browser.o LIB_OBJS += $(OUTPUT)ui/browser.o
LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o
LIB_OBJS += $(OUTPUT)ui/browsers/hists.o LIB_OBJS += $(OUTPUT)ui/browsers/hists.o
......
...@@ -63,7 +63,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, ...@@ -63,7 +63,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
return 0; return 0;
} }
he = __hists__add_entry(&evsel->hists, al, NULL, 1); he = __hists__add_entry(&evsel->hists, al, NULL, 1, 1);
if (he == NULL) if (he == NULL)
return -ENOMEM; return -ENOMEM;
......
...@@ -231,9 +231,10 @@ int perf_diff__formula(struct hist_entry *he, struct hist_entry *pair, ...@@ -231,9 +231,10 @@ int perf_diff__formula(struct hist_entry *he, struct hist_entry *pair,
} }
static int hists__add_entry(struct hists *self, static int hists__add_entry(struct hists *self,
struct addr_location *al, u64 period) struct addr_location *al, u64 period,
u64 weight)
{ {
if (__hists__add_entry(self, al, NULL, period) != NULL) if (__hists__add_entry(self, al, NULL, period, weight) != NULL)
return 0; return 0;
return -ENOMEM; return -ENOMEM;
} }
...@@ -255,7 +256,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused, ...@@ -255,7 +256,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
if (al.filtered) if (al.filtered)
return 0; return 0;
if (hists__add_entry(&evsel->hists, &al, sample->period)) { if (hists__add_entry(&evsel->hists, &al, sample->period, sample->weight)) {
pr_warning("problem incrementing symbol period, skipping event\n"); pr_warning("problem incrementing symbol period, skipping event\n");
return -1; return -1;
} }
......
#include "builtin.h"
#include "perf.h"
#include "util/parse-options.h"
#include "util/trace-event.h"
#include "util/tool.h"
#include "util/session.h"
/* Values recognised for the -t/--type option. */
#define MEM_OPERATION_LOAD "load"
#define MEM_OPERATION_STORE "store"
/*
 * Memory operation selected with -t/--type; defaults to loads.
 * The record and report paths only compare against MEM_OPERATION_LOAD,
 * so any other string is handled as a store.
 */
static const char *mem_operation = MEM_OPERATION_LOAD;
/*
 * State of one 'perf mem' invocation. The embedded tool is what the session
 * code receives; callbacks get back to this structure with container_of().
 */
struct perf_mem {
/* event callbacks handed to the perf session */
struct perf_tool tool;
/*
 * Set to "perf.data" in cmd_mem().
 * NOTE(review): the -i option stores into a separate input_name variable,
 * not into this field - confirm which one is meant to be authoritative.
 */
char const *input_name;
/* symbol filter passed to perf_event__preprocess_sample() */
symbol_filter_t annotate_init;
/* -U: skip samples that did not resolve to a symbol */
bool hide_unresolved;
/* -D: print raw samples instead of running 'perf report' */
bool dump_raw;
/* -C: cpu list string, converted into cpu_bitmap for raw dumps */
const char *cpu_list;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
};
/* NULL-terminated usage text shown by usage_with_options() for 'perf mem'. */
static const char * const mem_usage[] = {
"perf mem [<options>] {record <command> |report}",
NULL
};
/*
 * Run 'perf record' with the options needed for memory access sampling.
 *
 * The generated command line is:
 *   record [-W] -d -e <event> <argv[1]> ... <argv[argc - 1]>
 * where -W is only added for loads and <event> is cpu/mem-loads/pp for
 * loads, cpu/mem-stores/pp otherwise.
 *
 * @argc/@argv: remaining 'perf mem' arguments; argv[0] is the "record"
 *              sub-command itself and is not forwarded.
 *
 * Returns -1 if the argument vector cannot be allocated, otherwise the
 * value returned by cmd_record().
 *
 * The fixed arguments are string literals: the vector is the only thing
 * allocated here, so freeing it releases everything and there is no
 * unchecked strdup() result that could end up as a NULL argument.
 */
static int __cmd_record(int argc, const char **argv)
{
	const bool is_load = !strcmp(mem_operation, MEM_OPERATION_LOAD);
	const char **rec_argv;
	int rec_argc, i = 0, j;
	int ret;

	/* at most 5 fixed arguments replace argv[0], plus a NULL terminator */
	rec_argc = argc + 4;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (!rec_argv)
		return -1;

	rec_argv[i++] = "record";

	/* weights are only requested for loads */
	if (is_load)
		rec_argv[i++] = "-W";

	rec_argv[i++] = "-d";
	rec_argv[i++] = "-e";
	rec_argv[i++] = is_load ? "cpu/mem-loads/pp" : "cpu/mem-stores/pp";

	for (j = 1; j < argc; j++, i++)
		rec_argv[i] = argv[j];

	ret = cmd_record(i, rec_argv, NULL);

	free(rec_argv);
	return ret;
}
/*
 * Print one sample as a single line of text:
 *   pid, tid, ip, data address, weight, data source, dso:symbol
 *
 * With a user supplied field separator (-x) the values are printed
 * unpadded; otherwise columns are padded and separated by a space.
 *
 * Returns -1 when the sample cannot be preprocessed, 0 otherwise (including
 * when the sample is filtered out or hidden because it is unresolved).
 *
 * Two things differ from the straightforward version of this code:
 *  - the data address uses "0x%016" PRIx64; "0x016%" PRIx64 would print a
 *    literal "016" followed by an unpadded value;
 *  - the default separator is kept in a local variable. Storing it in
 *    symbol_conf.field_sep would make every sample after the first one
 *    take the "separator supplied" branch and lose the padding.
 */
static int
dump_raw_samples(struct perf_tool *tool,
		 union perf_event *event,
		 struct perf_sample *sample,
		 struct perf_evsel *evsel __maybe_unused,
		 struct machine *machine)
{
	struct perf_mem *mem = container_of(tool, struct perf_mem, tool);
	const char *sep = symbol_conf.field_sep;
	struct addr_location al;
	const char *fmt;

	if (perf_event__preprocess_sample(event, machine, &al, sample,
					  mem->annotate_init) < 0) {
		fprintf(stderr, "problem processing %d event, skipping it.\n",
			event->header.type);
		return -1;
	}

	if (al.filtered || (mem->hide_unresolved && al.sym == NULL))
		return 0;

	if (al.map != NULL)
		al.map->dso->hit = 1;

	if (sep) {
		fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64
		      "%s0x%"PRIx64"%s%s:%s\n";
	} else {
		fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x%016"PRIx64
		      "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
		sep = " ";
	}

	printf(fmt,
		sample->pid,
		sep,
		sample->tid,
		sep,
		event->ip.ip,
		sep,
		sample->addr,
		sep,
		sample->weight,
		sep,
		sample->data_src,
		sep,
		al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
		al.sym ? al.sym->name : "???");

	return 0;
}
/*
 * Sample callback installed in perf_mem.tool: hands every sample to
 * dump_raw_samples() and passes its result back to the caller.
 */
static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	int status;

	status = dump_raw_samples(tool, event, sample, evsel, machine);

	return status;
}
/*
 * Raw dump mode (-D): open the input file, optionally restrict it to the
 * CPUs given with -C, and process all events with mem->tool so that each
 * sample is printed on its own line.
 *
 * Returns -ENOMEM if the session cannot be created, -EINVAL if the cpu list
 * cannot be turned into a bitmap, -1 if symbol__init() fails, otherwise the
 * result of perf_session__process_events().
 *
 * Every path taken after the session was created goes through out_delete,
 * so the session is released on success and on all failures.
 */
static int report_raw_events(struct perf_mem *mem)
{
	int err = -EINVAL;
	struct perf_session *session = perf_session__new(input_name, O_RDONLY,
							 0, false, &mem->tool);

	if (session == NULL)
		return -ENOMEM;

	if (mem->cpu_list) {
		/* err is still -EINVAL if the list is rejected */
		if (perf_session__cpu_bitmap(session, mem->cpu_list,
					     mem->cpu_bitmap))
			goto out_delete;
	}

	if (symbol__init() < 0) {
		err = -1;
		goto out_delete;
	}

	printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");

	err = perf_session__process_events(session, &mem->tool);

out_delete:
	perf_session__delete(session);
	return err;
}
/*
 * Handle 'perf mem report'.
 *
 * With -D the samples are dumped by report_raw_events(). Otherwise
 * 'perf report' is run as:
 *   report --mem-mode -n [--sort=...] <argv[1]> ... <argv[argc - 1]>
 * The explicit --sort list is only added when the operation is not "load".
 *
 * @argc/@argv: remaining 'perf mem' arguments; argv[0] is the "report"
 *              sub-command itself and is not forwarded.
 *
 * Returns -1 if the argument vector cannot be allocated, otherwise the
 * result of report_raw_events() or cmd_report().
 *
 * The fixed arguments are string literals: the vector is the only thing
 * allocated here, so freeing it releases everything and there is no
 * unchecked strdup() result that could end up as a NULL argument.
 */
static int report_events(int argc, const char **argv, struct perf_mem *mem)
{
	const char **rep_argv;
	int ret, i = 0, j, rep_argc;

	if (mem->dump_raw)
		return report_raw_events(mem);

	/* at most 4 fixed arguments replace argv[0], plus a NULL terminator */
	rep_argc = argc + 3;
	rep_argv = calloc(rep_argc + 1, sizeof(char *));
	if (!rep_argv)
		return -1;

	rep_argv[i++] = "report";
	rep_argv[i++] = "--mem-mode";
	rep_argv[i++] = "-n"; /* display number of samples */

	/*
	 * there is no weight (cost) associated with stores, so don't print
	 * the column
	 */
	if (strcmp(mem_operation, MEM_OPERATION_LOAD))
		rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
				"dso_daddr,tlb,locked";

	for (j = 1; j < argc; j++, i++)
		rep_argv[i] = argv[j];

	ret = cmd_report(i, rep_argv, NULL);

	free(rep_argv);
	return ret;
}
/*
 * Entry point of 'perf mem'.
 *
 * Parses the options that precede the sub-command, then dispatches on the
 * first remaining argument: anything starting with "rec" goes to
 * __cmd_record(), anything starting with "rep" goes to report_events().
 * A missing or unknown sub-command prints the usage text.
 *
 * Returns the result of the selected sub-command.
 */
int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
{
	struct stat stdin_stat;
	struct perf_mem mem = {
		.tool = {
			.sample		 = process_sample_event,
			.mmap		 = perf_event__process_mmap,
			.comm		 = perf_event__process_comm,
			.lost		 = perf_event__process_lost,
			.fork		 = perf_event__process_fork,
			.build_id	 = perf_event__process_build_id,
			.ordered_samples = true,
		},
		.input_name		 = "perf.data",
	};
	const struct option mem_opts[] = {
	OPT_STRING('t', "type", &mem_operation,
		   "type", "memory operations(load/store)"),
	OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw,
		    "dump raw samples in ASCII"),
	OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved,
		    "Only display entries resolved to a symbol"),
	OPT_STRING('i', "input", &input_name, "file",
		   "input file name"),
	OPT_STRING('C', "cpu", &mem.cpu_list, "cpu",
		   "list of cpus to profile"),
	OPT_STRING('x', "field-separator", &symbol_conf.field_sep,
		   "separator",
		   "separator for columns, no spaces will be added"
		   " between columns '.' is reserved."),
	OPT_END()
	};

	/* option parsing stops at the sub-command name */
	argc = parse_options(argc, argv, mem_opts, mem_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);

	if (argc == 0 ||
	    (strncmp(argv[0], "rec", 3) == 0 && mem_operation == NULL))
		usage_with_options(mem_usage, mem_opts);

	/* no input name: read from stdin when it is a pipe */
	if (mem.input_name == NULL || strlen(mem.input_name) == 0) {
		bool stdin_is_fifo = !fstat(STDIN_FILENO, &stdin_stat) &&
				     S_ISFIFO(stdin_stat.st_mode);

		mem.input_name = stdin_is_fifo ? "-" : "perf.data";
	}

	if (strncmp(argv[0], "rec", 3) == 0)
		return __cmd_record(argc, argv);

	if (strncmp(argv[0], "rep", 3) == 0)
		return report_events(argc, argv, &mem);

	usage_with_options(mem_usage, mem_opts);

	return 0;
}
...@@ -5,8 +5,6 @@ ...@@ -5,8 +5,6 @@
* (or a CPU, or a PID) into the perf.data output file - for * (or a CPU, or a PID) into the perf.data output file - for
* later analysis via perf report. * later analysis via perf report.
*/ */
#define _FILE_OFFSET_BITS 64
#include "builtin.h" #include "builtin.h"
#include "perf.h" #include "perf.h"
...@@ -955,6 +953,8 @@ const struct option record_options[] = { ...@@ -955,6 +953,8 @@ const struct option record_options[] = {
OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
"branch filter mask", "branch stack filter modes", "branch filter mask", "branch stack filter modes",
parse_branch_stack), parse_branch_stack),
OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
"sample by weight (on special events only)"),
OPT_END() OPT_END()
}; };
......
...@@ -46,6 +46,7 @@ struct perf_report { ...@@ -46,6 +46,7 @@ struct perf_report {
bool show_full_info; bool show_full_info;
bool show_threads; bool show_threads;
bool inverted_callchain; bool inverted_callchain;
bool mem_mode;
struct perf_read_values show_threads_values; struct perf_read_values show_threads_values;
const char *pretty_printing_style; const char *pretty_printing_style;
symbol_filter_t annotate_init; symbol_filter_t annotate_init;
...@@ -64,6 +65,99 @@ static int perf_report_config(const char *var, const char *value, void *cb) ...@@ -64,6 +65,99 @@ static int perf_report_config(const char *var, const char *value, void *cb)
return perf_default_config(var, value, cb); return perf_default_config(var, value, cb);
} }
/*
 * Add one sample to the histograms of @evsel in --mem-mode.
 *
 * The sample's callchain is resolved when parent sorting or callchains are
 * in use, its memory information is resolved with machine__resolve_mem(),
 * and a hist entry is added using the sample weight (at least 1) both as
 * period and as weight. In the TUI the annotation counters of the code
 * symbol and of the data symbol are updated as well.
 *
 * Returns 0 on success or when the sample is dropped because it is
 * unresolved and hide_unresolved is set, -ENOMEM on allocation failures,
 * or the error returned by one of the helpers.
 */
static int perf_report__add_mem_hist_entry(struct perf_tool *tool,
					   struct addr_location *al,
					   struct perf_sample *sample,
					   struct perf_evsel *evsel,
					   struct machine *machine,
					   union perf_event *event)
{
	struct perf_report *rep = container_of(tool, struct perf_report, tool);
	struct symbol *parent = NULL;
	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
	int err = 0;
	struct hist_entry *he;
	struct mem_info *mi, *mx;
	uint64_t cost;

	if ((sort__has_parent || symbol_conf.use_callchain) &&
	    sample->callchain) {
		err = machine__resolve_callchain(machine, evsel, al->thread,
						 sample, &parent);
		if (err)
			return err;
	}

	/*
	 * Drop hidden samples before resolving the memory information:
	 * nothing would take ownership of it on this path, so resolving it
	 * first would leak it.
	 */
	if (rep->hide_unresolved && !al->sym)
		return 0;

	mi = machine__resolve_mem(machine, al->thread, sample, cpumode);
	if (!mi)
		return -ENOMEM;

	cost = sample->weight;
	if (!cost)
		cost = 1;

	/*
	 * must pass period=weight in order to get the correct
	 * sorting from hists__collapse_resort() which is solely
	 * based on periods. We want sorting be done on nr_events * weight
	 * and this is indirectly achieved by passing period=weight here
	 * and the he_stat__add_period() function.
	 */
	he = __hists__add_mem_entry(&evsel->hists, al, parent, mi, cost, cost);
	if (!he)
		return -ENOMEM;

	/*
	 * In the TUI browser, we are doing integrated annotation,
	 * so we don't allocate the extra space needed because the stdio
	 * code will not use it.
	 */
	if (sort__has_sym && he->ms.sym && use_browser > 0) {
		struct annotation *notes = symbol__annotation(he->ms.sym);

		assert(evsel != NULL);

		/* a failed allocation must not be reported as success */
		err = -ENOMEM;
		if (notes->src == NULL && symbol__alloc_hist(he->ms.sym) < 0)
			goto out;

		err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
		if (err)
			goto out;
	}

	if (sort__has_sym && he->mem_info->daddr.sym && use_browser > 0) {
		struct annotation *notes;

		mx = he->mem_info;

		notes = symbol__annotation(mx->daddr.sym);

		/* a failed allocation must not be reported as success */
		err = -ENOMEM;
		if (notes->src == NULL && symbol__alloc_hist(mx->daddr.sym) < 0)
			goto out;

		err = symbol__inc_addr_samples(mx->daddr.sym,
					       mx->daddr.map,
					       evsel->idx,
					       mx->daddr.al_addr);
		if (err)
			goto out;
	}

	evsel->hists.stats.total_period += cost;
	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
	err = 0;

	if (symbol_conf.use_callchain) {
		err = callchain_append(he->callchain,
				       &callchain_cursor,
				       sample->period);
	}
out:
	return err;
}
static int perf_report__add_branch_hist_entry(struct perf_tool *tool, static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
struct addr_location *al, struct addr_location *al,
struct perf_sample *sample, struct perf_sample *sample,
...@@ -98,7 +192,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, ...@@ -98,7 +192,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
* and not events sampled. Thus we use a pseudo period of 1. * and not events sampled. Thus we use a pseudo period of 1.
*/ */
he = __hists__add_branch_entry(&evsel->hists, al, parent, he = __hists__add_branch_entry(&evsel->hists, al, parent,
&bi[i], 1); &bi[i], 1, 1);
if (he) { if (he) {
struct annotation *notes; struct annotation *notes;
err = -ENOMEM; err = -ENOMEM;
...@@ -156,7 +250,8 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, ...@@ -156,7 +250,8 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
return err; return err;
} }
he = __hists__add_entry(&evsel->hists, al, parent, sample->period); he = __hists__add_entry(&evsel->hists, al, parent, sample->period,
sample->weight);
if (he == NULL) if (he == NULL)
return -ENOMEM; return -ENOMEM;
...@@ -168,7 +263,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, ...@@ -168,7 +263,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
return err; return err;
} }
/* /*
* Only in the newt browser we are doing integrated annotation, * Only in the TUI browser we are doing integrated annotation,
* so we don't allocated the extra space needed because the stdio * so we don't allocated the extra space needed because the stdio
* code will not use it. * code will not use it.
*/ */
...@@ -219,6 +314,12 @@ static int process_sample_event(struct perf_tool *tool, ...@@ -219,6 +314,12 @@ static int process_sample_event(struct perf_tool *tool,
pr_debug("problem adding lbr entry, skipping event\n"); pr_debug("problem adding lbr entry, skipping event\n");
return -1; return -1;
} }
} else if (rep->mem_mode == 1) {
if (perf_report__add_mem_hist_entry(tool, &al, sample,
evsel, machine, event)) {
pr_debug("problem adding mem entry, skipping event\n");
return -1;
}
} else { } else {
if (al.map != NULL) if (al.map != NULL)
al.map->dso->hit = 1; al.map->dso->hit = 1;
...@@ -302,7 +403,8 @@ static void sig_handler(int sig __maybe_unused) ...@@ -302,7 +403,8 @@ static void sig_handler(int sig __maybe_unused)
session_done = 1; session_done = 1;
} }
static size_t hists__fprintf_nr_sample_events(struct hists *self, static size_t hists__fprintf_nr_sample_events(struct perf_report *rep,
struct hists *self,
const char *evname, FILE *fp) const char *evname, FILE *fp)
{ {
size_t ret; size_t ret;
...@@ -330,6 +432,10 @@ static size_t hists__fprintf_nr_sample_events(struct hists *self, ...@@ -330,6 +432,10 @@ static size_t hists__fprintf_nr_sample_events(struct hists *self,
if (evname != NULL) if (evname != NULL)
ret += fprintf(fp, " of event '%s'", evname); ret += fprintf(fp, " of event '%s'", evname);
if (rep->mem_mode) {
ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events);
ret += fprintf(fp, "\n# Sort order : %s", sort_order);
} else
ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events); ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events);
return ret + fprintf(fp, "\n#\n"); return ret + fprintf(fp, "\n#\n");
} }
...@@ -348,7 +454,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, ...@@ -348,7 +454,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
!perf_evsel__is_group_leader(pos)) !perf_evsel__is_group_leader(pos))
continue; continue;
hists__fprintf_nr_sample_events(hists, evname, stdout); hists__fprintf_nr_sample_events(rep, hists, evname, stdout);
hists__fprintf(hists, true, 0, 0, stdout); hists__fprintf(hists, true, 0, 0, stdout);
fprintf(stdout, "\n\n"); fprintf(stdout, "\n\n");
} }
...@@ -644,7 +750,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -644,7 +750,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
"Use the stdio interface"), "Use the stdio interface"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]", OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
"sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline," "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline,"
" dso_to, dso_from, symbol_to, symbol_from, mispredict"), " dso_to, dso_from, symbol_to, symbol_from, mispredict,"
" weight, local_weight, mem, symbol_daddr, dso_daddr, tlb, "
"snoop, locked"),
OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
"Show sample percentage for different cpu modes"), "Show sample percentage for different cpu modes"),
OPT_STRING('p', "parent", &parent_pattern, "regex", OPT_STRING('p', "parent", &parent_pattern, "regex",
...@@ -692,6 +800,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -692,6 +800,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
"use branch records for histogram filling", parse_branch_mode), "use branch records for histogram filling", parse_branch_mode),
OPT_STRING(0, "objdump", &objdump_path, "path", OPT_STRING(0, "objdump", &objdump_path, "path",
"objdump binary to use for disassembly and annotations"), "objdump binary to use for disassembly and annotations"),
OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
"Disable symbol demangling"),
OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
OPT_END() OPT_END()
}; };
...@@ -749,12 +860,24 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -749,12 +860,24 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
"dso_to,symbol_to"; "dso_to,symbol_to";
} }
if (report.mem_mode) {
if (sort__branch_mode == 1) {
fprintf(stderr, "branch and mem mode incompatible\n");
goto error;
}
/*
* if no sort_order is provided, then specify
* mem-mode specific order
*/
if (sort_order == default_sort_order)
sort_order = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
}
if (setup_sorting() < 0) if (setup_sorting() < 0)
usage_with_options(report_usage, options); usage_with_options(report_usage, options);
/* /*
* Only in the newt browser we are doing integrated annotation, * Only in the TUI browser we are doing integrated annotation,
* so don't allocate extra space that won't be used in the stdio * so don't allocate extra space that won't be used in the stdio
* implementation. * implementation.
*/ */
...@@ -814,6 +937,14 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -814,6 +937,14 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout); sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout);
sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout); sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout);
} else { } else {
if (report.mem_mode) {
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "symbol_daddr", stdout);
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso_daddr", stdout);
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "mem", stdout);
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "local_weight", stdout);
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "tlb", stdout);
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "snoop", stdout);
}
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
} }
......
...@@ -1671,7 +1671,6 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -1671,7 +1671,6 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
.sample = perf_sched__process_tracepoint_sample, .sample = perf_sched__process_tracepoint_sample,
.comm = perf_event__process_comm, .comm = perf_event__process_comm,
.lost = perf_event__process_lost, .lost = perf_event__process_lost,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork, .fork = perf_event__process_fork,
.ordered_samples = true, .ordered_samples = true,
}, },
......
This diff is collapsed.
...@@ -251,7 +251,8 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel, ...@@ -251,7 +251,8 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
{ {
struct hist_entry *he; struct hist_entry *he;
he = __hists__add_entry(&evsel->hists, al, NULL, sample->period); he = __hists__add_entry(&evsel->hists, al, NULL, sample->period,
sample->weight);
if (he == NULL) if (he == NULL)
return NULL; return NULL;
...@@ -1088,7 +1089,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -1088,7 +1089,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_INCR('v', "verbose", &verbose, OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"), "be more verbose (show counter open errors, etc)"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]", OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
"sort by key(s): pid, comm, dso, symbol, parent"), "sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight"),
OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
"Show a column with the number of samples"), "Show a column with the number of samples"),
OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts, OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
......
...@@ -36,6 +36,7 @@ extern int cmd_kvm(int argc, const char **argv, const char *prefix); ...@@ -36,6 +36,7 @@ extern int cmd_kvm(int argc, const char **argv, const char *prefix);
extern int cmd_test(int argc, const char **argv, const char *prefix); extern int cmd_test(int argc, const char **argv, const char *prefix);
extern int cmd_trace(int argc, const char **argv, const char *prefix); extern int cmd_trace(int argc, const char **argv, const char *prefix);
extern int cmd_inject(int argc, const char **argv, const char *prefix); extern int cmd_inject(int argc, const char **argv, const char *prefix);
extern int cmd_mem(int argc, const char **argv, const char *prefix);
extern int find_scripts(char **scripts_array, char **scripts_path_array); extern int find_scripts(char **scripts_array, char **scripts_path_array);
#endif #endif
...@@ -14,6 +14,7 @@ perf-kmem mainporcelain common ...@@ -14,6 +14,7 @@ perf-kmem mainporcelain common
perf-kvm mainporcelain common perf-kvm mainporcelain common
perf-list mainporcelain common perf-list mainporcelain common
perf-lock mainporcelain common perf-lock mainporcelain common
perf-mem mainporcelain common
perf-probe mainporcelain full perf-probe mainporcelain full
perf-record mainporcelain common perf-record mainporcelain common
perf-report mainporcelain common perf-report mainporcelain common
......
...@@ -61,15 +61,13 @@ int main(void) ...@@ -61,15 +61,13 @@ int main(void)
} }
endef endef
ifndef NO_NEWT ifndef NO_SLANG
define SOURCE_NEWT define SOURCE_SLANG
#include <newt.h> #include <slang.h>
int main(void) int main(void)
{ {
newtInit(); return SLsmg_init_smg();
newtCls();
return newtFinished();
} }
endef endef
endif endif
......
...@@ -60,6 +60,7 @@ static struct cmd_struct commands[] = { ...@@ -60,6 +60,7 @@ static struct cmd_struct commands[] = {
{ "trace", cmd_trace, 0 }, { "trace", cmd_trace, 0 },
#endif #endif
{ "inject", cmd_inject, 0 }, { "inject", cmd_inject, 0 },
{ "mem", cmd_mem, 0 },
}; };
struct pager_config { struct pager_config {
...@@ -517,9 +518,8 @@ int main(int argc, const char **argv) ...@@ -517,9 +518,8 @@ int main(int argc, const char **argv)
while (1) { while (1) {
static int done_help; static int done_help;
static int was_alias; int was_alias = run_argv(&argc, &argv);
was_alias = run_argv(&argc, &argv);
if (errno != ENOENT) if (errno != ENOENT)
break; break;
......
...@@ -218,6 +218,7 @@ struct perf_record_opts { ...@@ -218,6 +218,7 @@ struct perf_record_opts {
bool pipe_output; bool pipe_output;
bool raw_samples; bool raw_samples;
bool sample_address; bool sample_address;
bool sample_weight;
bool sample_time; bool sample_time;
bool period; bool period;
unsigned int freq; unsigned int freq;
......
...@@ -223,7 +223,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) ...@@ -223,7 +223,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
&sample, 0) < 0) &sample, 0) < 0)
goto out; goto out;
he = __hists__add_entry(&evsel->hists, &al, NULL, 1); he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1);
if (he == NULL) if (he == NULL)
goto out; goto out;
...@@ -247,7 +247,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) ...@@ -247,7 +247,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
&sample, 0) < 0) &sample, 0) < 0)
goto out; goto out;
he = __hists__add_entry(&evsel->hists, &al, NULL, 1); he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1);
if (he == NULL) if (he == NULL)
goto out; goto out;
......
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
#include "../cache.h" #include "../cache.h"
#include "../../perf.h" #include "../../perf.h"
#include "libslang.h" #include "libslang.h"
#include <newt.h>
#include "ui.h" #include "ui.h"
#include "util.h" #include "util.h"
#include <linux/compiler.h> #include <linux/compiler.h>
...@@ -234,7 +233,7 @@ void ui_browser__reset_index(struct ui_browser *browser) ...@@ -234,7 +233,7 @@ void ui_browser__reset_index(struct ui_browser *browser)
void __ui_browser__show_title(struct ui_browser *browser, const char *title) void __ui_browser__show_title(struct ui_browser *browser, const char *title)
{ {
SLsmg_gotorc(0, 0); SLsmg_gotorc(0, 0);
ui_browser__set_color(browser, NEWT_COLORSET_ROOT); ui_browser__set_color(browser, HE_COLORSET_ROOT);
slsmg_write_nstring(title, browser->width + 1); slsmg_write_nstring(title, browser->width + 1);
} }
...@@ -513,6 +512,12 @@ static struct ui_browser_colorset { ...@@ -513,6 +512,12 @@ static struct ui_browser_colorset {
.fg = "magenta", .fg = "magenta",
.bg = "default", .bg = "default",
}, },
{
.colorset = HE_COLORSET_ROOT,
.name = "root",
.fg = "white",
.bg = "blue",
},
{ {
.name = NULL, .name = NULL,
} }
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#define HE_COLORSET_SELECTED 53 #define HE_COLORSET_SELECTED 53
#define HE_COLORSET_CODE 54 #define HE_COLORSET_CODE 54
#define HE_COLORSET_ADDR 55 #define HE_COLORSET_ADDR 55
#define HE_COLORSET_ROOT 56
struct ui_browser { struct ui_browser {
u64 index, top_idx; u64 index, top_idx;
......
...@@ -10,7 +10,6 @@ ...@@ -10,7 +10,6 @@
#include "../../util/symbol.h" #include "../../util/symbol.h"
#include "../../util/evsel.h" #include "../../util/evsel.h"
#include <pthread.h> #include <pthread.h>
#include <newt.h>
struct browser_disasm_line { struct browser_disasm_line {
struct rb_node rb_node; struct rb_node rb_node;
......
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
#include "../libslang.h" #include "../libslang.h"
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <newt.h>
#include <linux/rbtree.h> #include <linux/rbtree.h>
#include "../../util/evsel.h" #include "../../util/evsel.h"
......
#include "../libslang.h" #include "../libslang.h"
#include <elf.h> #include <elf.h>
#include <newt.h>
#include <inttypes.h> #include <inttypes.h>
#include <sys/ttydefaults.h> #include <sys/ttydefaults.h>
#include <string.h> #include <string.h>
...@@ -10,41 +9,9 @@ ...@@ -10,41 +9,9 @@
#include "../../util/symbol.h" #include "../../util/symbol.h"
#include "../browser.h" #include "../browser.h"
#include "../helpline.h" #include "../helpline.h"
#include "../keysyms.h"
#include "map.h" #include "map.h"
/*
 * Pop up a one-line newt entry window titled @title and copy what the user
 * typed into @bf.
 *
 * @bf/@size: destination buffer and its capacity in bytes; on success the
 *            buffer is always NUL-terminated (input is truncated to fit).
 * @width:    width of the window and of the entry field.
 *
 * Returns 0 when a result string was obtained, -1 otherwise.
 */
static int ui_entry__read(const char *title, char *bf, size_t size, int width)
{
	struct newtExitStruct es;
	newtComponent form, entry;
	const char *result = NULL;
	int err = -1;

	newtCenteredWindow(width, 1, title);
	form = newtForm(NULL, NULL, 0);
	if (form == NULL) {
		/* The window was already pushed above: do not leave it behind. */
		newtPopWindow();
		return -1;
	}

	entry = newtEntry(0, 0, "0x", width, &result, NEWT_FLAG_SCROLL);
	if (entry == NULL)
		goto out_free_form;

	newtFormAddComponent(form, entry);
	newtFormAddHotKey(form, NEWT_KEY_ENTER);
	newtFormAddHotKey(form, NEWT_KEY_ESCAPE);
	newtFormAddHotKey(form, NEWT_KEY_LEFT);
	newtFormAddHotKey(form, CTRL('c'));
	newtFormRun(form, &es);

	if (result != NULL && size > 0) {
		/*
		 * The entry scrolls (NEWT_FLAG_SCROLL), so the text may be
		 * longer than the buffer; strncpy() alone would then leave
		 * @bf without a terminating NUL.
		 */
		strncpy(bf, result, size - 1);
		bf[size - 1] = '\0';
		err = 0;
	}
out_free_form:
	newtPopWindow();
	newtFormDestroy(form);
	return err;
}
struct map_browser { struct map_browser {
struct ui_browser b; struct ui_browser b;
struct map *map; struct map *map;
...@@ -78,10 +45,11 @@ static int map_browser__search(struct map_browser *self) ...@@ -78,10 +45,11 @@ static int map_browser__search(struct map_browser *self)
{ {
char target[512]; char target[512];
struct symbol *sym; struct symbol *sym;
int err = ui_entry__read("Search by name/addr", target, sizeof(target), 40); int err = ui_browser__input_window("Search by name/addr",
"Prefix with 0x to search by address",
if (err) target, "ENTER: OK, ESC: Cancel", 0);
return err; if (err != K_ENTER)
return -1;
if (target[0] == '0' && tolower(target[1]) == 'x') { if (target[0] == '0' && tolower(target[1]) == 'x') {
u64 addr = strtoull(target, NULL, 16); u64 addr = strtoull(target, NULL, 16);
...@@ -112,12 +80,20 @@ static int map_browser__run(struct map_browser *self) ...@@ -112,12 +80,20 @@ static int map_browser__run(struct map_browser *self)
while (1) { while (1) {
key = ui_browser__run(&self->b, 0); key = ui_browser__run(&self->b, 0);
if (verbose && key == '/') switch (key) {
case '/':
if (verbose)
map_browser__search(self); map_browser__search(self);
else default:
break; break;
case K_LEFT:
case K_ESC:
case 'q':
case CTRL('c'):
goto out;
} }
}
out:
ui_browser__hide(&self->b); ui_browser__hide(&self->b);
return key; return key;
} }
......
#include <elf.h> #include <elf.h>
#include <newt.h>
#include <inttypes.h> #include <inttypes.h>
#include <sys/ttydefaults.h> #include <sys/ttydefaults.h>
#include <string.h> #include <string.h>
......
#include <newt.h>
#include <signal.h> #include <signal.h>
#include <stdbool.h> #include <stdbool.h>
...@@ -88,13 +87,6 @@ int ui__getch(int delay_secs) ...@@ -88,13 +87,6 @@ int ui__getch(int delay_secs)
return SLkp_getkey(); return SLkp_getkey();
} }
/*
 * Suspend callback handed to newtSetSuspendCallback() by ui__init().
 * Brackets a self-delivered SIGTSTP with newtSuspend()/newtResume(), so the
 * process is stopped between the two newt calls and newt is resumed once
 * execution continues.  The callback argument is unused.
 */
static void newt_suspend(void *d __maybe_unused)
{
newtSuspend();
raise(SIGTSTP);
newtResume();
}
static void ui__signal(int sig) static void ui__signal(int sig)
{ {
ui__exit(false); ui__exit(false);
...@@ -106,7 +98,17 @@ int ui__init(void) ...@@ -106,7 +98,17 @@ int ui__init(void)
{ {
int err; int err;
newtInit(); SLutf8_enable(-1);
SLtt_get_terminfo();
SLtt_get_screen_size();
err = SLsmg_init_smg();
if (err < 0)
goto out;
err = SLang_init_tty(0, 0, 0);
if (err < 0)
goto out;
err = SLkp_init(); err = SLkp_init();
if (err < 0) { if (err < 0) {
pr_err("TUI initialization failed.\n"); pr_err("TUI initialization failed.\n");
...@@ -115,7 +117,6 @@ int ui__init(void) ...@@ -115,7 +117,6 @@ int ui__init(void)
SLkp_define_keysym((char *)"^(kB)", SL_KEY_UNTAB); SLkp_define_keysym((char *)"^(kB)", SL_KEY_UNTAB);
newtSetSuspendCallback(newt_suspend, NULL);
ui_helpline__init(); ui_helpline__init();
ui_browser__init(); ui_browser__init();
ui_progress__init(); ui_progress__init();
......
...@@ -12,7 +12,7 @@ extern int use_browser; ...@@ -12,7 +12,7 @@ extern int use_browser;
void setup_browser(bool fallback_to_pager); void setup_browser(bool fallback_to_pager);
void exit_browser(bool wait_for_ok); void exit_browser(bool wait_for_ok);
#ifdef NEWT_SUPPORT #ifdef SLANG_SUPPORT
int ui__init(void); int ui__init(void);
void ui__exit(bool wait_for_ok); void ui__exit(bool wait_for_ok);
#else #else
......
...@@ -150,7 +150,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, ...@@ -150,7 +150,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map,
struct perf_evsel *evsel, bool print_lines, struct perf_evsel *evsel, bool print_lines,
bool full_paths, int min_pcnt, int max_lines); bool full_paths, int min_pcnt, int max_lines);
#ifdef NEWT_SUPPORT #ifdef SLANG_SUPPORT
int symbol__tui_annotate(struct symbol *sym, struct map *map, int symbol__tui_annotate(struct symbol *sym, struct map *map,
struct perf_evsel *evsel, struct perf_evsel *evsel,
struct hist_browser_timer *hbt); struct hist_browser_timer *hbt);
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include "cpumap.h" #include "cpumap.h"
#include <assert.h> #include <assert.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
static struct cpu_map *cpu_map__default_new(void) static struct cpu_map *cpu_map__default_new(void)
{ {
...@@ -219,7 +220,7 @@ int cpu_map__get_socket(struct cpu_map *map, int idx) ...@@ -219,7 +220,7 @@ int cpu_map__get_socket(struct cpu_map *map, int idx)
if (!mnt) if (!mnt)
return -1; return -1;
sprintf(path, snprintf(path, PATH_MAX,
"%s/devices/system/cpu/cpu%d/topology/physical_package_id", "%s/devices/system/cpu/cpu%d/topology/physical_package_id",
mnt, cpu); mnt, cpu);
...@@ -231,27 +232,88 @@ int cpu_map__get_socket(struct cpu_map *map, int idx) ...@@ -231,27 +232,88 @@ int cpu_map__get_socket(struct cpu_map *map, int idx)
return ret == 1 ? cpu : -1; return ret == 1 ? cpu : -1;
} }
int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp) static int cmp_ids(const void *a, const void *b)
{ {
struct cpu_map *sock; return *(int *)a - *(int *)b;
}
static int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
int (*f)(struct cpu_map *map, int cpu))
{
struct cpu_map *c;
int nr = cpus->nr; int nr = cpus->nr;
int cpu, s1, s2; int cpu, s1, s2;
sock = calloc(1, sizeof(*sock) + nr * sizeof(int)); /* allocate as much as possible */
if (!sock) c = calloc(1, sizeof(*c) + nr * sizeof(int));
if (!c)
return -1; return -1;
for (cpu = 0; cpu < nr; cpu++) { for (cpu = 0; cpu < nr; cpu++) {
s1 = cpu_map__get_socket(cpus, cpu); s1 = f(cpus, cpu);
for (s2 = 0; s2 < sock->nr; s2++) { for (s2 = 0; s2 < c->nr; s2++) {
if (s1 == sock->map[s2]) if (s1 == c->map[s2])
break; break;
} }
if (s2 == sock->nr) { if (s2 == c->nr) {
sock->map[sock->nr] = s1; c->map[c->nr] = s1;
sock->nr++; c->nr++;
} }
} }
*sockp = sock; /* ensure we process id in increasing order */
qsort(c->map, c->nr, sizeof(int), cmp_ids);
*res = c;
return 0; return 0;
} }
/*
 * Return a globally unique core id for the CPU stored at position @idx
 * of @map.
 *
 * The per-socket core id is read from
 * <sysfs>/devices/system/cpu/cpuN/topology/core_id and combined with the
 * socket id returned by cpu_map__get_socket(): socket in the upper bits
 * (<< 16), core id in the low 16 bits.
 *
 * Returns -1 when @idx is outside the map, sysfs is not mounted, the
 * topology file cannot be opened or parsed, or the socket lookup fails.
 */
int cpu_map__get_core(struct cpu_map *map, int idx)
{
	FILE *fp;
	const char *mnt;
	char path[PATH_MAX];
	int cpu, core, ret, s;

	/* Valid positions are 0 .. nr - 1; map->map[nr] is out of bounds. */
	if (idx < 0 || idx >= map->nr)
		return -1;

	cpu = map->map[idx];

	mnt = sysfs_find_mountpoint();
	if (!mnt)
		return -1;

	snprintf(path, PATH_MAX,
		"%s/devices/system/cpu/cpu%d/topology/core_id",
		mnt, cpu);

	fp = fopen(path, "r");
	if (!fp)
		return -1;
	ret = fscanf(fp, "%d", &core);
	fclose(fp);
	if (ret != 1)
		return -1;

	s = cpu_map__get_socket(map, idx);
	if (s == -1)
		return -1;

	/*
	 * encode socket in upper 16 bits
	 * core_id is relative to socket, and
	 * we need a global id. So we combine
	 * socket + core id
	 */
	return (s << 16) | (core & 0xffff);
}
/*
 * Build the map of distinct socket ids for the CPUs in @cpus by running
 * cpu_map__get_socket() on every position through cpu_map__build_map().
 * On success the newly allocated map (ids in increasing order) is stored
 * in *sockp and 0 is returned; -1 is returned if the allocation fails.
 */
int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
{
return cpu_map__build_map(cpus, sockp, cpu_map__get_socket);
}
/*
 * Build the map of distinct core ids for the CPUs in @cpus by running
 * cpu_map__get_core() on every position through cpu_map__build_map().
 * Each id carries the socket in its upper bits and the core id in its low
 * 16 bits.  On success the newly allocated map (ids in increasing order)
 * is stored in *corep and 0 is returned; -1 is returned if the allocation
 * fails.
 */
int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep)
{
return cpu_map__build_map(cpus, corep, cpu_map__get_core);
}
...@@ -15,7 +15,9 @@ void cpu_map__delete(struct cpu_map *map); ...@@ -15,7 +15,9 @@ void cpu_map__delete(struct cpu_map *map);
struct cpu_map *cpu_map__read(FILE *file); struct cpu_map *cpu_map__read(FILE *file);
size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
int cpu_map__get_socket(struct cpu_map *map, int idx); int cpu_map__get_socket(struct cpu_map *map, int idx);
int cpu_map__get_core(struct cpu_map *map, int idx);
int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep);
static inline int cpu_map__socket(struct cpu_map *sock, int s) static inline int cpu_map__socket(struct cpu_map *sock, int s)
{ {
...@@ -24,6 +26,16 @@ static inline int cpu_map__socket(struct cpu_map *sock, int s) ...@@ -24,6 +26,16 @@ static inline int cpu_map__socket(struct cpu_map *sock, int s)
return sock->map[s]; return sock->map[s];
} }
/*
 * Recover the socket part of a combined id: the value held above the
 * low 16 bits.
 */
static inline int cpu_map__id_to_socket(int id)
{
	const int sock_id = id >> 16;

	return sock_id;
}
/*
 * Recover the low 16 bits of a combined id, i.e. the part that sits
 * below the socket number.
 */
static inline int cpu_map__id_to_cpu(int id)
{
	const int low16_mask = 0xffff;

	return id & low16_mask;
}
static inline int cpu_map__nr(const struct cpu_map *map) static inline int cpu_map__nr(const struct cpu_map *map)
{ {
return map ? map->nr : 1; return map ? map->nr : 1;
......
...@@ -88,8 +88,10 @@ struct perf_sample { ...@@ -88,8 +88,10 @@ struct perf_sample {
u64 id; u64 id;
u64 stream_id; u64 stream_id;
u64 period; u64 period;
u64 weight;
u32 cpu; u32 cpu;
u32 raw_size; u32 raw_size;
u64 data_src;
void *raw_data; void *raw_data;
struct ip_callchain *callchain; struct ip_callchain *callchain;
struct branch_stack *branch_stack; struct branch_stack *branch_stack;
...@@ -97,6 +99,13 @@ struct perf_sample { ...@@ -97,6 +99,13 @@ struct perf_sample {
struct stack_dump user_stack; struct stack_dump user_stack;
}; };
/*
 * data_src value built by OR-ing the NA encoding of each field: operation,
 * level, snoop, lock and TLB.
 * NOTE(review): "NA" presumably means "not available" -- confirm against
 * the PERF_MEM_S() definition.
 */
#define PERF_MEM_DATA_SRC_NONE \
(PERF_MEM_S(OP, NA) |\
PERF_MEM_S(LVL, NA) |\
PERF_MEM_S(SNOOP, NA) |\
PERF_MEM_S(LOCK, NA) |\
PERF_MEM_S(TLB, NA))
struct build_id_event { struct build_id_event {
struct perf_event_header header; struct perf_event_header header;
pid_t pid; pid_t pid;
......
...@@ -554,6 +554,9 @@ void perf_evsel__config(struct perf_evsel *evsel, ...@@ -554,6 +554,9 @@ void perf_evsel__config(struct perf_evsel *evsel,
perf_evsel__set_sample_bit(evsel, CPU); perf_evsel__set_sample_bit(evsel, CPU);
} }
if (opts->sample_address)
attr->sample_type |= PERF_SAMPLE_DATA_SRC;
if (opts->no_delay) { if (opts->no_delay) {
attr->watermark = 0; attr->watermark = 0;
attr->wakeup_events = 1; attr->wakeup_events = 1;
...@@ -563,6 +566,9 @@ void perf_evsel__config(struct perf_evsel *evsel, ...@@ -563,6 +566,9 @@ void perf_evsel__config(struct perf_evsel *evsel,
attr->branch_sample_type = opts->branch_stack; attr->branch_sample_type = opts->branch_stack;
} }
if (opts->sample_weight)
attr->sample_type |= PERF_SAMPLE_WEIGHT;
attr->mmap = track; attr->mmap = track;
attr->comm = track; attr->comm = track;
...@@ -1017,6 +1023,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, ...@@ -1017,6 +1023,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
data->cpu = data->pid = data->tid = -1; data->cpu = data->pid = data->tid = -1;
data->stream_id = data->id = data->time = -1ULL; data->stream_id = data->id = data->time = -1ULL;
data->period = 1; data->period = 1;
data->weight = 0;
if (event->header.type != PERF_RECORD_SAMPLE) { if (event->header.type != PERF_RECORD_SAMPLE) {
if (!evsel->attr.sample_id_all) if (!evsel->attr.sample_id_all)
...@@ -1167,6 +1174,18 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, ...@@ -1167,6 +1174,18 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
} }
} }
data->weight = 0;
if (type & PERF_SAMPLE_WEIGHT) {
data->weight = *array;
array++;
}
data->data_src = PERF_MEM_DATA_SRC_NONE;
if (type & PERF_SAMPLE_DATA_SRC) {
data->data_src = *array;
array++;
}
return 0; return 0;
} }
......
#define _FILE_OFFSET_BITS 64
#include "util.h" #include "util.h"
#include <sys/types.h> #include <sys/types.h>
#include <byteswap.h> #include <byteswap.h>
...@@ -1672,8 +1670,8 @@ static int process_tracing_data(struct perf_file_section *section __maybe_unused ...@@ -1672,8 +1670,8 @@ static int process_tracing_data(struct perf_file_section *section __maybe_unused
struct perf_header *ph __maybe_unused, struct perf_header *ph __maybe_unused,
int fd, void *data) int fd, void *data)
{ {
trace_report(fd, data, false); ssize_t ret = trace_report(fd, data, false);
return 0; return ret < 0 ? -1 : 0;
} }
static int process_build_id(struct perf_file_section *section, static int process_build_id(struct perf_file_section *section,
...@@ -2752,6 +2750,11 @@ static int perf_evsel__prepare_tracepoint_event(struct perf_evsel *evsel, ...@@ -2752,6 +2750,11 @@ static int perf_evsel__prepare_tracepoint_event(struct perf_evsel *evsel,
if (evsel->tp_format) if (evsel->tp_format)
return 0; return 0;
if (pevent == NULL) {
pr_debug("broken or missing trace data\n");
return -1;
}
event = pevent_find_event(pevent, evsel->attr.config); event = pevent_find_event(pevent, evsel->attr.config);
if (event == NULL) if (event == NULL)
return -1; return -1;
......
...@@ -67,12 +67,16 @@ static void hists__set_unres_dso_col_len(struct hists *hists, int dso) ...@@ -67,12 +67,16 @@ static void hists__set_unres_dso_col_len(struct hists *hists, int dso)
void hists__calc_col_len(struct hists *hists, struct hist_entry *h) void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
{ {
const unsigned int unresolved_col_width = BITS_PER_LONG / 4; const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
int symlen;
u16 len; u16 len;
if (h->ms.sym) if (h->ms.sym)
hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen + 4); hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen + 4);
else else {
symlen = unresolved_col_width + 4 + 2;
hists__new_col_len(hists, HISTC_SYMBOL, symlen);
hists__set_unres_dso_col_len(hists, HISTC_DSO); hists__set_unres_dso_col_len(hists, HISTC_DSO);
}
len = thread__comm_len(h->thread); len = thread__comm_len(h->thread);
if (hists__new_col_len(hists, HISTC_COMM, len)) if (hists__new_col_len(hists, HISTC_COMM, len))
...@@ -87,7 +91,6 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) ...@@ -87,7 +91,6 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__new_col_len(hists, HISTC_PARENT, h->parent->namelen); hists__new_col_len(hists, HISTC_PARENT, h->parent->namelen);
if (h->branch_info) { if (h->branch_info) {
int symlen;
/* /*
* +4 accounts for '[x] ' priv level info * +4 accounts for '[x] ' priv level info
* +2 account of 0x prefix on raw addresses * +2 account of 0x prefix on raw addresses
...@@ -116,6 +119,42 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) ...@@ -116,6 +119,42 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__set_unres_dso_col_len(hists, HISTC_DSO_TO); hists__set_unres_dso_col_len(hists, HISTC_DSO_TO);
} }
} }
if (h->mem_info) {
/*
* +4 accounts for '[x] ' priv level info
* +2 accounts for the 0x prefix on raw addresses
*/
if (h->mem_info->daddr.sym) {
symlen = (int)h->mem_info->daddr.sym->namelen + 4
+ unresolved_col_width + 2;
hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
symlen);
} else {
symlen = unresolved_col_width + 4 + 2;
hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
symlen);
}
if (h->mem_info->daddr.map) {
symlen = dso__name_len(h->mem_info->daddr.map->dso);
hists__new_col_len(hists, HISTC_MEM_DADDR_DSO,
symlen);
} else {
symlen = unresolved_col_width + 4 + 2;
hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
}
} else {
symlen = unresolved_col_width + 4 + 2;
hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen);
hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
}
hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
hists__new_col_len(hists, HISTC_MEM_TLB, 22);
hists__new_col_len(hists, HISTC_MEM_SNOOP, 12);
hists__new_col_len(hists, HISTC_MEM_LVL, 21 + 3);
hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12);
hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12);
} }
void hists__output_recalc_col_len(struct hists *hists, int max_rows) void hists__output_recalc_col_len(struct hists *hists, int max_rows)
...@@ -155,9 +194,12 @@ static void hist_entry__add_cpumode_period(struct hist_entry *he, ...@@ -155,9 +194,12 @@ static void hist_entry__add_cpumode_period(struct hist_entry *he,
} }
} }
static void he_stat__add_period(struct he_stat *he_stat, u64 period) static void he_stat__add_period(struct he_stat *he_stat, u64 period,
u64 weight)
{ {
he_stat->period += period; he_stat->period += period;
he_stat->weight += weight;
he_stat->nr_events += 1; he_stat->nr_events += 1;
} }
...@@ -169,12 +211,14 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) ...@@ -169,12 +211,14 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
dest->period_guest_sys += src->period_guest_sys; dest->period_guest_sys += src->period_guest_sys;
dest->period_guest_us += src->period_guest_us; dest->period_guest_us += src->period_guest_us;
dest->nr_events += src->nr_events; dest->nr_events += src->nr_events;
dest->weight += src->weight;
} }
static void hist_entry__decay(struct hist_entry *he) static void hist_entry__decay(struct hist_entry *he)
{ {
he->stat.period = (he->stat.period * 7) / 8; he->stat.period = (he->stat.period * 7) / 8;
he->stat.nr_events = (he->stat.nr_events * 7) / 8; he->stat.nr_events = (he->stat.nr_events * 7) / 8;
/* XXX need decay for weight too? */
} }
static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
...@@ -239,7 +283,7 @@ void hists__decay_entries_threaded(struct hists *hists, ...@@ -239,7 +283,7 @@ void hists__decay_entries_threaded(struct hists *hists,
static struct hist_entry *hist_entry__new(struct hist_entry *template) static struct hist_entry *hist_entry__new(struct hist_entry *template)
{ {
size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0; size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0;
struct hist_entry *he = malloc(sizeof(*he) + callchain_size); struct hist_entry *he = zalloc(sizeof(*he) + callchain_size);
if (he != NULL) { if (he != NULL) {
*he = *template; *he = *template;
...@@ -254,6 +298,13 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template) ...@@ -254,6 +298,13 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template)
he->branch_info->to.map->referenced = true; he->branch_info->to.map->referenced = true;
} }
if (he->mem_info) {
if (he->mem_info->iaddr.map)
he->mem_info->iaddr.map->referenced = true;
if (he->mem_info->daddr.map)
he->mem_info->daddr.map->referenced = true;
}
if (symbol_conf.use_callchain) if (symbol_conf.use_callchain)
callchain_init(he->callchain); callchain_init(he->callchain);
...@@ -282,7 +333,8 @@ static u8 symbol__parent_filter(const struct symbol *parent) ...@@ -282,7 +333,8 @@ static u8 symbol__parent_filter(const struct symbol *parent)
static struct hist_entry *add_hist_entry(struct hists *hists, static struct hist_entry *add_hist_entry(struct hists *hists,
struct hist_entry *entry, struct hist_entry *entry,
struct addr_location *al, struct addr_location *al,
u64 period) u64 period,
u64 weight)
{ {
struct rb_node **p; struct rb_node **p;
struct rb_node *parent = NULL; struct rb_node *parent = NULL;
...@@ -306,7 +358,7 @@ static struct hist_entry *add_hist_entry(struct hists *hists, ...@@ -306,7 +358,7 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
cmp = hist_entry__cmp(he, entry); cmp = hist_entry__cmp(he, entry);
if (!cmp) { if (!cmp) {
he_stat__add_period(&he->stat, period); he_stat__add_period(&he->stat, period, weight);
/* If the map of an existing hist_entry has /* If the map of an existing hist_entry has
* become out-of-date due to an exec() or * become out-of-date due to an exec() or
...@@ -341,11 +393,42 @@ static struct hist_entry *add_hist_entry(struct hists *hists, ...@@ -341,11 +393,42 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
return he; return he;
} }
/*
 * Record one sample that carries memory-access information in @self.
 *
 * A template entry is filled from the resolved location @al (thread, map,
 * symbol, cpu, address, level), the parent symbol @sym_parent and the
 * memory info @mi, with @period/@weight and an event count of one as its
 * initial statistics.  The template is then handed to add_hist_entry(),
 * whose result is returned unchanged.
 */
struct hist_entry *__hists__add_mem_entry(struct hists *self,
					  struct addr_location *al,
					  struct symbol *sym_parent,
					  struct mem_info *mi,
					  u64 period,
					  u64 weight)
{
	struct hist_entry entry = {
		/* where the sample was taken */
		.hists		= self,
		.thread		= al->thread,
		.cpu		= al->cpu,
		.ip		= al->addr,
		.level		= al->level,
		.ms		= { .map = al->map, .sym = al->sym, },
		/* parent symbol and the filter state derived from it */
		.parent		= sym_parent,
		.filtered	= symbol__parent_filter(sym_parent),
		/* initial statistics for a brand new entry */
		.stat		= {
			.nr_events	= 1,
			.period		= period,
			.weight		= weight,
		},
		/* memory sample: no branch information */
		.mem_info	= mi,
		.branch_info	= NULL,
	};
	struct hist_entry *he;

	he = add_hist_entry(self, &entry, al, period, weight);
	return he;
}
struct hist_entry *__hists__add_branch_entry(struct hists *self, struct hist_entry *__hists__add_branch_entry(struct hists *self,
struct addr_location *al, struct addr_location *al,
struct symbol *sym_parent, struct symbol *sym_parent,
struct branch_info *bi, struct branch_info *bi,
u64 period) u64 period,
u64 weight)
{ {
struct hist_entry entry = { struct hist_entry entry = {
.thread = al->thread, .thread = al->thread,
...@@ -359,19 +442,22 @@ struct hist_entry *__hists__add_branch_entry(struct hists *self, ...@@ -359,19 +442,22 @@ struct hist_entry *__hists__add_branch_entry(struct hists *self,
.stat = { .stat = {
.period = period, .period = period,
.nr_events = 1, .nr_events = 1,
.weight = weight,
}, },
.parent = sym_parent, .parent = sym_parent,
.filtered = symbol__parent_filter(sym_parent), .filtered = symbol__parent_filter(sym_parent),
.branch_info = bi, .branch_info = bi,
.hists = self, .hists = self,
.mem_info = NULL,
}; };
return add_hist_entry(self, &entry, al, period); return add_hist_entry(self, &entry, al, period, weight);
} }
struct hist_entry *__hists__add_entry(struct hists *self, struct hist_entry *__hists__add_entry(struct hists *self,
struct addr_location *al, struct addr_location *al,
struct symbol *sym_parent, u64 period) struct symbol *sym_parent, u64 period,
u64 weight)
{ {
struct hist_entry entry = { struct hist_entry entry = {
.thread = al->thread, .thread = al->thread,
...@@ -385,13 +471,16 @@ struct hist_entry *__hists__add_entry(struct hists *self, ...@@ -385,13 +471,16 @@ struct hist_entry *__hists__add_entry(struct hists *self,
.stat = { .stat = {
.period = period, .period = period,
.nr_events = 1, .nr_events = 1,
.weight = weight,
}, },
.parent = sym_parent, .parent = sym_parent,
.filtered = symbol__parent_filter(sym_parent), .filtered = symbol__parent_filter(sym_parent),
.hists = self, .hists = self,
.branch_info = NULL,
.mem_info = NULL,
}; };
return add_hist_entry(self, &entry, al, period); return add_hist_entry(self, &entry, al, period, weight);
} }
int64_t int64_t
...@@ -431,6 +520,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) ...@@ -431,6 +520,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
void hist_entry__free(struct hist_entry *he) void hist_entry__free(struct hist_entry *he)
{ {
free(he->branch_info); free(he->branch_info);
free(he->mem_info);
free(he); free(he);
} }
......
...@@ -49,6 +49,14 @@ enum hist_column { ...@@ -49,6 +49,14 @@ enum hist_column {
HISTC_DSO_FROM, HISTC_DSO_FROM,
HISTC_DSO_TO, HISTC_DSO_TO,
HISTC_SRCLINE, HISTC_SRCLINE,
HISTC_LOCAL_WEIGHT,
HISTC_GLOBAL_WEIGHT,
HISTC_MEM_DADDR_SYMBOL,
HISTC_MEM_DADDR_DSO,
HISTC_MEM_LOCKED,
HISTC_MEM_TLB,
HISTC_MEM_LVL,
HISTC_MEM_SNOOP,
HISTC_NR_COLS, /* Last entry */ HISTC_NR_COLS, /* Last entry */
}; };
...@@ -73,7 +81,8 @@ struct hists { ...@@ -73,7 +81,8 @@ struct hists {
struct hist_entry *__hists__add_entry(struct hists *self, struct hist_entry *__hists__add_entry(struct hists *self,
struct addr_location *al, struct addr_location *al,
struct symbol *parent, u64 period); struct symbol *parent, u64 period,
u64 weight);
int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
int hist_entry__sort_snprintf(struct hist_entry *self, char *bf, size_t size, int hist_entry__sort_snprintf(struct hist_entry *self, char *bf, size_t size,
...@@ -84,7 +93,15 @@ struct hist_entry *__hists__add_branch_entry(struct hists *self, ...@@ -84,7 +93,15 @@ struct hist_entry *__hists__add_branch_entry(struct hists *self,
struct addr_location *al, struct addr_location *al,
struct symbol *sym_parent, struct symbol *sym_parent,
struct branch_info *bi, struct branch_info *bi,
u64 period); u64 period,
u64 weight);
struct hist_entry *__hists__add_mem_entry(struct hists *self,
struct addr_location *al,
struct symbol *sym_parent,
struct mem_info *mi,
u64 period,
u64 weight);
void hists__output_resort(struct hists *self); void hists__output_resort(struct hists *self);
void hists__output_resort_threaded(struct hists *hists); void hists__output_resort_threaded(struct hists *hists);
...@@ -175,7 +192,7 @@ struct hist_browser_timer { ...@@ -175,7 +192,7 @@ struct hist_browser_timer {
int refresh; int refresh;
}; };
#ifdef NEWT_SUPPORT #ifdef SLANG_SUPPORT
#include "../ui/keysyms.h" #include "../ui/keysyms.h"
int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
struct hist_browser_timer *hbt); struct hist_browser_timer *hbt);
......
...@@ -955,6 +955,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event ...@@ -955,6 +955,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
struct thread *thread; struct thread *thread;
struct map *map; struct map *map;
enum map_type type;
int ret = 0; int ret = 0;
if (dump_trace) if (dump_trace)
...@@ -971,10 +972,17 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event ...@@ -971,10 +972,17 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
thread = machine__findnew_thread(machine, event->mmap.pid); thread = machine__findnew_thread(machine, event->mmap.pid);
if (thread == NULL) if (thread == NULL)
goto out_problem; goto out_problem;
if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA)
type = MAP__VARIABLE;
else
type = MAP__FUNCTION;
map = map__new(&machine->user_dsos, event->mmap.start, map = map__new(&machine->user_dsos, event->mmap.start,
event->mmap.len, event->mmap.pgoff, event->mmap.len, event->mmap.pgoff,
event->mmap.pid, event->mmap.filename, event->mmap.pid, event->mmap.filename,
MAP__FUNCTION); type);
if (map == NULL) if (map == NULL)
goto out_problem; goto out_problem;
...@@ -1097,6 +1105,38 @@ static void ip__resolve_ams(struct machine *machine, struct thread *thread, ...@@ -1097,6 +1105,38 @@ static void ip__resolve_ams(struct machine *machine, struct thread *thread,
ams->map = al.map; ams->map = al.map;
} }
/*
 * Resolve a sampled data address into an addr_map_symbol.
 *
 * Looks @addr up for @thread on @machine with cpumode @m and map type
 * MAP__VARIABLE, then stores the raw address in @ams together with the
 * address, symbol and map reported by the lookup.
 */
static void ip__resolve_data(struct machine *machine, struct thread *thread,
			     u8 m, struct addr_map_symbol *ams, u64 addr)
{
	struct addr_location loc;

	/*
	 * Start from a zeroed location so the fields copied below are
	 * well defined even when the lookup fills nothing in.
	 */
	memset(&loc, 0, sizeof(loc));
	thread__find_addr_location(thread, machine, m, MAP__VARIABLE, addr,
				   &loc, NULL);

	ams->map = loc.map;
	ams->sym = loc.sym;
	ams->al_addr = loc.addr;
	ams->addr = addr;
}
/*
 * Build a mem_info describing the memory access recorded in @sample.
 *
 * The instruction address (sample->ip) is resolved into iaddr, the data
 * address (sample->addr) is resolved into daddr using @cpumode, and the
 * sample's data_src value is copied into data_src.val.
 *
 * Returns the mem_info obtained from zalloc(), or NULL when that
 * allocation fails.
 */
struct mem_info *machine__resolve_mem(struct machine *machine,
				      struct thread *thr,
				      struct perf_sample *sample,
				      u8 cpumode)
{
	struct mem_info *info;

	info = zalloc(sizeof(*info));
	if (info != NULL) {
		ip__resolve_ams(machine, thr, &info->iaddr, sample->ip);
		ip__resolve_data(machine, thr, cpumode, &info->daddr,
				 sample->addr);
		info->data_src.val = sample->data_src;
	}

	return info;
}
struct branch_info *machine__resolve_bstack(struct machine *machine, struct branch_info *machine__resolve_bstack(struct machine *machine,
struct thread *thr, struct thread *thr,
struct branch_stack *bs) struct branch_stack *bs)
......
...@@ -76,6 +76,9 @@ void machine__delete(struct machine *machine); ...@@ -76,6 +76,9 @@ void machine__delete(struct machine *machine);
struct branch_info *machine__resolve_bstack(struct machine *machine, struct branch_info *machine__resolve_bstack(struct machine *machine,
struct thread *thread, struct thread *thread,
struct branch_stack *bs); struct branch_stack *bs);
/*
 * Allocate a mem_info for @sample: resolves sample->ip and sample->addr
 * (the latter using @cpumode) and copies sample->data_src into it.
 * Returns NULL if the allocation fails.
 * NOTE(review): hist_entry__free() frees he->mem_info, so ownership
 * presumably passes to the hist entry - confirm against the callers.
 */
struct mem_info *machine__resolve_mem(struct machine *machine,
struct thread *thread,
struct perf_sample *sample, u8 cpumode);
int machine__resolve_callchain(struct machine *machine, int machine__resolve_callchain(struct machine *machine,
struct perf_evsel *evsel, struct perf_evsel *evsel,
struct thread *thread, struct thread *thread,
......
#define _FILE_OFFSET_BITS 64
#include <linux/kernel.h> #include <linux/kernel.h>
#include <byteswap.h> #include <byteswap.h>
...@@ -800,6 +798,12 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, ...@@ -800,6 +798,12 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
if (sample_type & PERF_SAMPLE_STACK_USER) if (sample_type & PERF_SAMPLE_STACK_USER)
stack_user__printf(&sample->user_stack); stack_user__printf(&sample->user_stack);
if (sample_type & PERF_SAMPLE_WEIGHT)
printf("... weight: %" PRIu64 "\n", sample->weight);
if (sample_type & PERF_SAMPLE_DATA_SRC)
printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
} }
static struct machine * static struct machine *
......
This diff is collapsed.
...@@ -49,6 +49,7 @@ struct he_stat { ...@@ -49,6 +49,7 @@ struct he_stat {
u64 period_us; u64 period_us;
u64 period_guest_sys; u64 period_guest_sys;
u64 period_guest_us; u64 period_guest_us;
u64 weight;
u32 nr_events; u32 nr_events;
}; };
...@@ -100,7 +101,8 @@ struct hist_entry { ...@@ -100,7 +101,8 @@ struct hist_entry {
struct rb_root sorted_chain; struct rb_root sorted_chain;
struct branch_info *branch_info; struct branch_info *branch_info;
struct hists *hists; struct hists *hists;
struct callchain_root callchain[0]; struct mem_info *mem_info;
struct callchain_root callchain[0]; /* must be last member */
}; };
static inline bool hist_entry__has_pairs(struct hist_entry *he) static inline bool hist_entry__has_pairs(struct hist_entry *he)
...@@ -130,6 +132,14 @@ enum sort_type { ...@@ -130,6 +132,14 @@ enum sort_type {
SORT_PARENT, SORT_PARENT,
SORT_CPU, SORT_CPU,
SORT_SRCLINE, SORT_SRCLINE,
SORT_LOCAL_WEIGHT,
SORT_GLOBAL_WEIGHT,
SORT_MEM_DADDR_SYMBOL,
SORT_MEM_DADDR_DSO,
SORT_MEM_LOCKED,
SORT_MEM_TLB,
SORT_MEM_LVL,
SORT_MEM_SNOOP,
/* branch stack specific sort keys */ /* branch stack specific sort keys */
__SORT_BRANCH_STACK, __SORT_BRANCH_STACK,
......
...@@ -806,9 +806,12 @@ int dso__load_sym(struct dso *dso, struct map *map, ...@@ -806,9 +806,12 @@ int dso__load_sym(struct dso *dso, struct map *map,
* DWARF DW_compile_unit has this, but we don't always have access * DWARF DW_compile_unit has this, but we don't always have access
* to it... * to it...
*/ */
demangled = bfd_demangle(NULL, elf_name, DMGL_PARAMS | DMGL_ANSI); if (symbol_conf.demangle) {
demangled = bfd_demangle(NULL, elf_name,
DMGL_PARAMS | DMGL_ANSI);
if (demangled != NULL) if (demangled != NULL)
elf_name = demangled; elf_name = demangled;
}
new_symbol: new_symbol:
f = symbol__new(sym.st_value, sym.st_size, f = symbol__new(sym.st_value, sym.st_size,
GELF_ST_BIND(sym.st_info), elf_name); GELF_ST_BIND(sym.st_info), elf_name);
......
...@@ -36,6 +36,7 @@ struct symbol_conf symbol_conf = { ...@@ -36,6 +36,7 @@ struct symbol_conf symbol_conf = {
.use_modules = true, .use_modules = true,
.try_vmlinux_path = true, .try_vmlinux_path = true,
.annotate_src = true, .annotate_src = true,
.demangle = true,
.symfs = "", .symfs = "",
}; };
......
...@@ -97,7 +97,8 @@ struct symbol_conf { ...@@ -97,7 +97,8 @@ struct symbol_conf {
kptr_restrict, kptr_restrict,
annotate_asm_raw, annotate_asm_raw,
annotate_src, annotate_src,
event_group; event_group,
demangle;
const char *vmlinux_name, const char *vmlinux_name,
*kallsyms_name, *kallsyms_name,
*source_prefix, *source_prefix,
...@@ -155,6 +156,12 @@ struct branch_info { ...@@ -155,6 +156,12 @@ struct branch_info {
struct branch_flags flags; struct branch_flags flags;
}; };
/*
 * Resolved information about one sampled memory access, as filled in by
 * machine__resolve_mem().
 */
struct mem_info {
/* instruction address, resolved from sample->ip */
struct addr_map_symbol iaddr;
/* data address, resolved from sample->addr with map type MAP__VARIABLE */
struct addr_map_symbol daddr;
/* data source descriptor; .val is copied from sample->data_src */
union perf_mem_data_src data_src;
};
struct addr_location { struct addr_location {
struct thread *thread; struct thread *thread;
struct map *map; struct map *map;
......
This diff is collapsed.
...@@ -18,8 +18,6 @@ ...@@ -18,8 +18,6 @@
* *
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/ */
#define _FILE_OFFSET_BITS 64
#include <dirent.h> #include <dirent.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
...@@ -45,20 +43,10 @@ int file_bigendian; ...@@ -45,20 +43,10 @@ int file_bigendian;
int host_bigendian; int host_bigendian;
static int long_size; static int long_size;
static ssize_t calc_data_size; static ssize_t trace_data_size;
static bool repipe; static bool repipe;
static void *malloc_or_die(int size) static int __do_read(int fd, void *buf, int size)
{
void *ret;
ret = malloc(size);
if (!ret)
die("malloc");
return ret;
}
static int do_read(int fd, void *buf, int size)
{ {
int rsize = size; int rsize = size;
...@@ -71,8 +59,10 @@ static int do_read(int fd, void *buf, int size) ...@@ -71,8 +59,10 @@ static int do_read(int fd, void *buf, int size)
if (repipe) { if (repipe) {
int retw = write(STDOUT_FILENO, buf, ret); int retw = write(STDOUT_FILENO, buf, ret);
if (retw <= 0 || retw != ret) if (retw <= 0 || retw != ret) {
die("repiping input file"); pr_debug("repiping input file");
return -1;
}
} }
size -= ret; size -= ret;
...@@ -82,17 +72,18 @@ static int do_read(int fd, void *buf, int size) ...@@ -82,17 +72,18 @@ static int do_read(int fd, void *buf, int size)
return rsize; return rsize;
} }
static int read_or_die(void *data, int size) static int do_read(void *data, int size)
{ {
int r; int r;
r = do_read(input_fd, data, size); r = __do_read(input_fd, data, size);
if (r <= 0) if (r <= 0) {
die("reading input file (size expected=%d received=%d)", pr_debug("reading input file (size expected=%d received=%d)",
size, r); size, r);
return -1;
}
if (calc_data_size) trace_data_size += r;
calc_data_size += r;
return r; return r;
} }
...@@ -105,7 +96,7 @@ static void skip(int size) ...@@ -105,7 +96,7 @@ static void skip(int size)
while (size) { while (size) {
r = size > BUFSIZ ? BUFSIZ : size; r = size > BUFSIZ ? BUFSIZ : size;
read_or_die(buf, r); do_read(buf, r);
size -= r; size -= r;
}; };
} }
...@@ -114,7 +105,8 @@ static unsigned int read4(struct pevent *pevent) ...@@ -114,7 +105,8 @@ static unsigned int read4(struct pevent *pevent)
{ {
unsigned int data; unsigned int data;
read_or_die(&data, 4); if (do_read(&data, 4) < 0)
return 0;
return __data2host4(pevent, data); return __data2host4(pevent, data);
} }
...@@ -122,7 +114,8 @@ static unsigned long long read8(struct pevent *pevent) ...@@ -122,7 +114,8 @@ static unsigned long long read8(struct pevent *pevent)
{ {
unsigned long long data; unsigned long long data;
read_or_die(&data, 8); if (do_read(&data, 8) < 0)
return 0;
return __data2host8(pevent, data); return __data2host8(pevent, data);
} }
...@@ -136,17 +129,23 @@ static char *read_string(void) ...@@ -136,17 +129,23 @@ static char *read_string(void)
for (;;) { for (;;) {
r = read(input_fd, &c, 1); r = read(input_fd, &c, 1);
if (r < 0) if (r < 0) {
die("reading input file"); pr_debug("reading input file");
goto out;
}
if (!r) if (!r) {
die("no data"); pr_debug("no data");
goto out;
}
if (repipe) { if (repipe) {
int retw = write(STDOUT_FILENO, &c, 1); int retw = write(STDOUT_FILENO, &c, 1);
if (retw <= 0 || retw != r) if (retw <= 0 || retw != r) {
die("repiping input file string"); pr_debug("repiping input file string");
goto out;
}
} }
buf[size++] = c; buf[size++] = c;
...@@ -155,60 +154,79 @@ static char *read_string(void) ...@@ -155,60 +154,79 @@ static char *read_string(void)
break; break;
} }
if (calc_data_size) trace_data_size += size;
calc_data_size += size;
str = malloc_or_die(size); str = malloc(size);
if (str)
memcpy(str, buf, size); memcpy(str, buf, size);
out:
return str; return str;
} }
static void read_proc_kallsyms(struct pevent *pevent) static int read_proc_kallsyms(struct pevent *pevent)
{ {
unsigned int size; unsigned int size;
char *buf; char *buf;
size = read4(pevent); size = read4(pevent);
if (!size) if (!size)
return; return 0;
buf = malloc_or_die(size + 1); buf = malloc(size + 1);
read_or_die(buf, size); if (buf == NULL)
return -1;
if (do_read(buf, size) < 0) {
free(buf);
return -1;
}
buf[size] = '\0'; buf[size] = '\0';
parse_proc_kallsyms(pevent, buf, size); parse_proc_kallsyms(pevent, buf, size);
free(buf); free(buf);
return 0;
} }
static void read_ftrace_printk(struct pevent *pevent) static int read_ftrace_printk(struct pevent *pevent)
{ {
unsigned int size; unsigned int size;
char *buf; char *buf;
/* it can have 0 size */
size = read4(pevent); size = read4(pevent);
if (!size) if (!size)
return; return 0;
buf = malloc_or_die(size); buf = malloc(size);
read_or_die(buf, size); if (buf == NULL)
return -1;
if (do_read(buf, size) < 0) {
free(buf);
return -1;
}
parse_ftrace_printk(pevent, buf, size); parse_ftrace_printk(pevent, buf, size);
free(buf); free(buf);
return 0;
} }
static void read_header_files(struct pevent *pevent) static int read_header_files(struct pevent *pevent)
{ {
unsigned long long size; unsigned long long size;
char *header_event; char *header_event;
char buf[BUFSIZ]; char buf[BUFSIZ];
int ret = 0;
read_or_die(buf, 12); if (do_read(buf, 12) < 0)
return -1;
if (memcmp(buf, "header_page", 12) != 0) if (memcmp(buf, "header_page", 12) != 0) {
die("did not read header page"); pr_debug("did not read header page");
return -1;
}
size = read8(pevent); size = read8(pevent);
skip(size); skip(size);
...@@ -219,70 +237,107 @@ static void read_header_files(struct pevent *pevent) ...@@ -219,70 +237,107 @@ static void read_header_files(struct pevent *pevent)
*/ */
long_size = header_page_size_size; long_size = header_page_size_size;
read_or_die(buf, 13); if (do_read(buf, 13) < 0)
if (memcmp(buf, "header_event", 13) != 0) return -1;
die("did not read header event");
if (memcmp(buf, "header_event", 13) != 0) {
pr_debug("did not read header event");
return -1;
}
size = read8(pevent); size = read8(pevent);
header_event = malloc_or_die(size); header_event = malloc(size);
read_or_die(header_event, size); if (header_event == NULL)
return -1;
if (do_read(header_event, size) < 0)
ret = -1;
free(header_event); free(header_event);
return ret;
} }
static void read_ftrace_file(struct pevent *pevent, unsigned long long size) static int read_ftrace_file(struct pevent *pevent, unsigned long long size)
{ {
char *buf; char *buf;
buf = malloc_or_die(size); buf = malloc(size);
read_or_die(buf, size); if (buf == NULL)
return -1;
if (do_read(buf, size) < 0) {
free(buf);
return -1;
}
parse_ftrace_file(pevent, buf, size); parse_ftrace_file(pevent, buf, size);
free(buf); free(buf);
return 0;
} }
static void read_event_file(struct pevent *pevent, char *sys, static int read_event_file(struct pevent *pevent, char *sys,
unsigned long long size) unsigned long long size)
{ {
char *buf; char *buf;
buf = malloc_or_die(size); buf = malloc(size);
read_or_die(buf, size); if (buf == NULL)
return -1;
if (do_read(buf, size) < 0) {
free(buf);
return -1;
}
parse_event_file(pevent, buf, size, sys); parse_event_file(pevent, buf, size, sys);
free(buf); free(buf);
return 0;
} }
static void read_ftrace_files(struct pevent *pevent) static int read_ftrace_files(struct pevent *pevent)
{ {
unsigned long long size; unsigned long long size;
int count; int count;
int i; int i;
int ret;
count = read4(pevent); count = read4(pevent);
for (i = 0; i < count; i++) { for (i = 0; i < count; i++) {
size = read8(pevent); size = read8(pevent);
read_ftrace_file(pevent, size); ret = read_ftrace_file(pevent, size);
if (ret)
return ret;
} }
return 0;
} }
static void read_event_files(struct pevent *pevent) static int read_event_files(struct pevent *pevent)
{ {
unsigned long long size; unsigned long long size;
char *sys; char *sys;
int systems; int systems;
int count; int count;
int i,x; int i,x;
int ret;
systems = read4(pevent); systems = read4(pevent);
for (i = 0; i < systems; i++) { for (i = 0; i < systems; i++) {
sys = read_string(); sys = read_string();
if (sys == NULL)
return -1;
count = read4(pevent); count = read4(pevent);
for (x=0; x < count; x++) { for (x=0; x < count; x++) {
size = read8(pevent); size = read8(pevent);
read_event_file(pevent, sys, size); ret = read_event_file(pevent, sys, size);
if (ret)
return ret;
} }
} }
return 0;
} }
ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe) ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe)
...@@ -293,58 +348,85 @@ ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe) ...@@ -293,58 +348,85 @@ ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe)
int show_version = 0; int show_version = 0;
int show_funcs = 0; int show_funcs = 0;
int show_printk = 0; int show_printk = 0;
ssize_t size; ssize_t size = -1;
struct pevent *pevent;
int err;
calc_data_size = 1; *ppevent = NULL;
repipe = __repipe;
repipe = __repipe;
input_fd = fd; input_fd = fd;
read_or_die(buf, 3); if (do_read(buf, 3) < 0)
if (memcmp(buf, test, 3) != 0) return -1;
die("no trace data in the file"); if (memcmp(buf, test, 3) != 0) {
pr_debug("no trace data in the file");
return -1;
}
read_or_die(buf, 7); if (do_read(buf, 7) < 0)
if (memcmp(buf, "tracing", 7) != 0) return -1;
die("not a trace file (missing 'tracing' tag)"); if (memcmp(buf, "tracing", 7) != 0) {
pr_debug("not a trace file (missing 'tracing' tag)");
return -1;
}
version = read_string(); version = read_string();
if (version == NULL)
return -1;
if (show_version) if (show_version)
printf("version = %s\n", version); printf("version = %s\n", version);
free(version); free(version);
read_or_die(buf, 1); if (do_read(buf, 1) < 0)
return -1;
file_bigendian = buf[0]; file_bigendian = buf[0];
host_bigendian = bigendian(); host_bigendian = bigendian();
*ppevent = read_trace_init(file_bigendian, host_bigendian); pevent = read_trace_init(file_bigendian, host_bigendian);
if (*ppevent == NULL) if (pevent == NULL) {
die("read_trace_init failed"); pr_debug("read_trace_init failed");
goto out;
}
read_or_die(buf, 1); if (do_read(buf, 1) < 0)
goto out;
long_size = buf[0]; long_size = buf[0];
page_size = read4(*ppevent); page_size = read4(pevent);
if (!page_size)
read_header_files(*ppevent); goto out;
read_ftrace_files(*ppevent); err = read_header_files(pevent);
read_event_files(*ppevent); if (err)
read_proc_kallsyms(*ppevent); goto out;
read_ftrace_printk(*ppevent); err = read_ftrace_files(pevent);
if (err)
size = calc_data_size - 1; goto out;
calc_data_size = 0; err = read_event_files(pevent);
if (err)
goto out;
err = read_proc_kallsyms(pevent);
if (err)
goto out;
err = read_ftrace_printk(pevent);
if (err)
goto out;
size = trace_data_size;
repipe = false; repipe = false;
if (show_funcs) { if (show_funcs) {
pevent_print_funcs(*ppevent); pevent_print_funcs(pevent);
return size; } else if (show_printk) {
} pevent_print_printk(pevent);
if (show_printk) {
pevent_print_printk(*ppevent);
return size;
} }
*ppevent = pevent;
pevent = NULL;
out:
if (pevent)
pevent_free(pevent);
return size; return size;
} }
...@@ -68,7 +68,7 @@ struct tracing_data { ...@@ -68,7 +68,7 @@ struct tracing_data {
struct tracing_data *tracing_data_get(struct list_head *pattrs, struct tracing_data *tracing_data_get(struct list_head *pattrs,
int fd, bool temp); int fd, bool temp);
void tracing_data_put(struct tracing_data *tdata); int tracing_data_put(struct tracing_data *tdata);
struct addr_location; struct addr_location;
......
#ifndef GIT_COMPAT_UTIL_H #ifndef GIT_COMPAT_UTIL_H
#define GIT_COMPAT_UTIL_H #define GIT_COMPAT_UTIL_H
#define _FILE_OFFSET_BITS 64
#ifndef FLEX_ARRAY #ifndef FLEX_ARRAY
/* /*
* See if our compiler is known to support flexible array members. * See if our compiler is known to support flexible array members.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment