Commit 1836ac85 authored by Ingo Molnar's avatar Ingo Molnar

Merge tag 'perf-core-for-mingo-3' of...

Merge tag 'perf-core-for-mingo-3' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

  - Improve --filter support for 'perf probe', allowing using its arguments
    on other commands, as --add, --del, etc (Masami Hiramatsu)

  - Show warning when running 'perf kmem stat' on a unsuitable perf.data file,
    i.e. one with events that are not the ones required for the stat variant
    used (Namhyung Kim).

Infrastructure changes:

  - Auxtrace support patches, paving the way to support Intel PT and BTS (Adrian Hunter)

  - hists browser (top, report) refactorings (Namhyung Kim)
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 5ebe6afa 3698dab1
......@@ -215,7 +215,6 @@ VPATH := $(srctree)$(if $(KBUILD_EXTMOD),:$(KBUILD_EXTMOD))
export srctree objtree VPATH
# SUBARCH tells the usermode build what the underlying arch is. That is set
# first, and if a usermode build is happening, the "ARCH=um" on the command
# line overrides the setting of ARCH below. If a native build is happening,
......@@ -1497,11 +1496,11 @@ image_name:
# Clear a bunch of variables before executing the submake
tools/: FORCE
$(Q)mkdir -p $(objtree)/tools
$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(objtree) subdir=tools -C $(src)/tools/
$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(O) subdir=tools -C $(src)/tools/
tools/%: FORCE
$(Q)mkdir -p $(objtree)/tools
$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(objtree) subdir=tools -C $(src)/tools/ $*
$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(O) subdir=tools -C $(src)/tools/ $*
# Single targets
# ---------------------------------------------------------------------------
......
# Some of the tools (perf) use same make variables
# as in kernel build.
export srctree=
export objtree=
include scripts/Makefile.include
help:
......@@ -47,11 +52,16 @@ cgroup firewire hv guest usb virtio vm net: FORCE
liblockdep: FORCE
$(call descend,lib/lockdep)
libapikfs: FORCE
libapi: FORCE
$(call descend,lib/api)
perf: libapikfs FORCE
$(call descend,$@)
# The perf build does not follow the descend function setup,
# invoking it via it's own make rule.
PERF_O = $(if $(O),$(O)/tools/perf,)
perf: FORCE
$(Q)mkdir -p $(PERF_O) .
$(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir=
selftests: FORCE
$(call descend,testing/$@)
......@@ -97,10 +107,10 @@ cgroup_clean hv_clean firewire_clean lguest_clean usb_clean virtio_clean vm_clea
liblockdep_clean:
$(call descend,lib/lockdep,clean)
libapikfs_clean:
libapi_clean:
$(call descend,lib/api,clean)
perf_clean: libapikfs_clean
perf_clean:
$(call descend,$(@:_clean=),clean)
selftests_clean:
......
......@@ -1387,7 +1387,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
do_warning_event(event, "%s: no type found", __func__);
goto fail;
}
field->name = last_token;
field->name = field->alias = last_token;
if (test_type(type, EVENT_OP))
goto fail;
......@@ -1469,7 +1469,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
size_dynamic = type_size(field->name);
free_token(field->name);
strcat(field->type, brackets);
field->name = token;
field->name = field->alias = token;
type = read_token(&token);
} else {
char *new_type;
......@@ -6444,6 +6444,8 @@ void pevent_ref(struct pevent *pevent)
void pevent_free_format_field(struct format_field *field)
{
free(field->type);
if (field->alias != field->name)
free(field->alias);
free(field->name);
free(field);
}
......
......@@ -191,6 +191,7 @@ struct format_field {
struct event_format *event;
char *type;
char *name;
char *alias;
int offset;
int size;
unsigned int arraylen;
......
Overhead calculation
--------------------
The overhead can be shown in two columns as 'Children' and 'Self' when
perf collects callchains. The 'self' overhead is simply calculated by
adding all period values of the entry - usually a function (symbol).
This is the value that perf shows traditionally and sum of all the
'self' overhead values should be 100%.
The 'children' overhead is calculated by adding all period values of
the child functions so that it can show the total overhead of the
higher level functions even if they don't directly execute much.
'Children' here means functions that are called from another (parent)
function.
It might be confusing that the sum of all the 'children' overhead
values exceeds 100% since each of them is already an accumulation of
'self' overhead of its child functions. But with this enabled, users
can find which function has the most overhead even if samples are
spread over the children.
Consider the following example; there are three functions like below.
-----------------------
void foo(void) {
/* do something */
}
void bar(void) {
/* do something */
foo();
}
int main(void) {
bar()
return 0;
}
-----------------------
In this case 'foo' is a child of 'bar', and 'bar' is an immediate
child of 'main' so 'foo' also is a child of 'main'. In other words,
'main' is a parent of 'foo' and 'bar', and 'bar' is a parent of 'foo'.
Suppose all samples are recorded in 'foo' and 'bar' only. When it's
recorded with callchains the output will show something like below
in the usual (self-overhead-only) output of perf report:
----------------------------------
Overhead Symbol
........ .....................
60.00% foo
|
--- foo
bar
main
__libc_start_main
40.00% bar
|
--- bar
main
__libc_start_main
----------------------------------
When the --children option is enabled, the 'self' overhead values of
child functions (i.e. 'foo' and 'bar') are added to the parents to
calculate the 'children' overhead. In this case the report could be
displayed as:
-------------------------------------------
Children Self Symbol
........ ........ ....................
100.00% 0.00% __libc_start_main
|
--- __libc_start_main
100.00% 0.00% main
|
--- main
__libc_start_main
100.00% 40.00% bar
|
--- bar
main
__libc_start_main
60.00% 60.00% foo
|
--- foo
bar
main
__libc_start_main
-------------------------------------------
In the above output, the 'self' overhead of 'foo' (60%) was add to the
'children' overhead of 'bar', 'main' and '\_\_libc_start_main'.
Likewise, the 'self' overhead of 'bar' (40%) was added to the
'children' overhead of 'main' and '\_\_libc_start_main'.
So '\_\_libc_start_main' and 'main' are shown first since they have
same (100%) 'children' overhead (even though they have zero 'self'
overhead) and they are the parents of 'foo' and 'bar'.
Since v3.16 the 'children' overhead is shown by default and the output
is sorted by its values. The 'children' overhead is disabled by
specifying --no-children option on the command line or by adding
'report.children = false' or 'top.children = false' in the perf config
file.
......@@ -44,6 +44,33 @@ OPTIONS
--kallsyms=<file>::
kallsyms pathname
--itrace::
Decode Instruction Tracing data, replacing it with synthesized events.
Options are:
i synthesize instructions events
b synthesize branches events
c synthesize branches events (calls only)
r synthesize branches events (returns only)
x synthesize transactions events
e synthesize error events
d create a debug log
g synthesize a call chain (use with i or x)
The default is all events i.e. the same as --itrace=ibxe
In addition, the period (default 100000) for instructions events
can be specified in units of:
i instructions
t ticks
ms milliseconds
us microseconds
ns nanoseconds (default)
Also the call chain size (default 16, max. 1024) for instructions or
transactions events can be specified.
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]
......@@ -37,7 +37,11 @@ OPTIONS
-s <key[,key2...]>::
--sort=<key[,key2...]>::
Sort the output (default: frag,hit,bytes)
Sort the output (default: 'frag,hit,bytes' for slab and 'bytes,hit'
for page). Available sort keys are 'ptr, callsite, bytes, hit,
pingpong, frag' for slab and 'page, callsite, bytes, hit, order,
migtype, gfp' for page. This option should be preceded by one of the
mode selection options - i.e. --slab, --page, --alloc and/or --caller.
-l <num>::
--line=<num>::
......@@ -52,6 +56,11 @@ OPTIONS
--page::
Analyze page allocator events
--live::
Show live page stat. The perf kmem shows total allocation stat by
default, but this option shows live (currently allocated) pages
instead. (This option works with --page option only)
SEE ALSO
--------
linkperf:perf-record[1]
......@@ -14,11 +14,13 @@ or
or
'perf probe' [options] --del='[GROUP:]EVENT' [...]
or
'perf probe' --list
'perf probe' --list[=[GROUP:]EVENT]
or
'perf probe' [options] --line='LINE'
or
'perf probe' [options] --vars='PROBEPOINT'
or
'perf probe' [options] --funcs
DESCRIPTION
-----------
......@@ -64,8 +66,8 @@ OPTIONS
classes(e.g. [a-z], [!A-Z]).
-l::
--list::
List up current probe events.
--list[=[GROUP:]EVENT]::
List up current probe events. This can also accept filtering patterns of event names.
-L::
--line=::
......@@ -82,9 +84,10 @@ OPTIONS
variables.
-F::
--funcs::
--funcs[=FILTER]::
Show available functions in given module or kernel. With -x/--exec,
can also list functions in a user space executable / shared library.
This also can accept a FILTER rule argument.
--filter=FILTER::
(Only for --vars and --funcs) Set filter. FILTER is a combination of glob
......
......@@ -108,6 +108,8 @@ OPTIONS
Number of mmap data pages (must be a power of two) or size
specification with appended unit character - B/K/M/G. The
size is rounded up to have nearest pages power of two value.
Also, by adding a comma, the number of mmap pages for AUX
area tracing can be specified.
--group::
Put all events in a single event group. This precedes the --event
......@@ -257,6 +259,13 @@ records. See clock_gettime(). In particular CLOCK_MONOTONIC and
CLOCK_MONOTONIC_RAW are supported, some events might also allow
CLOCK_BOOTTIME, CLOCK_REALTIME and CLOCK_TAI.
-S::
--snapshot::
Select AUX area tracing Snapshot Mode. This option is valid only with an
AUX area tracing event. Optionally the number of bytes to capture per
snapshot can be specified. In Snapshot Mode, trace data is captured only when
signal SIGUSR2 is received.
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]
......@@ -193,6 +193,7 @@ OPTIONS
Accumulate callchain of children to parent entry so that then can
show up in the output. The output will have a new "Children" column
and will be sorted on the data. It requires callchains are recorded.
See the `overhead calculation' section for more details.
--max-stack::
Set the stack depth limit when parsing the callchain, anything
......@@ -323,6 +324,37 @@ OPTIONS
--header-only::
Show only perf.data header (forces --stdio).
--itrace::
Options for decoding instruction tracing data. The options are:
i synthesize instructions events
b synthesize branches events
c synthesize branches events (calls only)
r synthesize branches events (returns only)
x synthesize transactions events
e synthesize error events
d create a debug log
g synthesize a call chain (use with i or x)
The default is all events i.e. the same as --itrace=ibxe
In addition, the period (default 100000) for instructions events
can be specified in units of:
i instructions
t ticks
ms milliseconds
us microseconds
ns nanoseconds (default)
Also the call chain size (default 16, max. 1024) for instructions or
transactions events can be specified.
To disable decoding entirely, use --no-itrace.
include::callchain-overhead-calculation.txt[]
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-annotate[1]
......@@ -115,7 +115,8 @@ OPTIONS
-f::
--fields::
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period.
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
srcline, period, flags.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace
......@@ -165,6 +166,12 @@ OPTIONS
At this point usage is displayed, and perf-script exits.
The flags field is synthesized and may have a value when Instruction
Trace decoding. The flags are "bcrosyiABEx" which stand for branch,
call, return, conditional, system, asynchronous, interrupt,
transaction abort, trace begin, trace end, and in transaction,
respectively.
Finally, a user may not set fields to none for all event types.
i.e., -f "" is not allowed.
......@@ -221,6 +228,34 @@ OPTIONS
--header-only
Show only perf.data header.
--itrace::
Options for decoding instruction tracing data. The options are:
i synthesize instructions events
b synthesize branches events
c synthesize branches events (calls only)
r synthesize branches events (returns only)
x synthesize transactions events
e synthesize error events
d create a debug log
g synthesize a call chain (use with i or x)
The default is all events i.e. the same as --itrace=ibxe
In addition, the period (default 100000) for instructions events
can be specified in units of:
i instructions
t ticks
ms milliseconds
us microseconds
ns nanoseconds (default)
Also the call chain size (default 16, max. 1024) for instructions or
transactions events can be specified.
To disable decoding entirely, use --no-itrace.
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
......
......@@ -168,7 +168,7 @@ Default is to monitor all CPUS.
Accumulate callchain of children to parent entry so that then can
show up in the output. The output will have a new "Children" column
and will be sorted on the data. It requires -g/--call-graph option
enabled.
enabled. See the `overhead calculation' section for more details.
--max-stack::
Set the stack depth limit when parsing the callchain, anything
......@@ -234,6 +234,7 @@ INTERACTIVE PROMPTING KEYS
Pressing any unmapped key displays a menu, and prompts for input.
include::callchain-overhead-calculation.txt[]
SEE ALSO
--------
......
......@@ -35,7 +35,7 @@ OPTIONS
-e::
--expr::
List of events to show, currently only syscall names.
List of syscalls to show, currently only syscall names.
Prefixing with ! shows all syscalls but the ones specified. You may
need to escape it.
......
......@@ -24,7 +24,7 @@ unexport MAKEFLAGS
# (To override it, run 'make JOBS=1' and similar.)
#
ifeq ($(JOBS),)
JOBS := $(shell egrep -c '^processor|^CPU' /proc/cpuinfo 2>/dev/null)
JOBS := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
ifeq ($(JOBS),0)
JOBS := 1
endif
......
......@@ -73,6 +73,8 @@ include config/utilities.mak
# for CTF data format.
#
# Define NO_LZMA if you do not want to support compressed (xz) kernel modules
#
# Define NO_AUXTRACE if you do not want AUX area tracing support
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(shell pwd)))
......
libperf-y += header.o
libperf-y += sym-handling.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*
* Copyright (C) 2015 Naveen N. Rao, IBM Corporation
*/
#include "debug.h"
#include "symbol.h"
#include "map.h"
#include "probe-event.h"
#ifdef HAVE_LIBELF_SUPPORT
bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
{
return ehdr.e_type == ET_EXEC ||
ehdr.e_type == ET_REL ||
ehdr.e_type == ET_DYN;
}
#if defined(_CALL_ELF) && _CALL_ELF == 2
void arch__elf_sym_adjust(GElf_Sym *sym)
{
sym->st_value += PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
}
#endif
#endif
#if !defined(_CALL_ELF) || _CALL_ELF != 2
int arch__choose_best_symbol(struct symbol *syma,
struct symbol *symb __maybe_unused)
{
char *sym = syma->name;
/* Skip over any initial dot */
if (*sym == '.')
sym++;
/* Avoid "SyS" kernel syscall aliases */
if (strlen(sym) >= 3 && !strncmp(sym, "SyS", 3))
return SYMBOL_B;
if (strlen(sym) >= 10 && !strncmp(sym, "compat_SyS", 10))
return SYMBOL_B;
return SYMBOL_A;
}
/* Allow matching against dot variants */
int arch__compare_symbol_names(const char *namea, const char *nameb)
{
/* Skip over initial dot */
if (*namea == '.')
namea++;
if (*nameb == '.')
nameb++;
return strcmp(namea, nameb);
}
#endif
#if defined(_CALL_ELF) && _CALL_ELF == 2
bool arch__prefers_symtab(void)
{
return true;
}
#define PPC64LE_LEP_OFFSET 8
void arch__fix_tev_from_maps(struct perf_probe_event *pev,
struct probe_trace_event *tev, struct map *map)
{
/*
* ppc64 ABIv2 local entry point is currently always 2 instructions
* (8 bytes) after the global entry point.
*/
if (!pev->uprobes && map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
tev->point.address += PPC64LE_LEP_OFFSET;
tev->point.offset += PPC64LE_LEP_OFFSET;
}
}
#endif
......@@ -23,6 +23,7 @@
#include <pthread.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <sys/prctl.h>
#include <sys/types.h>
......@@ -51,6 +52,9 @@ struct thread_data {
unsigned int loops_done;
u64 val;
u64 runtime_ns;
u64 system_time_ns;
u64 user_time_ns;
double speed_gbs;
pthread_mutex_t *process_lock;
};
......@@ -1034,6 +1038,7 @@ static void *worker_thread(void *__tdata)
u64 bytes_done;
long work_done;
u32 l;
struct rusage rusage;
bind_to_cpumask(td->bind_cpumask);
bind_to_memnode(td->bind_node);
......@@ -1186,6 +1191,13 @@ static void *worker_thread(void *__tdata)
timersub(&stop, &start0, &diff);
td->runtime_ns = diff.tv_sec * 1000000000ULL;
td->runtime_ns += diff.tv_usec * 1000ULL;
td->speed_gbs = bytes_done / (td->runtime_ns / 1e9) / 1e9;
getrusage(RUSAGE_THREAD, &rusage);
td->system_time_ns = rusage.ru_stime.tv_sec * 1000000000ULL;
td->system_time_ns += rusage.ru_stime.tv_usec * 1000ULL;
td->user_time_ns = rusage.ru_utime.tv_sec * 1000000000ULL;
td->user_time_ns += rusage.ru_utime.tv_usec * 1000ULL;
free_data(thread_data, g->p.bytes_thread);
......@@ -1412,7 +1424,7 @@ static int __bench_numa(const char *name)
double runtime_sec_min;
int wait_stat;
double bytes;
int i, t;
int i, t, p;
if (init())
return -1;
......@@ -1548,6 +1560,24 @@ static int __bench_numa(const char *name)
print_res(name, bytes / runtime_sec_max / 1e9,
"GB/sec,", "total-speed", "GB/sec total speed");
if (g->p.show_details >= 2) {
char tname[32];
struct thread_data *td;
for (p = 0; p < g->p.nr_proc; p++) {
for (t = 0; t < g->p.nr_threads; t++) {
memset(tname, 0, 32);
td = g->threads + p*g->p.nr_threads + t;
snprintf(tname, 32, "process%d:thread%d", p, t);
print_res(tname, td->speed_gbs,
"GB/sec", "thread-speed", "GB/sec/thread speed");
print_res(tname, td->system_time_ns / 1e9,
"secs", "thread-system-time", "system CPU time/thread");
print_res(tname, td->user_time_ns / 1e9,
"secs", "thread-user-time", "user CPU time/thread");
}
}
}
free(pids);
deinit();
......
......@@ -69,6 +69,15 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
session = perf_session__new(&file, false, &build_id__mark_dso_hit_ops);
if (session == NULL)
return -1;
/*
* We take all buildids when the file contains AUX area tracing data
* because we do not decode the trace because it would take too long.
*/
if (!perf_data_file__is_pipe(&file) &&
perf_header__has_feat(&session->header, HEADER_AUXTRACE))
with_hits = false;
/*
* in pipe-mode, the only way to get the buildids is to parse
* the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
......
......@@ -16,6 +16,7 @@
#include "util/debug.h"
#include "util/build-id.h"
#include "util/data.h"
#include "util/auxtrace.h"
#include "util/parse-options.h"
......@@ -26,10 +27,12 @@ struct perf_inject {
struct perf_session *session;
bool build_ids;
bool sched_stat;
bool have_auxtrace;
const char *input_name;
struct perf_data_file output;
u64 bytes_written;
struct list_head samples;
struct itrace_synth_opts itrace_synth_opts;
};
struct event_entry {
......@@ -38,14 +41,11 @@ struct event_entry {
union perf_event event[0];
};
static int perf_event__repipe_synth(struct perf_tool *tool,
union perf_event *event)
static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
{
struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
ssize_t size;
size = perf_data_file__write(&inject->output, event,
event->header.size);
size = perf_data_file__write(&inject->output, buf, sz);
if (size < 0)
return -errno;
......@@ -53,6 +53,15 @@ static int perf_event__repipe_synth(struct perf_tool *tool,
return 0;
}
static int perf_event__repipe_synth(struct perf_tool *tool,
union perf_event *event)
{
struct perf_inject *inject = container_of(tool, struct perf_inject,
tool);
return output_bytes(inject, event, event->header.size);
}
static int perf_event__repipe_oe_synth(struct perf_tool *tool,
union perf_event *event,
struct ordered_events *oe __maybe_unused)
......@@ -86,6 +95,79 @@ static int perf_event__repipe_attr(struct perf_tool *tool,
return perf_event__repipe_synth(tool, event);
}
#ifdef HAVE_AUXTRACE_SUPPORT
static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
{
char buf[4096];
ssize_t ssz;
int ret;
while (size > 0) {
ssz = read(fd, buf, min(size, (off_t)sizeof(buf)));
if (ssz < 0)
return -errno;
ret = output_bytes(inject, buf, ssz);
if (ret)
return ret;
size -= ssz;
}
return 0;
}
static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
union perf_event *event,
struct perf_session *session
__maybe_unused)
{
struct perf_inject *inject = container_of(tool, struct perf_inject,
tool);
int ret;
inject->have_auxtrace = true;
if (!inject->output.is_pipe) {
off_t offset;
offset = lseek(inject->output.fd, 0, SEEK_CUR);
if (offset == -1)
return -errno;
ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
event, offset);
if (ret < 0)
return ret;
}
if (perf_data_file__is_pipe(session->file) || !session->one_mmap) {
ret = output_bytes(inject, event, event->header.size);
if (ret < 0)
return ret;
ret = copy_bytes(inject, perf_data_file__fd(session->file),
event->auxtrace.size);
} else {
ret = output_bytes(inject, event,
event->header.size + event->auxtrace.size);
}
if (ret < 0)
return ret;
return event->auxtrace.size;
}
#else
static s64
perf_event__repipe_auxtrace(struct perf_tool *tool __maybe_unused,
union perf_event *event __maybe_unused,
struct perf_session *session __maybe_unused)
{
pr_err("AUX area tracing not supported\n");
return -EINVAL;
}
#endif
static int perf_event__repipe(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
......@@ -155,6 +237,32 @@ static int perf_event__repipe_fork(struct perf_tool *tool,
return err;
}
static int perf_event__repipe_comm(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
int err;
err = perf_event__process_comm(tool, event, sample, machine);
perf_event__repipe(tool, event, sample, machine);
return err;
}
static int perf_event__repipe_exit(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
int err;
err = perf_event__process_exit(tool, event, sample, machine);
perf_event__repipe(tool, event, sample, machine);
return err;
}
static int perf_event__repipe_tracing_data(struct perf_tool *tool,
union perf_event *event,
struct perf_session *session)
......@@ -167,6 +275,18 @@ static int perf_event__repipe_tracing_data(struct perf_tool *tool,
return err;
}
static int perf_event__repipe_id_index(struct perf_tool *tool,
union perf_event *event,
struct perf_session *session)
{
int err;
perf_event__repipe_synth(tool, event);
err = perf_event__process_id_index(tool, event, session);
return err;
}
static int dso__read_build_id(struct dso *dso)
{
if (dso->has_build_id)
......@@ -351,16 +471,20 @@ static int __cmd_inject(struct perf_inject *inject)
struct perf_session *session = inject->session;
struct perf_data_file *file_out = &inject->output;
int fd = perf_data_file__fd(file_out);
u64 output_data_offset;
signal(SIGINT, sig_handler);
if (inject->build_ids || inject->sched_stat) {
if (inject->build_ids || inject->sched_stat ||
inject->itrace_synth_opts.set) {
inject->tool.mmap = perf_event__repipe_mmap;
inject->tool.mmap2 = perf_event__repipe_mmap2;
inject->tool.fork = perf_event__repipe_fork;
inject->tool.tracing_data = perf_event__repipe_tracing_data;
}
output_data_offset = session->header.data_offset;
if (inject->build_ids) {
inject->tool.sample = perf_event__inject_buildid;
} else if (inject->sched_stat) {
......@@ -379,17 +503,43 @@ static int __cmd_inject(struct perf_inject *inject)
else if (!strncmp(name, "sched:sched_stat_", 17))
evsel->handler = perf_inject__sched_stat;
}
} else if (inject->itrace_synth_opts.set) {
session->itrace_synth_opts = &inject->itrace_synth_opts;
inject->itrace_synth_opts.inject = true;
inject->tool.comm = perf_event__repipe_comm;
inject->tool.exit = perf_event__repipe_exit;
inject->tool.id_index = perf_event__repipe_id_index;
inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
inject->tool.auxtrace = perf_event__process_auxtrace;
inject->tool.ordered_events = true;
inject->tool.ordering_requires_timestamps = true;
/* Allow space in the header for new attributes */
output_data_offset = 4096;
}
if (!inject->itrace_synth_opts.set)
auxtrace_index__free(&session->auxtrace_index);
if (!file_out->is_pipe)
lseek(fd, session->header.data_offset, SEEK_SET);
lseek(fd, output_data_offset, SEEK_SET);
ret = perf_session__process_events(session);
if (!file_out->is_pipe) {
if (inject->build_ids)
if (inject->build_ids) {
perf_header__set_feat(&session->header,
HEADER_BUILD_ID);
if (inject->have_auxtrace)
dsos__hit_all(session);
}
/*
* The AUX areas have been removed and replaced with
* synthesized hardware events, so clear the feature flag.
*/
if (inject->itrace_synth_opts.set)
perf_header__clear_feat(&session->header,
HEADER_AUXTRACE);
session->header.data_offset = output_data_offset;
session->header.data_size = inject->bytes_written;
perf_session__write_header(session, session->evlist, fd, true);
}
......@@ -408,11 +558,16 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
.fork = perf_event__repipe,
.exit = perf_event__repipe,
.lost = perf_event__repipe,
.aux = perf_event__repipe,
.itrace_start = perf_event__repipe,
.read = perf_event__repipe_sample,
.throttle = perf_event__repipe,
.unthrottle = perf_event__repipe,
.attr = perf_event__repipe_attr,
.tracing_data = perf_event__repipe_op2_synth,
.auxtrace_info = perf_event__repipe_op2_synth,
.auxtrace = perf_event__repipe_auxtrace,
.auxtrace_error = perf_event__repipe_op2_synth,
.finished_round = perf_event__repipe_oe_synth,
.build_id = perf_event__repipe_op2_synth,
.id_index = perf_event__repipe_op2_synth,
......@@ -444,6 +599,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
"kallsyms pathname"),
OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
NULL, "opts", "Instruction Tracing options",
itrace_parse_synth_opts),
OPT_END()
};
const char * const inject_usage[] = {
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -36,6 +36,8 @@
#include "util/data.h"
#include "arch/common.h"
#include "util/auxtrace.h"
#include <dlfcn.h>
#include <linux/bitmap.h>
......@@ -585,6 +587,7 @@ parse_percent_limit(const struct option *opt, const char *str,
int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
{
struct perf_session *session;
struct itrace_synth_opts itrace_synth_opts = { .set = 0, };
struct stat st;
bool has_br_stack = false;
int branch_mode = -1;
......@@ -607,6 +610,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
.attr = perf_event__process_attr,
.tracing_data = perf_event__process_tracing_data,
.build_id = perf_event__process_build_id,
.id_index = perf_event__process_id_index,
.auxtrace_info = perf_event__process_auxtrace_info,
.auxtrace = perf_event__process_auxtrace,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
......@@ -717,6 +723,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
"Don't show entries under that percent", parse_percent_limit),
OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
"how to display percentage of filtered entries", parse_filter_percentage),
OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
"Instruction Tracing options",
itrace_parse_synth_opts),
OPT_END()
};
struct perf_data_file file = {
......@@ -761,6 +770,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
report.queue_size);
}
session->itrace_synth_opts = &itrace_synth_opts;
report.session = session;
has_br_stack = perf_header__has_feat(&session->header,
......
......@@ -16,6 +16,7 @@
#include "util/evsel.h"
#include "util/sort.h"
#include "util/data.h"
#include "util/auxtrace.h"
#include <linux/bitmap.h>
static char const *script_name;
......@@ -26,6 +27,7 @@ static u64 nr_unordered;
static bool no_callchain;
static bool latency_format;
static bool system_wide;
static bool print_flags;
static const char *cpu_list;
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
......@@ -146,9 +148,10 @@ static const char *output_field2str(enum perf_output_field field)
#define PRINT_FIELD(x) (output[attr->type].fields & PERF_OUTPUT_##x)
static int perf_evsel__check_stype(struct perf_evsel *evsel,
u64 sample_type, const char *sample_msg,
enum perf_output_field field)
static int perf_evsel__do_check_stype(struct perf_evsel *evsel,
u64 sample_type, const char *sample_msg,
enum perf_output_field field,
bool allow_user_set)
{
struct perf_event_attr *attr = &evsel->attr;
int type = attr->type;
......@@ -158,6 +161,8 @@ static int perf_evsel__check_stype(struct perf_evsel *evsel,
return 0;
if (output[type].user_set) {
if (allow_user_set)
return 0;
evname = perf_evsel__name(evsel);
pr_err("Samples for '%s' event do not have %s attribute set. "
"Cannot print '%s' field.\n",
......@@ -175,10 +180,22 @@ static int perf_evsel__check_stype(struct perf_evsel *evsel,
return 0;
}
static int perf_evsel__check_stype(struct perf_evsel *evsel,
u64 sample_type, const char *sample_msg,
enum perf_output_field field)
{
return perf_evsel__do_check_stype(evsel, sample_type, sample_msg, field,
false);
}
static int perf_evsel__check_attr(struct perf_evsel *evsel,
struct perf_session *session)
{
struct perf_event_attr *attr = &evsel->attr;
bool allow_user_set;
allow_user_set = perf_header__has_feat(&session->header,
HEADER_AUXTRACE);
if (PRINT_FIELD(TRACE) &&
!perf_session__has_traces(session, "record -R"))
......@@ -191,8 +208,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
}
if (PRINT_FIELD(ADDR) &&
perf_evsel__check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR",
PERF_OUTPUT_ADDR))
perf_evsel__do_check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR",
PERF_OUTPUT_ADDR, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
......@@ -229,8 +246,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
return -EINVAL;
if (PRINT_FIELD(CPU) &&
perf_evsel__check_stype(evsel, PERF_SAMPLE_CPU, "CPU",
PERF_OUTPUT_CPU))
perf_evsel__do_check_stype(evsel, PERF_SAMPLE_CPU, "CPU",
PERF_OUTPUT_CPU, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(PERIOD) &&
......@@ -445,6 +462,25 @@ static void print_sample_bts(union perf_event *event,
printf("\n");
}
static void print_sample_flags(u32 flags)
{
const char *chars = PERF_IP_FLAG_CHARS;
const int n = strlen(PERF_IP_FLAG_CHARS);
char str[33];
int i, pos = 0;
for (i = 0; i < n; i++, flags >>= 1) {
if (flags & 1)
str[pos++] = chars[i];
}
for (; i < 32; i++, flags >>= 1) {
if (flags & 1)
str[pos++] = '?';
}
str[pos] = 0;
printf(" %-4s ", str);
}
static void process_event(union perf_event *event, struct perf_sample *sample,
struct perf_evsel *evsel, struct addr_location *al)
{
......@@ -464,6 +500,9 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
printf("%s: ", evname ? evname : "[unknown]");
}
if (print_flags)
print_sample_flags(sample->flags);
if (is_bts_event(attr)) {
print_sample_bts(event, sample, evsel, thread, al);
return;
......@@ -999,12 +1038,15 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
}
}
tok = strtok(tok, ",");
while (tok) {
for (tok = strtok(tok, ","); tok; tok = strtok(NULL, ",")) {
for (i = 0; i < imax; ++i) {
if (strcmp(tok, all_output_options[i].str) == 0)
break;
}
if (i == imax && strcmp(tok, "flags") == 0) {
print_flags = true;
continue;
}
if (i == imax) {
fprintf(stderr, "Invalid field requested.\n");
rc = -EINVAL;
......@@ -1032,8 +1074,6 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
}
output[type].fields |= all_output_options[i].field;
}
tok = strtok(NULL, ",");
}
if (type >= 0) {
......@@ -1497,6 +1537,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
char *rec_script_path = NULL;
char *rep_script_path = NULL;
struct perf_session *session;
struct itrace_synth_opts itrace_synth_opts = { .set = false, };
char *script_path = NULL;
const char **__argv;
int i, j, err = 0;
......@@ -1511,6 +1552,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
.attr = process_attr,
.tracing_data = perf_event__process_tracing_data,
.build_id = perf_event__process_build_id,
.id_index = perf_event__process_id_index,
.auxtrace_info = perf_event__process_auxtrace_info,
.auxtrace = perf_event__process_auxtrace,
.auxtrace_error = perf_event__process_auxtrace_error,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
......@@ -1549,7 +1594,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
"comma separated output fields prepend with 'type:'. "
"Valid types: hw,sw,trace,raw. "
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
"addr,symoff,period", parse_output_fields),
"addr,symoff,period,flags", parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
......@@ -1570,6 +1615,9 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events,
"Show the mmap events"),
OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
"Instruction Tracing options",
itrace_parse_synth_opts),
OPT_END()
};
const char * const script_subcommands[] = { "record", "report", NULL };
......@@ -1765,6 +1813,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
script.session = session;
session->itrace_synth_opts = &itrace_synth_opts;
if (cpu_list) {
err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
if (err < 0)
......
This diff is collapsed.
......@@ -2660,16 +2660,15 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "comm", &trace.show_comm,
"show the thread COMM next to its id"),
OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
"list of events to trace"),
OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
OPT_STRING('o', "output", &output_name, "file", "output file name"),
OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
"trace events on existing process id"),
OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
"trace events on existing thread id"),
OPT_CALLBACK(0, "filter-pids", &trace, "float",
"show only events with duration > N.M ms", trace__set_filter_pids),
OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
"pids to filter (by the kernel)", trace__set_filter_pids),
OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
"system-wide collection from all CPUs"),
OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
......
......@@ -610,6 +610,11 @@ ifdef LIBBABELTRACE
endif
endif
ifndef NO_AUXTRACE
$(call detected,CONFIG_AUXTRACE)
CFLAGS += -DHAVE_AUXTRACE_SUPPORT
endif
# Among the variables below, these:
# perfexecdir
# template_dir
......
......@@ -54,12 +54,17 @@ struct record_opts {
bool period;
bool sample_intr_regs;
bool running_time;
bool full_auxtrace;
bool auxtrace_snapshot_mode;
unsigned int freq;
unsigned int mmap_pages;
unsigned int auxtrace_mmap_pages;
unsigned int user_freq;
u64 branch_stack;
u64 default_interval;
u64 user_interval;
size_t auxtrace_snapshot_size;
const char *auxtrace_snapshot_opts;
bool sample_transaction;
unsigned initial_delay;
bool use_clockid;
......
......@@ -482,7 +482,7 @@ static int do_test_code_reading(bool try_kcore)
else
str = "cycles";
pr_debug("Parsing event '%s'\n", str);
ret = parse_events(evlist, str);
ret = parse_events(evlist, str, NULL);
if (ret < 0) {
pr_debug("parse_events failed\n");
goto out_err;
......
......@@ -23,7 +23,7 @@ static int perf_evsel__roundtrip_cache_name_test(void)
for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
__perf_evsel__hw_cache_type_op_res_name(type, op, i,
name, sizeof(name));
err = parse_events(evlist, name);
err = parse_events(evlist, name, NULL);
if (err)
ret = err;
}
......@@ -71,7 +71,7 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names)
return -ENOMEM;
for (i = 0; i < nr_names; ++i) {
err = parse_events(evlist, names[i]);
err = parse_events(evlist, names[i], NULL);
if (err) {
pr_debug("failed to parse event '%s', err %d\n",
names[i], err);
......
......@@ -695,7 +695,7 @@ int test__hists_cumulate(void)
TEST_ASSERT_VAL("No memory", evlist);
err = parse_events(evlist, "cpu-clock");
err = parse_events(evlist, "cpu-clock", NULL);
if (err)
goto out;
......
......@@ -108,10 +108,10 @@ int test__hists_filter(void)
TEST_ASSERT_VAL("No memory", evlist);
err = parse_events(evlist, "cpu-clock");
err = parse_events(evlist, "cpu-clock", NULL);
if (err)
goto out;
err = parse_events(evlist, "task-clock");
err = parse_events(evlist, "task-clock", NULL);
if (err)
goto out;
......
......@@ -282,10 +282,10 @@ int test__hists_link(void)
if (evlist == NULL)
return -ENOMEM;
err = parse_events(evlist, "cpu-clock");
err = parse_events(evlist, "cpu-clock", NULL);
if (err)
goto out;
err = parse_events(evlist, "task-clock");
err = parse_events(evlist, "task-clock", NULL);
if (err)
goto out;
......
......@@ -590,7 +590,7 @@ int test__hists_output(void)
TEST_ASSERT_VAL("No memory", evlist);
err = parse_events(evlist, "cpu-clock");
err = parse_events(evlist, "cpu-clock", NULL);
if (err)
goto out;
......
......@@ -78,8 +78,8 @@ int test__keep_tracking(void)
perf_evlist__set_maps(evlist, cpus, threads);
CHECK__(parse_events(evlist, "dummy:u"));
CHECK__(parse_events(evlist, "cycles:u"));
CHECK__(parse_events(evlist, "dummy:u", NULL));
CHECK__(parse_events(evlist, "cycles:u", NULL));
perf_evlist__config(evlist, &opts);
......
......@@ -32,6 +32,7 @@ make_no_backtrace := NO_BACKTRACE=1
make_no_libnuma := NO_LIBNUMA=1
make_no_libaudit := NO_LIBAUDIT=1
make_no_libbionic := NO_LIBBIONIC=1
make_no_auxtrace := NO_AUXTRACE=1
make_tags := tags
make_cscope := cscope
make_help := help
......@@ -52,7 +53,7 @@ make_static := LDFLAGS=-static
make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
make_minimal += NO_LIBDW_DWARF_UNWIND=1
make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1
# $(run) contains all available tests
run := make_pure
......@@ -74,6 +75,7 @@ run += make_no_backtrace
run += make_no_libnuma
run += make_no_libaudit
run += make_no_libbionic
run += make_no_auxtrace
run += make_help
run += make_doc
run += make_perf_o
......@@ -223,7 +225,19 @@ tarpkg:
echo "- $@: $$cmd" && echo $$cmd > $@ && \
( eval $$cmd ) >> $@ 2>&1
all: $(run) $(run_O) tarpkg
make_kernelsrc:
@echo " - make -C <kernelsrc> tools/perf"
$(call clean); \
(make -C ../.. tools/perf) > $@ 2>&1 && \
test -x perf && rm -f $@ || (cat $@ ; false)
make_kernelsrc_tools:
@echo " - make -C <kernelsrc>/tools perf"
$(call clean); \
(make -C ../../tools perf) > $@ 2>&1 && \
test -x perf && rm -f $@ || (cat $@ ; false)
all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools
@echo OK
out: $(run_O)
......
......@@ -1571,7 +1571,7 @@ static int test_event(struct evlist_test *e)
if (evlist == NULL)
return -ENOMEM;
ret = parse_events(evlist, e->name);
ret = parse_events(evlist, e->name, NULL);
if (ret) {
pr_debug("failed to parse event '%s', err %d\n",
e->name, ret);
......
......@@ -68,7 +68,7 @@ int test__perf_time_to_tsc(void)
perf_evlist__set_maps(evlist, cpus, threads);
CHECK__(parse_events(evlist, "cycles:u"));
CHECK__(parse_events(evlist, "cycles:u", NULL));
perf_evlist__config(evlist, &opts);
......
......@@ -152,7 +152,8 @@ int test__pmu(void)
if (ret)
break;
ret = perf_pmu__config_terms(&formats, &attr, terms, false);
ret = perf_pmu__config_terms(&formats, &attr, terms,
false, NULL);
if (ret)
break;
......
......@@ -347,7 +347,7 @@ int test__switch_tracking(void)
perf_evlist__set_maps(evlist, cpus, threads);
/* First event */
err = parse_events(evlist, "cpu-clock:u");
err = parse_events(evlist, "cpu-clock:u", NULL);
if (err) {
pr_debug("Failed to parse event dummy:u\n");
goto out_err;
......@@ -356,7 +356,7 @@ int test__switch_tracking(void)
cpu_clocks_evsel = perf_evlist__last(evlist);
/* Second event */
err = parse_events(evlist, "cycles:u");
err = parse_events(evlist, "cycles:u", NULL);
if (err) {
pr_debug("Failed to parse event cycles:u\n");
goto out_err;
......@@ -371,7 +371,7 @@ int test__switch_tracking(void)
goto out;
}
err = parse_events(evlist, sched_switch);
err = parse_events(evlist, sched_switch, NULL);
if (err) {
pr_debug("Failed to parse event %s\n", sched_switch);
goto out_err;
......@@ -401,7 +401,7 @@ int test__switch_tracking(void)
perf_evsel__set_sample_bit(cycles_evsel, TIME);
/* Fourth event */
err = parse_events(evlist, "dummy:u");
err = parse_events(evlist, "dummy:u", NULL);
if (err) {
pr_debug("Failed to parse event dummy:u\n");
goto out_err;
......
This diff is collapsed.
......@@ -74,6 +74,7 @@ libperf-y += data.o
libperf-$(CONFIG_X86) += tsc.o
libperf-y += cloexec.o
libperf-y += thread-stack.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
libperf-$(CONFIG_LIBELF) += symbol-elf.o
libperf-$(CONFIG_LIBELF) += probe-event.o
......@@ -117,7 +118,7 @@ $(OUTPUT)util/pmu-bison.c: util/pmu.y
CFLAGS_parse-events-flex.o += -w
CFLAGS_pmu-flex.o += -w
CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w
CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
......
This diff is collapsed.
This diff is collapsed.
......@@ -72,6 +72,10 @@ extern struct callchain_param callchain_param;
struct callchain_list {
u64 ip;
struct map_symbol ms;
struct /* for TUI */ {
bool unfolded;
bool has_children;
};
char *srcline;
struct list_head list;
};
......
This diff is collapsed.
......@@ -4,6 +4,7 @@
#include "symbol.h"
#include "dso.h"
#include "machine.h"
#include "auxtrace.h"
#include "util.h"
#include "debug.h"
......@@ -961,6 +962,7 @@ void dso__delete(struct dso *dso)
}
dso__data_close(dso);
auxtrace_cache__free(dso->auxtrace_cache);
dso_cache__free(&dso->data.cache);
dso__free_a2l(dso);
zfree(&dso->symsrc_filename);
......
......@@ -126,6 +126,8 @@ struct dsos {
struct rb_root root; /* rbtree root sorted by long name */
};
struct auxtrace_cache;
struct dso {
struct list_head node;
struct rb_node rb_node; /* rbtree node sorted by long name */
......@@ -156,6 +158,7 @@ struct dso {
u16 long_name_len;
u16 short_name_len;
void *dwfl; /* DWARF debug info */
struct auxtrace_cache *auxtrace_cache;
/* dso data file */
struct {
......
......@@ -23,12 +23,17 @@ static const char *perf_event__names[] = {
[PERF_RECORD_FORK] = "FORK",
[PERF_RECORD_READ] = "READ",
[PERF_RECORD_SAMPLE] = "SAMPLE",
[PERF_RECORD_AUX] = "AUX",
[PERF_RECORD_ITRACE_START] = "ITRACE_START",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
[PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID",
[PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND",
[PERF_RECORD_ID_INDEX] = "ID_INDEX",
[PERF_RECORD_AUXTRACE_INFO] = "AUXTRACE_INFO",
[PERF_RECORD_AUXTRACE] = "AUXTRACE",
[PERF_RECORD_AUXTRACE_ERROR] = "AUXTRACE_ERROR",
};
const char *perf_event__name(unsigned int id)
......@@ -692,6 +697,22 @@ int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
return machine__process_lost_event(machine, event, sample);
}
int perf_event__process_aux(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine)
{
return machine__process_aux_event(machine, event);
}
int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine)
{
return machine__process_itrace_start_event(machine, event);
}
size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
{
return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n",
......@@ -755,6 +776,21 @@ int perf_event__process_exit(struct perf_tool *tool __maybe_unused,
return machine__process_exit_event(machine, event, sample);
}
size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp)
{
return fprintf(fp, " offset: %#"PRIx64" size: %#"PRIx64" flags: %#"PRIx64" [%s%s]\n",
event->aux.aux_offset, event->aux.aux_size,
event->aux.flags,
event->aux.flags & PERF_AUX_FLAG_TRUNCATED ? "T" : "",
event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : "");
}
size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp)
{
return fprintf(fp, " pid: %u tid: %u\n",
event->itrace_start.pid, event->itrace_start.tid);
}
size_t perf_event__fprintf(union perf_event *event, FILE *fp)
{
size_t ret = fprintf(fp, "PERF_RECORD_%s",
......@@ -774,6 +810,12 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
case PERF_RECORD_MMAP2:
ret += perf_event__fprintf_mmap2(event, fp);
break;
case PERF_RECORD_AUX:
ret += perf_event__fprintf_aux(event, fp);
break;
case PERF_RECORD_ITRACE_START:
ret += perf_event__fprintf_itrace_start(event, fp);
break;
default:
ret += fprintf(fp, "\n");
}
......
......@@ -157,6 +157,8 @@ enum {
PERF_IP_FLAG_IN_TX = 1ULL << 10,
};
#define PERF_IP_FLAG_CHARS "bcrosyiABEx"
#define PERF_BRANCH_MASK (\
PERF_IP_FLAG_BRANCH |\
PERF_IP_FLAG_CALL |\
......@@ -215,9 +217,17 @@ enum perf_user_event_type { /* above any possible kernel type */
PERF_RECORD_HEADER_BUILD_ID = 67,
PERF_RECORD_FINISHED_ROUND = 68,
PERF_RECORD_ID_INDEX = 69,
PERF_RECORD_AUXTRACE_INFO = 70,
PERF_RECORD_AUXTRACE = 71,
PERF_RECORD_AUXTRACE_ERROR = 72,
PERF_RECORD_HEADER_MAX
};
enum auxtrace_error_type {
PERF_AUXTRACE_ERROR_ITRACE = 1,
PERF_AUXTRACE_ERROR_MAX
};
/*
* The kernel collects the number of events it couldn't send in a stretch and
* when possible sends this number in a PERF_RECORD_LOST event. The number of
......@@ -242,6 +252,7 @@ struct events_stats {
u32 nr_invalid_chains;
u32 nr_unknown_id;
u32 nr_unprocessable_samples;
u32 nr_auxtrace_errors[PERF_AUXTRACE_ERROR_MAX];
};
struct attr_event {
......@@ -280,6 +291,50 @@ struct id_index_event {
struct id_index_entry entries[0];
};
struct auxtrace_info_event {
struct perf_event_header header;
u32 type;
u32 reserved__; /* For alignment */
u64 priv[];
};
struct auxtrace_event {
struct perf_event_header header;
u64 size;
u64 offset;
u64 reference;
u32 idx;
u32 tid;
u32 cpu;
u32 reserved__; /* For alignment */
};
#define MAX_AUXTRACE_ERROR_MSG 64
struct auxtrace_error_event {
struct perf_event_header header;
u32 type;
u32 code;
u32 cpu;
u32 pid;
u32 tid;
u32 reserved__; /* For alignment */
u64 ip;
char msg[MAX_AUXTRACE_ERROR_MSG];
};
struct aux_event {
struct perf_event_header header;
u64 aux_offset;
u64 aux_size;
u64 flags;
};
struct itrace_start_event {
struct perf_event_header header;
u32 pid, tid;
};
union perf_event {
struct perf_event_header header;
struct mmap_event mmap;
......@@ -295,6 +350,11 @@ union perf_event {
struct tracing_data_event tracing_data;
struct build_id_event build_id;
struct id_index_event id_index;
struct auxtrace_info_event auxtrace_info;
struct auxtrace_event auxtrace;
struct auxtrace_error_event auxtrace_error;
struct aux_event aux;
struct itrace_start_event itrace_start;
};
void perf_event__print_totals(void);
......@@ -330,6 +390,14 @@ int perf_event__process_lost(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
int perf_event__process_aux(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
int perf_event__process_itrace_start(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
int perf_event__process_mmap(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
......@@ -387,6 +455,8 @@ size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_task(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp);
size_t perf_event__fprintf(union perf_event *event, FILE *fp);
u64 kallsyms__get_function_start(const char *kallsyms_filename,
......
This diff is collapsed.
......@@ -8,6 +8,7 @@
#include "event.h"
#include "evsel.h"
#include "util.h"
#include "auxtrace.h"
#include <unistd.h>
struct pollfd;
......@@ -28,6 +29,7 @@ struct perf_mmap {
int mask;
int refcnt;
u64 prev;
struct auxtrace_mmap auxtrace_mmap;
char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8)));
};
......@@ -122,10 +124,14 @@ int perf_evlist__start_workload(struct perf_evlist *evlist);
struct option;
int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str);
int perf_evlist__parse_mmap_pages(const struct option *opt,
const char *str,
int unset);
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
bool overwrite, unsigned int auxtrace_pages,
bool auxtrace_overwrite);
int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
bool overwrite);
void perf_evlist__munmap(struct perf_evlist *evlist);
......
......@@ -1121,6 +1121,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(sample_stack_user, p_unsigned);
PRINT_ATTRf(clockid, p_signed);
PRINT_ATTRf(sample_regs_intr, p_hex);
PRINT_ATTRf(aux_watermark, p_unsigned);
return ret;
}
......
......@@ -869,6 +869,20 @@ static int write_branch_stack(int fd __maybe_unused,
return 0;
}
static int write_auxtrace(int fd, struct perf_header *h,
struct perf_evlist *evlist __maybe_unused)
{
struct perf_session *session;
int err;
session = container_of(h, struct perf_session, header);
err = auxtrace_index__write(fd, &session->auxtrace_index);
if (err < 0)
pr_err("Failed to write auxtrace index\n");
return err;
}
static void print_hostname(struct perf_header *ph, int fd __maybe_unused,
FILE *fp)
{
......@@ -1151,6 +1165,12 @@ static void print_branch_stack(struct perf_header *ph __maybe_unused,
fprintf(fp, "# contains samples with branch stack\n");
}
static void print_auxtrace(struct perf_header *ph __maybe_unused,
int fd __maybe_unused, FILE *fp)
{
fprintf(fp, "# contains AUX area data (e.g. instruction trace)\n");
}
static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused,
FILE *fp)
{
......@@ -1821,6 +1841,22 @@ static int process_group_desc(struct perf_file_section *section __maybe_unused,
return ret;
}
static int process_auxtrace(struct perf_file_section *section,
struct perf_header *ph, int fd,
void *data __maybe_unused)
{
struct perf_session *session;
int err;
session = container_of(ph, struct perf_session, header);
err = auxtrace_index__process(fd, section->size, session,
ph->needs_swap);
if (err < 0)
pr_err("Failed to process auxtrace index\n");
return err;
}
struct feature_ops {
int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
void (*print)(struct perf_header *h, int fd, FILE *fp);
......@@ -1861,6 +1897,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPA(HEADER_BRANCH_STACK, branch_stack),
FEAT_OPP(HEADER_PMU_MAPPINGS, pmu_mappings),
FEAT_OPP(HEADER_GROUP_DESC, group_desc),
FEAT_OPP(HEADER_AUXTRACE, auxtrace),
};
struct header_print_data {
......
......@@ -30,6 +30,7 @@ enum {
HEADER_BRANCH_STACK,
HEADER_PMU_MAPPINGS,
HEADER_GROUP_DESC,
HEADER_AUXTRACE,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -10,5 +10,6 @@ bool pstack__empty(const struct pstack *pstack);
void pstack__remove(struct pstack *pstack, void *key);
void pstack__push(struct pstack *pstack, void *key);
void *pstack__pop(struct pstack *pstack);
void *pstack__peek(struct pstack *pstack);
#endif /* _PERF_PSTACK_ */
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment