Commit 9c17dbc6 authored by Ingo Molnar's avatar Ingo Molnar

Merge tag 'perf-core-for-mingo' of...

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

  - Accept a zero --itrace period, meaning "as often as possible".  In the case
    of Intel PT that is the same as a period of 1 and a unit of 'instructions'
    (i.e.  --itrace=i1i). (Adrian Hunter)

  - Harmonize itrace's synthesized callchains with the existing --max-stack
    tool option. (Adrian Hunter)

  - Allow time to be displayed in nanoseconds in 'perf script'. (Adrian Hunter)

  - Fix potential infinite loop when handling Intel PT timestamps. (Adrian Hunter)

  - Slighly improve Intel PT debug logging. (Adrian Hunter)

  - Warn when AUX data has been lost, just like when processing PERF_RECORD_LOST.
    (Adrian Hunter)

  - Further document export-to-postgresql.py script. (Adrian Hunter)

  - Add option to synthesize branch stack from auxtrace data. (Adrian Hunter)

  - Use equivalent logic to avoid using dso->kernel. (Arnaldo Carvalho de Melo)

  - Show proper error messages when parsing bad terms for hw/sw events. (He Kuang)

  - Tracepoint event parsing improvements. (He Kuang)

  - Store tracing mountpoint for better error message. (Jiri Olsa)

  - Add fixdep to tools/build, bringing it closer to the kernel counterpart, from
    where it is being lifted. (Jiri Olsa)
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 18ab2cd3 e637d177
fixdep-y := fixdep.o
......@@ -54,15 +54,26 @@ make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1)))))
# PHONY targets skipped in both cases.
any-prereq = $(filter-out $(PHONY),$?) $(filter-out $(PHONY) $(wildcard $^),$^)
###
# Copy dependency data into .cmd file
# - gcc -M dependency info
# - command line to create object 'cmd_object :='
dep-cmd = $(if $(wildcard $(fixdep)), \
$(fixdep) $(depfile) $@ '$(make-cmd)' > $(dot-target).tmp; \
rm -f $(depfile); \
mv -f $(dot-target).tmp $(dot-target).cmd, \
printf '\# cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \
printf '\# using basic dep data\n\n' >> $(dot-target).cmd; \
cat $(depfile) >> $(dot-target).cmd; \
printf '%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd)
###
# if_changed_dep - execute command if any prerequisite is newer than
# target, or command line has changed and update
# dependencies in the cmd file
if_changed_dep = $(if $(strip $(any-prereq) $(arg-check)), \
@set -e; \
$(echo-cmd) $(cmd_$(1)); \
cat $(depfile) > $(dot-target).cmd; \
printf '%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd)
$(echo-cmd) $(cmd_$(1)) && $(dep-cmd))
# if_changed - execute command if any prerequisite is newer than
# target, or command line has changed
......
......@@ -11,8 +11,9 @@ Unlike the kernel we don't have a single build object 'obj-y' list that where
we setup source objects, but we support more. This allows one 'Build' file to
carry a sources list for multiple build objects.
a) Build framework makefiles
----------------------------
Build framework makefiles
-------------------------
The build framework consists of 2 Makefiles:
......@@ -23,7 +24,7 @@ While the 'Build.include' file contains just some generic definitions, the
'Makefile.build' file is the makefile used from the outside. It's
interface/usage is following:
$ make -f tools/build/Makefile srctree=$(KSRC) dir=$(DIR) obj=$(OBJECT)
$ make -f tools/build/Makefile.build srctree=$(KSRC) dir=$(DIR) obj=$(OBJECT)
where:
......@@ -38,8 +39,9 @@ called $(OBJECT)-in.o:
which includes all compiled sources described in 'Build' makefiles.
a) Build makefiles
------------------
Build makefiles
---------------
The user supplies 'Build' makefiles that contains a objects list, and connects
the build to nested directories.
......@@ -95,8 +97,31 @@ It's only a matter of 2 single commands to create the final binaries:
You can check the 'ex' example in 'tools/build/tests/ex' for more details.
b) Rules
--------
Makefile.include
----------------
The tools/build/Makefile.include makefile could be included
via user makefiles to get usefull definitions.
It defines following interface:
- build macro definition:
build := -f $(srctree)/tools/build/Makefile.build dir=. obj
to make it easier to invoke build like:
make $(build)=ex
Fixdep
------
It is necessary to build the fixdep helper before invoking the build.
The Makefile.include file adds the fixdep target, that could be
invoked by the user.
Rules
-----
The build framework provides standard compilation rules to handle .S and .c
compilation.
......@@ -104,8 +129,9 @@ compilation.
It's possible to include special rule if needed (like we do for flex or bison
code generation).
c) CFLAGS
---------
CFLAGS
------
It's possible to alter the standard object C flags in the following way:
......@@ -115,8 +141,8 @@ It's possible to alter the standard object C flags in the following way:
This C flags changes has the scope of the Build makefile they are defined in.
d) Dependencies
---------------
Dependencies
------------
For each built object file 'a.o' the '.a.cmd' is created and holds:
......@@ -130,8 +156,8 @@ All existing '.cmd' files are included in the Build process to follow properly
the dependencies and trigger a rebuild when necessary.
e) Single rules
---------------
Single rules
------------
It's possible to build single object file by choice, like:
......
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(shell pwd)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
endif
include $(srctree)/tools//scripts/Makefile.include
define allow-override
$(if $(or $(findstring environment,$(origin $(1))),\
$(findstring command line,$(origin $(1)))),,\
$(eval $(1) = $(2)))
endef
$(call allow-override,CC,$(CROSS_COMPILE)gcc)
$(call allow-override,LD,$(CROSS_COMPILE)ld)
ifeq ($(V),1)
Q =
else
Q = @
endif
export Q srctree CC LD
MAKEFLAGS := --no-print-directory
build := -f $(srctree)/tools/build/Makefile.build dir=. obj
all: fixdep
clean:
$(call QUIET_CLEAN, fixdep)
$(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)rm -f fixdep
$(OUTPUT)fixdep-in.o: FORCE
$(Q)$(MAKE) $(build)=fixdep
$(OUTPUT)fixdep: $(OUTPUT)fixdep-in.o
$(QUIET_LINK)$(CC) $(LDFLAGS) -o $@ $<
FORCE:
.PHONY: FORCE
......@@ -21,6 +21,13 @@ endif
build-dir := $(srctree)/tools/build
# Define $(fixdep) for dep-cmd function
ifeq ($(OUTPUT),)
fixdep := $(build-dir)/fixdep
else
fixdep := $(OUTPUT)/fixdep
endif
# Generic definitions
include $(build-dir)/Build.include
......
build := -f $(srctree)/tools/build/Makefile.build dir=. obj
fixdep:
$(Q)$(MAKE) -C $(srctree)/tools/build fixdep
.PHONY: fixdep
/*
* "Optimize" a list of dependencies as spit out by gcc -MD
* for the build framework.
*
* Original author:
* Copyright 2002 by Kai Germaschewski <kai.germaschewski@gmx.de>
*
* This code has been borrowed from kbuild's fixdep (scripts/basic/fixdep.c),
* Please check it for detailed explanation. This fixdep borow only the
* base transformation of dependecies without the CONFIG mangle.
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
char *target;
char *depfile;
char *cmdline;
static void usage(void)
{
fprintf(stderr, "Usage: fixdep <depfile> <target> <cmdline>\n");
exit(1);
}
/*
* Print out the commandline prefixed with cmd_<target filename> :=
*/
static void print_cmdline(void)
{
printf("cmd_%s := %s\n\n", target, cmdline);
}
/*
* Important: The below generated source_foo.o and deps_foo.o variable
* assignments are parsed not only by make, but also by the rather simple
* parser in scripts/mod/sumversion.c.
*/
static void parse_dep_file(void *map, size_t len)
{
char *m = map;
char *end = m + len;
char *p;
char s[PATH_MAX];
int is_target;
int saw_any_target = 0;
int is_first_dep = 0;
while (m < end) {
/* Skip any "white space" */
while (m < end && (*m == ' ' || *m == '\\' || *m == '\n'))
m++;
/* Find next "white space" */
p = m;
while (p < end && *p != ' ' && *p != '\\' && *p != '\n')
p++;
/* Is the token we found a target name? */
is_target = (*(p-1) == ':');
/* Don't write any target names into the dependency file */
if (is_target) {
/* The /next/ file is the first dependency */
is_first_dep = 1;
} else {
/* Save this token/filename */
memcpy(s, m, p-m);
s[p - m] = 0;
/*
* Do not list the source file as dependency,
* so that kbuild is not confused if a .c file
* is rewritten into .S or vice versa. Storing
* it in source_* is needed for modpost to
* compute srcversions.
*/
if (is_first_dep) {
/*
* If processing the concatenation of
* multiple dependency files, only
* process the first target name, which
* will be the original source name,
* and ignore any other target names,
* which will be intermediate temporary
* files.
*/
if (!saw_any_target) {
saw_any_target = 1;
printf("source_%s := %s\n\n",
target, s);
printf("deps_%s := \\\n",
target);
}
is_first_dep = 0;
} else
printf(" %s \\\n", s);
}
/*
* Start searching for next token immediately after the first
* "whitespace" character that follows this token.
*/
m = p + 1;
}
if (!saw_any_target) {
fprintf(stderr, "fixdep: parse error; no targets found\n");
exit(1);
}
printf("\n%s: $(deps_%s)\n\n", target, target);
printf("$(deps_%s):\n", target);
}
static void print_deps(void)
{
struct stat st;
int fd;
void *map;
fd = open(depfile, O_RDONLY);
if (fd < 0) {
fprintf(stderr, "fixdep: error opening depfile: ");
perror(depfile);
exit(2);
}
if (fstat(fd, &st) < 0) {
fprintf(stderr, "fixdep: error fstat'ing depfile: ");
perror(depfile);
exit(2);
}
if (st.st_size == 0) {
fprintf(stderr, "fixdep: %s is empty\n", depfile);
close(fd);
return;
}
map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if ((long) map == -1) {
perror("fixdep: mmap");
close(fd);
return;
}
parse_dep_file(map, st.st_size);
munmap(map, st.st_size);
close(fd);
}
int main(int argc, char **argv)
{
if (argc != 4)
usage();
depfile = argv[1];
target = argv[2];
cmdline = argv[3];
print_cmdline();
print_deps();
return 0;
}
......@@ -4,6 +4,7 @@ ex-y += b.o
ex-y += b.o
ex-y += empty/
ex-y += empty2/
ex-y += inc.o
libex-y += c.o
libex-y += d.o
......
export srctree := ../../../..
export srctree := $(abspath ../../../..)
export CC := gcc
export LD := ld
export AR := ar
build := -f $(srctree)/tools/build/Makefile.build dir=. obj
ex:
include $(srctree)/tools/build/Makefile.include
ex: ex-in.o libex-in.o
gcc -o $@ $^
ex.%: FORCE
ex.%: fixdep FORCE
make -f $(srctree)/tools/build/Makefile.build dir=. $@
ex-in.o: FORCE
ex-in.o: fixdep FORCE
make $(build)=ex
libex-in.o: FORCE
libex-in.o: fixdep FORCE
make $(build)=libex
clean:
......
......@@ -5,6 +5,7 @@ int c(void);
int d(void);
int e(void);
int f(void);
int inc(void);
int main(void)
{
......@@ -14,6 +15,7 @@ int main(void)
d();
e();
f();
inc();
return 0;
}
#ifdef INCLUDE
#include "krava.h"
#endif
int inc(void)
{
return 0;
}
......@@ -34,9 +34,36 @@ function test_ex_suffix {
make -C ex V=1 clean > /dev/null 2>&1
rm -f ex.out
}
function test_ex_include {
make -C ex V=1 clean > ex.out 2>&1
# build with krava.h include
touch ex/krava.h
make -C ex V=1 CFLAGS=-DINCLUDE >> ex.out 2>&1
if [ ! -x ./ex/ex ]; then
echo FAILED
exit -1
fi
# build without the include
rm -f ex/krava.h ex/ex
make -C ex V=1 >> ex.out 2>&1
if [ ! -x ./ex/ex ]; then
echo FAILED
exit -1
fi
make -C ex V=1 clean > /dev/null 2>&1
rm -f ex.out
}
echo -n Testing..
test_ex
test_ex_suffix
test_ex_include
echo OK
......@@ -21,12 +21,14 @@ CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
RM = rm -f
build := -f $(srctree)/tools/build/Makefile.build dir=. obj
API_IN := $(OUTPUT)libapi-in.o
all:
export srctree OUTPUT CC LD CFLAGS V
include $(srctree)/tools/build/Makefile.include
all: $(LIBFILE)
all: fixdep $(LIBFILE)
$(API_IN): FORCE
@$(MAKE) $(build)=libapi
......
......@@ -12,12 +12,14 @@
#include "tracing_path.h"
char tracing_mnt[PATH_MAX + 1] = "/sys/kernel/debug";
char tracing_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing";
char tracing_events_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing/events";
static void __tracing_path_set(const char *tracing, const char *mountpoint)
{
snprintf(tracing_mnt, sizeof(tracing_mnt), "%s", mountpoint);
snprintf(tracing_path, sizeof(tracing_path), "%s/%s",
mountpoint, tracing);
snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s",
......@@ -109,19 +111,10 @@ static int strerror_open(int err, char *buf, size_t size, const char *filename)
"Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'");
break;
case EACCES: {
const char *mountpoint = debugfs__mountpoint();
if (!access(mountpoint, R_OK) && strncmp(filename, "tracing/", 8) == 0) {
const char *tracefs_mntpoint = tracefs__mountpoint();
if (tracefs_mntpoint)
mountpoint = tracefs__mountpoint();
}
snprintf(buf, size,
"Error:\tNo permissions to read %s/%s\n"
"Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
tracing_events_path, filename, mountpoint);
tracing_events_path, filename, tracing_mnt);
}
break;
default:
......
......@@ -123,8 +123,10 @@ endif
# the same command line setup.
MAKEOVERRIDES=
all:
export srctree OUTPUT CC LD CFLAGS V
build := -f $(srctree)/tools/build/Makefile.build dir=. obj
include $(srctree)/tools/build/Makefile.include
BPF_IN := $(OUTPUT)libbpf-in.o
LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
......@@ -133,7 +135,7 @@ CMD_TARGETS = $(LIB_FILE)
TARGETS = $(CMD_TARGETS)
all: $(VERSION_FILES) all_cmd
all: fixdep $(VERSION_FILES) all_cmd
all_cmd: $(CMD_TARGETS)
......
......@@ -93,8 +93,10 @@ else
print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2';
endif
all:
export srctree OUTPUT CC LD CFLAGS V
build := -f $(srctree)/tools/build/Makefile.build dir=. obj
include $(srctree)/tools/build/Makefile.include
do_compile_shared_library = \
($(print_shared_lib_compile) \
......@@ -109,7 +111,7 @@ CMD_TARGETS = $(LIB_FILE)
TARGETS = $(CMD_TARGETS)
all: all_cmd
all: fixdep all_cmd
all_cmd: $(CMD_TARGETS)
......
......@@ -671,6 +671,7 @@ The letters are:
e synthesize tracing error events
d create a debug log
g synthesize a call chain (use with i or x)
l synthesize last branch entries (use with i or x)
"Instructions" events look like they were recorded by "perf record -e
instructions".
......@@ -707,12 +708,26 @@ on the sample is *not* adjusted and reflects the last known value of TSC.
For Intel PT, the default period is 100us.
Setting it to a zero period means "as often as possible".
In the case of Intel PT that is the same as a period of 1 and a unit of
'instructions' (i.e. --itrace=i1i).
Also the call chain size (default 16, max. 1024) for instructions or
transactions events can be specified. e.g.
--itrace=ig32
--itrace=xg32
Also the number of last branch entries (default 64, max. 1024) for instructions or
transactions events can be specified. e.g.
--itrace=il10
--itrace=xl10
Note that last branch entries are cleared for each sample, so there is no overlap
from one sample to the next.
To disable trace decoding entirely, use the option --no-itrace.
......@@ -749,3 +764,32 @@ perf inject also accepts the --itrace option in which case tracing data is
removed and replaced with the synthesized events. e.g.
perf inject --itrace -i perf.data -o perf.data.new
Below is an example of using Intel PT with autofdo. It requires autofdo
(https://github.com/google/autofdo) and gcc version 5. The bubble
sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial)
amended to take the number of elements as a parameter.
$ gcc-5 -O3 sort.c -o sort_optimized
$ ./sort_optimized 30000
Bubble sorting array of 30000 elements
2254 ms
$ cat ~/.perfconfig
[intel-pt]
mispred-all
$ perf record -e intel_pt//u ./sort 3000
Bubble sorting array of 3000 elements
58 ms
[ perf record: Woken up 2 times to write data ]
[ perf record: Captured and wrote 3.939 MB perf.data ]
$ perf inject -i perf.data -o inj --itrace=i100usle --strip
$ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1
$ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo
$ ./sort_autofdo 30000
Bubble sorting array of 30000 elements
2155 ms
Note there is currently no advantage to using Intel PT instead of LBR, but
that may change in the future if greater use is made of the data.
......@@ -6,6 +6,7 @@
e synthesize error events
d create a debug log
g synthesize a call chain (use with i or x)
l synthesize last branch entries (use with i or x)
The default is all events i.e. the same as --itrace=ibxe
......@@ -20,3 +21,6 @@
Also the call chain size (default 16, max. 1024) for instructions or
transactions events can be specified.
Also the number of last branch entries (default 64, max. 1024) for
instructions or transactions events can be specified.
......@@ -50,6 +50,9 @@ OPTIONS
include::itrace.txt[]
--strip::
Use with --itrace to strip out non-synthesized events.
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]
......@@ -249,6 +249,9 @@ include::itrace.txt[]
--full-source-path::
Show the full path for source files for srcline output.
--ns::
Use 9 decimal places when displaying time (i.e. show the nanoseconds)
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
......
......@@ -297,16 +297,16 @@ strip: $(PROGRAMS) $(OUTPUT)perf
PERF_IN := $(OUTPUT)perf-in.o
export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK
build := -f $(srctree)/tools/build/Makefile.build dir=. obj
include $(srctree)/tools/build/Makefile.include
$(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE
$(PERF_IN): prepare FORCE
$(Q)$(MAKE) $(build)=perf
$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
$(PERF_IN) $(LIBS) -o $@
$(GTK_IN): FORCE
$(GTK_IN): fixdep FORCE
$(Q)$(MAKE) $(build)=gtk
$(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS)
......@@ -349,27 +349,27 @@ endif
__build-dir = $(subst $(OUTPUT),,$(dir $@))
build-dir = $(if $(__build-dir),$(__build-dir),.)
single_dep: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h
prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep
$(OUTPUT)%.o: %.c single_dep FORCE
$(OUTPUT)%.o: %.c prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
$(OUTPUT)%.i: %.c single_dep FORCE
$(OUTPUT)%.i: %.c prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
$(OUTPUT)%.s: %.c single_dep FORCE
$(OUTPUT)%.s: %.c prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
$(OUTPUT)%-bison.o: %.c single_dep FORCE
$(OUTPUT)%-bison.o: %.c prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
$(OUTPUT)%-flex.o: %.c single_dep FORCE
$(OUTPUT)%-flex.o: %.c prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
$(OUTPUT)%.o: %.S single_dep FORCE
$(OUTPUT)%.o: %.S prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
$(OUTPUT)%.i: %.S single_dep FORCE
$(OUTPUT)%.i: %.S prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
$(OUTPUT)perf-%: %.o $(PERFLIBS)
......@@ -389,7 +389,7 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
LIBPERF_IN := $(OUTPUT)libperf-in.o
$(LIBPERF_IN): FORCE
$(LIBPERF_IN): fixdep FORCE
$(Q)$(MAKE) $(build)=libperf
$(LIB_FILE): $(LIBPERF_IN)
......@@ -397,10 +397,10 @@ $(LIB_FILE): $(LIBPERF_IN)
LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ)
$(LIBTRACEEVENT): FORCE
$(LIBTRACEEVENT): fixdep FORCE
$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
libtraceevent_plugins: FORCE
libtraceevent_plugins: fixdep FORCE
$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
......@@ -413,7 +413,7 @@ $(LIBTRACEEVENT)-clean:
install-traceevent-plugins: $(LIBTRACEEVENT)
$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins
$(LIBAPI): FORCE
$(LIBAPI): fixdep FORCE
$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a
$(LIBAPI)-clean:
......@@ -591,6 +591,6 @@ FORCE:
.PHONY: all install clean config-clean strip install-gtk
.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep
.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare
.PHONY: libtraceevent_plugins
......@@ -28,9 +28,11 @@ struct perf_inject {
bool build_ids;
bool sched_stat;
bool have_auxtrace;
bool strip;
const char *input_name;
struct perf_data_file output;
u64 bytes_written;
u64 aux_id;
struct list_head samples;
struct itrace_synth_opts itrace_synth_opts;
};
......@@ -176,6 +178,27 @@ static int perf_event__repipe(struct perf_tool *tool,
return perf_event__repipe_synth(tool, event);
}
static int perf_event__drop(struct perf_tool *tool __maybe_unused,
union perf_event *event __maybe_unused,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
{
return 0;
}
static int perf_event__drop_aux(struct perf_tool *tool,
union perf_event *event __maybe_unused,
struct perf_sample *sample,
struct machine *machine __maybe_unused)
{
struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
if (!inject->aux_id)
inject->aux_id = sample->id;
return 0;
}
typedef int (*inject_handler)(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
......@@ -466,6 +489,78 @@ static int perf_evsel__check_stype(struct perf_evsel *evsel,
return 0;
}
static int drop_sample(struct perf_tool *tool __maybe_unused,
union perf_event *event __maybe_unused,
struct perf_sample *sample __maybe_unused,
struct perf_evsel *evsel __maybe_unused,
struct machine *machine __maybe_unused)
{
return 0;
}
static void strip_init(struct perf_inject *inject)
{
struct perf_evlist *evlist = inject->session->evlist;
struct perf_evsel *evsel;
inject->tool.context_switch = perf_event__drop;
evlist__for_each(evlist, evsel)
evsel->handler = drop_sample;
}
static bool has_tracking(struct perf_evsel *evsel)
{
return evsel->attr.mmap || evsel->attr.mmap2 || evsel->attr.comm ||
evsel->attr.task;
}
#define COMPAT_MASK (PERF_SAMPLE_ID | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \
PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_IDENTIFIER)
/*
* In order that the perf.data file is parsable, tracking events like MMAP need
* their selected event to exist, except if there is only 1 selected event left
* and it has a compatible sample type.
*/
static bool ok_to_remove(struct perf_evlist *evlist,
struct perf_evsel *evsel_to_remove)
{
struct perf_evsel *evsel;
int cnt = 0;
bool ok = false;
if (!has_tracking(evsel_to_remove))
return true;
evlist__for_each(evlist, evsel) {
if (evsel->handler != drop_sample) {
cnt += 1;
if ((evsel->attr.sample_type & COMPAT_MASK) ==
(evsel_to_remove->attr.sample_type & COMPAT_MASK))
ok = true;
}
}
return ok && cnt == 1;
}
static void strip_fini(struct perf_inject *inject)
{
struct perf_evlist *evlist = inject->session->evlist;
struct perf_evsel *evsel, *tmp;
/* Remove non-synthesized evsels if possible */
evlist__for_each_safe(evlist, tmp, evsel) {
if (evsel->handler == drop_sample &&
ok_to_remove(evlist, evsel)) {
pr_debug("Deleting %s\n", perf_evsel__name(evsel));
perf_evlist__remove(evlist, evsel);
perf_evsel__delete(evsel);
}
}
}
static int __cmd_inject(struct perf_inject *inject)
{
int ret = -EINVAL;
......@@ -512,10 +607,14 @@ static int __cmd_inject(struct perf_inject *inject)
inject->tool.id_index = perf_event__repipe_id_index;
inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
inject->tool.auxtrace = perf_event__process_auxtrace;
inject->tool.aux = perf_event__drop_aux;
inject->tool.itrace_start = perf_event__drop_aux,
inject->tool.ordered_events = true;
inject->tool.ordering_requires_timestamps = true;
/* Allow space in the header for new attributes */
output_data_offset = 4096;
if (inject->strip)
strip_init(inject);
}
if (!inject->itrace_synth_opts.set)
......@@ -535,11 +634,28 @@ static int __cmd_inject(struct perf_inject *inject)
}
/*
* The AUX areas have been removed and replaced with
* synthesized hardware events, so clear the feature flag.
* synthesized hardware events, so clear the feature flag and
* remove the evsel.
*/
if (inject->itrace_synth_opts.set)
if (inject->itrace_synth_opts.set) {
struct perf_evsel *evsel;
perf_header__clear_feat(&session->header,
HEADER_AUXTRACE);
if (inject->itrace_synth_opts.last_branch)
perf_header__set_feat(&session->header,
HEADER_BRANCH_STACK);
evsel = perf_evlist__id2evsel_strict(session->evlist,
inject->aux_id);
if (evsel) {
pr_debug("Deleting %s\n",
perf_evsel__name(evsel));
perf_evlist__remove(session->evlist, evsel);
perf_evsel__delete(evsel);
}
if (inject->strip)
strip_fini(inject);
}
session->header.data_offset = output_data_offset;
session->header.data_size = inject->bytes_written;
perf_session__write_header(session, session->evlist, fd, true);
......@@ -604,6 +720,8 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
NULL, "opts", "Instruction Tracing options",
itrace_parse_synth_opts),
OPT_BOOLEAN(0, "strip", &inject.strip,
"strip non-synthesized events (use with --itrace)"),
OPT_END()
};
const char * const inject_usage[] = {
......@@ -619,6 +737,11 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
if (argc)
usage_with_options(inject_usage, options);
if (inject.strip && !inject.itrace_synth_opts.set) {
pr_err("--strip option requires --itrace option\n");
return -1;
}
if (perf_data_file__open(&inject.output)) {
perror("failed to create output file");
return -1;
......
......@@ -163,14 +163,21 @@ static int process_sample_event(struct perf_tool *tool,
if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
goto out_put;
if (sort__mode == SORT_MODE__BRANCH)
if (sort__mode == SORT_MODE__BRANCH) {
/*
* A non-synthesized event might not have a branch stack if
* branch stacks have been synthesized (using itrace options).
*/
if (!sample->branch_stack)
goto out_put;
iter.ops = &hist_iter_branch;
else if (rep->mem_mode)
} else if (rep->mem_mode) {
iter.ops = &hist_iter_mem;
else if (symbol_conf.cumulate_callchain)
} else if (symbol_conf.cumulate_callchain) {
iter.ops = &hist_iter_cumulative;
else
} else {
iter.ops = &hist_iter_normal;
}
if (al.map != NULL)
al.map->dso->hit = 1;
......@@ -214,6 +221,15 @@ static int report__setup_sample_type(struct report *rep)
u64 sample_type = perf_evlist__combined_sample_type(session->evlist);
bool is_pipe = perf_data_file__is_pipe(session->file);
if (session->itrace_synth_opts->callchain ||
(!is_pipe &&
perf_header__has_feat(&session->header, HEADER_AUXTRACE) &&
!session->itrace_synth_opts->set))
sample_type |= PERF_SAMPLE_CALLCHAIN;
if (session->itrace_synth_opts->last_branch)
sample_type |= PERF_SAMPLE_BRANCH_STACK;
if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
if (sort__has_parent) {
ui__error("Selected --sort parent, but no "
......@@ -793,6 +809,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
if (report.inverted_callchain)
callchain_param.order = ORDER_CALLER;
if (itrace_synth_opts.callchain &&
(int)itrace_synth_opts.callchain_sz > report.max_stack)
report.max_stack = itrace_synth_opts.callchain_sz;
if (!input_name || !strlen(input_name)) {
if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
input_name = "-";
......@@ -820,6 +840,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
has_br_stack = perf_header__has_feat(&session->header,
HEADER_BRANCH_STACK);
if (itrace_synth_opts.last_branch)
has_br_stack = true;
/*
* Branch mode is a tristate:
* -1 means default, so decide based on the file having branch data.
......
......@@ -29,9 +29,12 @@ static bool no_callchain;
static bool latency_format;
static bool system_wide;
static bool print_flags;
static bool nanosecs;
static const char *cpu_list;
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
enum perf_output_field {
PERF_OUTPUT_COMM = 1U << 0,
PERF_OUTPUT_TID = 1U << 1,
......@@ -415,6 +418,9 @@ static void print_sample_start(struct perf_sample *sample,
secs = nsecs / NSECS_PER_SEC;
nsecs -= secs * NSECS_PER_SEC;
usecs = nsecs / NSECS_PER_USEC;
if (nanosecs)
printf("%5lu.%09llu: ", secs, nsecs);
else
printf("%5lu.%06lu: ", secs, usecs);
}
}
......@@ -471,7 +477,7 @@ static void print_sample_bts(union perf_event *event,
}
}
perf_evsel__print_ip(evsel, sample, al, print_opts,
PERF_MAX_STACK_DEPTH);
scripting_max_stack);
}
/* print branch_to information */
......@@ -548,7 +554,7 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
perf_evsel__print_ip(evsel, sample, al,
output[attr->type].print_ip_opts,
PERF_MAX_STACK_DEPTH);
scripting_max_stack);
}
if (PRINT_FIELD(IREGS))
......@@ -1695,6 +1701,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events,
"Show context switch events (if recorded)"),
OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
OPT_BOOLEAN(0, "ns", &nanosecs,
"Use 9 decimal places when displaying time"),
OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
"Instruction Tracing options",
itrace_parse_synth_opts),
......@@ -1740,6 +1748,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
}
}
if (itrace_synth_opts.callchain &&
itrace_synth_opts.callchain_sz > scripting_max_stack)
scripting_max_stack = itrace_synth_opts.callchain_sz;
/* make sure PERF_EXEC_PATH is set for scripts */
perf_set_argv_exec_path(perf_exec_path());
......
......@@ -655,7 +655,7 @@ static int symbol_filter(struct map *map, struct symbol *sym)
{
const char *name = sym->name;
if (!map->dso->kernel)
if (!__map__is_kernel(map))
return 0;
/*
* ppc64 uses function descriptors and appends a '.' to the
......
......@@ -61,6 +61,142 @@ import datetime
#
# An example of using the database is provided by the script
# call-graph-from-postgresql.py. Refer to that script for details.
#
# Tables:
#
# The tables largely correspond to perf tools' data structures. They are largely self-explanatory.
#
# samples
#
# 'samples' is the main table. It represents what instruction was executing at a point in time
# when something (a selected event) happened. The memory address is the instruction pointer or 'ip'.
#
# calls
#
# 'calls' represents function calls and is related to 'samples' by 'call_id' and 'return_id'.
# 'calls' is only created when the 'calls' option to this script is specified.
#
# call_paths
#
# 'call_paths' represents all the call stacks. Each 'call' has an associated record in 'call_paths'.
# 'calls_paths' is only created when the 'calls' option to this script is specified.
#
# branch_types
#
# 'branch_types' provides descriptions for each type of branch.
#
# comm_threads
#
# 'comm_threads' shows how 'comms' relates to 'threads'.
#
# comms
#
# 'comms' contains a record for each 'comm' - the name given to the executable that is running.
#
# dsos
#
# 'dsos' contains a record for each executable file or library.
#
# machines
#
# 'machines' can be used to distinguish virtual machines if virtualization is supported.
#
# selected_events
#
# 'selected_events' contains a record for each kind of event that has been sampled.
#
# symbols
#
# 'symbols' contains a record for each symbol. Only symbols that have samples are present.
#
# threads
#
# 'threads' contains a record for each thread.
#
# Views:
#
# Most of the tables have views for more friendly display. The views are:
#
# calls_view
# call_paths_view
# comm_threads_view
# dsos_view
# machines_view
# samples_view
# symbols_view
# threads_view
#
# More examples of browsing the database with psql:
# Note that some of the examples are not the most optimal SQL query.
# Note that call information is only available if the script's 'calls' option has been used.
#
# Top 10 function calls (not aggregated by symbol):
#
# SELECT * FROM calls_view ORDER BY elapsed_time DESC LIMIT 10;
#
# Top 10 function calls (aggregated by symbol):
#
# SELECT symbol_id,(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,
# SUM(elapsed_time) AS tot_elapsed_time,SUM(branch_count) AS tot_branch_count
# FROM calls_view GROUP BY symbol_id ORDER BY tot_elapsed_time DESC LIMIT 10;
#
# Note that the branch count gives a rough estimation of cpu usage, so functions
# that took a long time but have a relatively low branch count must have spent time
# waiting.
#
# Find symbols by pattern matching on part of the name (e.g. names containing 'alloc'):
#
# SELECT * FROM symbols_view WHERE name LIKE '%alloc%';
#
# Top 10 function calls for a specific symbol (e.g. whose symbol_id is 187):
#
# SELECT * FROM calls_view WHERE symbol_id = 187 ORDER BY elapsed_time DESC LIMIT 10;
#
# Show function calls made by function in the same context (i.e. same call path) (e.g. one with call_path_id 254):
#
# SELECT * FROM calls_view WHERE parent_call_path_id = 254;
#
# Show branches made during a function call (e.g. where call_id is 29357 and return_id is 29370 and tid is 29670)
#
# SELECT * FROM samples_view WHERE id >= 29357 AND id <= 29370 AND tid = 29670 AND event LIKE 'branches%';
#
# Show transactions:
#
# SELECT * FROM samples_view WHERE event = 'transactions';
#
# Note transaction start has 'in_tx' true whereas, transaction end has 'in_tx' false.
# Transaction aborts have branch_type_name 'transaction abort'
#
# Show transaction aborts:
#
# SELECT * FROM samples_view WHERE event = 'transactions' AND branch_type_name = 'transaction abort';
#
# To print a call stack requires walking the call_paths table. For example this python script:
# #!/usr/bin/python2
#
# import sys
# from PySide.QtSql import *
#
# if __name__ == '__main__':
# if (len(sys.argv) < 3):
# print >> sys.stderr, "Usage is: printcallstack.py <database name> <call_path_id>"
# raise Exception("Too few arguments")
# dbname = sys.argv[1]
# call_path_id = sys.argv[2]
# db = QSqlDatabase.addDatabase('QPSQL')
# db.setDatabaseName(dbname)
# if not db.open():
# raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
# query = QSqlQuery(db)
# print " id ip symbol_id symbol dso_id dso_short_name"
# while call_path_id != 0 and call_path_id != 1:
# ret = query.exec_('SELECT * FROM call_paths_view WHERE id = ' + str(call_path_id))
# if not ret:
# raise Exception("Query failed: " + query.lastError().text())
# if not query.next():
# raise Exception("Query failed")
# print "{0:>6} {1:>10} {2:>9} {3:<30} {4:>6} {5:<30}".format(query.value(0), query.value(1), query.value(2), query.value(3), query.value(4), query.value(5))
# call_path_id = query.value(6)
from PySide.QtSql import *
......@@ -244,6 +380,91 @@ if perf_db_export_calls:
'parent_call_path_id bigint,'
'flags integer)')
do_query(query, 'CREATE VIEW machines_view AS '
'SELECT '
'id,'
'pid,'
'root_dir,'
'CASE WHEN id=0 THEN \'unknown\' WHEN pid=-1 THEN \'host\' ELSE \'guest\' END AS host_or_guest'
' FROM machines')
do_query(query, 'CREATE VIEW dsos_view AS '
'SELECT '
'id,'
'machine_id,'
'(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,'
'short_name,'
'long_name,'
'build_id'
' FROM dsos')
do_query(query, 'CREATE VIEW symbols_view AS '
'SELECT '
'id,'
'name,'
'(SELECT short_name FROM dsos WHERE id=dso_id) AS dso,'
'dso_id,'
'sym_start,'
'sym_end,'
'CASE WHEN binding=0 THEN \'local\' WHEN binding=1 THEN \'global\' ELSE \'weak\' END AS binding'
' FROM symbols')
do_query(query, 'CREATE VIEW threads_view AS '
'SELECT '
'id,'
'machine_id,'
'(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,'
'process_id,'
'pid,'
'tid'
' FROM threads')
do_query(query, 'CREATE VIEW comm_threads_view AS '
'SELECT '
'comm_id,'
'(SELECT comm FROM comms WHERE id = comm_id) AS command,'
'thread_id,'
'(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
'(SELECT tid FROM threads WHERE id = thread_id) AS tid'
' FROM comm_threads')
if perf_db_export_calls:
do_query(query, 'CREATE VIEW call_paths_view AS '
'SELECT '
'c.id,'
'to_hex(c.ip) AS ip,'
'c.symbol_id,'
'(SELECT name FROM symbols WHERE id = c.symbol_id) AS symbol,'
'(SELECT dso_id FROM symbols WHERE id = c.symbol_id) AS dso_id,'
'(SELECT dso FROM symbols_view WHERE id = c.symbol_id) AS dso_short_name,'
'c.parent_id,'
'to_hex(p.ip) AS parent_ip,'
'p.symbol_id AS parent_symbol_id,'
'(SELECT name FROM symbols WHERE id = p.symbol_id) AS parent_symbol,'
'(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,'
'(SELECT dso FROM symbols_view WHERE id = p.symbol_id) AS parent_dso_short_name'
' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id')
do_query(query, 'CREATE VIEW calls_view AS '
'SELECT '
'calls.id,'
'thread_id,'
'(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
'(SELECT tid FROM threads WHERE id = thread_id) AS tid,'
'(SELECT comm FROM comms WHERE id = comm_id) AS command,'
'call_path_id,'
'to_hex(ip) AS ip,'
'symbol_id,'
'(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,'
'call_time,'
'return_time,'
'return_time - call_time AS elapsed_time,'
'branch_count,'
'call_id,'
'return_id,'
'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,'
'parent_call_path_id'
' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id')
do_query(query, 'CREATE VIEW samples_view AS '
'SELECT '
'id,'
......
......@@ -1527,7 +1527,7 @@ add_thread_opt(struct hist_browser *browser, struct popup_action *act,
static int
do_zoom_dso(struct hist_browser *browser, struct popup_action *act)
{
struct dso *dso = act->dso;
struct map *map = act->ms.map;
if (browser->hists->dso_filter) {
pstack__remove(browser->pstack, &browser->hists->dso_filter);
......@@ -1535,11 +1535,11 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act)
browser->hists->dso_filter = NULL;
ui_helpline__pop();
} else {
if (dso == NULL)
if (map == NULL)
return 0;
ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"",
dso->kernel ? "the Kernel" : dso->short_name);
browser->hists->dso_filter = dso;
__map__is_kernel(map) ? "the Kernel" : map->dso->short_name);
browser->hists->dso_filter = map->dso;
perf_hpp__set_elide(HISTC_DSO, true);
pstack__push(browser->pstack, &browser->hists->dso_filter);
}
......@@ -1551,17 +1551,18 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act)
static int
add_dso_opt(struct hist_browser *browser, struct popup_action *act,
char **optstr, struct dso *dso)
char **optstr, struct map *map)
{
if (dso == NULL)
if (map == NULL)
return 0;
if (asprintf(optstr, "Zoom %s %s DSO",
browser->hists->dso_filter ? "out of" : "into",
dso->kernel ? "the Kernel" : dso->short_name) < 0)
__map__is_kernel(map) ? "the Kernel" : map->dso->short_name) < 0)
return 0;
act->dso = dso;
act->ms.map = map;
act->dso = map->dso;
act->fn = do_zoom_dso;
return 1;
}
......@@ -1814,6 +1815,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
while (1) {
struct thread *thread = NULL;
struct dso *dso = NULL;
struct map *map = NULL;
int choice = 0;
int socked_id = -1;
......@@ -1823,7 +1825,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
if (browser->he_selection != NULL) {
thread = hist_browser__selected_thread(browser);
dso = browser->selection->map ? browser->selection->map->dso : NULL;
map = browser->selection->map;
if (map)
dso = map->dso;
socked_id = browser->he_selection->socket;
}
switch (key) {
......@@ -2014,7 +2018,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
nr_options += add_thread_opt(browser, &actions[nr_options],
&options[nr_options], thread);
nr_options += add_dso_opt(browser, &actions[nr_options],
&options[nr_options], dso);
&options[nr_options], map);
nr_options += add_map_opt(browser, &actions[nr_options],
&options[nr_options],
browser->selection ?
......
......@@ -926,6 +926,8 @@ s64 perf_event__process_auxtrace(struct perf_tool *tool,
#define PERF_ITRACE_DEFAULT_PERIOD 100000
#define PERF_ITRACE_DEFAULT_CALLCHAIN_SZ 16
#define PERF_ITRACE_MAX_CALLCHAIN_SZ 1024
#define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ 64
#define PERF_ITRACE_MAX_LAST_BRANCH_SZ 1024
void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
{
......@@ -936,6 +938,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
}
/*
......@@ -950,6 +953,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
const char *p;
char *endptr;
bool period_type_set = false;
bool period_set = false;
synth_opts->set = true;
......@@ -971,6 +975,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
p += 1;
if (isdigit(*p)) {
synth_opts->period = strtoull(p, &endptr, 10);
period_set = true;
p = endptr;
while (*p == ' ' || *p == ',')
p += 1;
......@@ -1041,6 +1046,23 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
synth_opts->callchain_sz = val;
}
break;
case 'l':
synth_opts->last_branch = true;
synth_opts->last_branch_sz =
PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
while (*p == ' ' || *p == ',')
p += 1;
if (isdigit(*p)) {
unsigned int val;
val = strtoul(p, &endptr, 10);
p = endptr;
if (!val ||
val > PERF_ITRACE_MAX_LAST_BRANCH_SZ)
goto out_err;
synth_opts->last_branch_sz = val;
}
break;
case ' ':
case ',':
break;
......@@ -1053,7 +1075,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
if (!period_type_set)
synth_opts->period_type =
PERF_ITRACE_DEFAULT_PERIOD_TYPE;
if (!synth_opts->period)
if (!period_set)
synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
}
......
......@@ -63,7 +63,9 @@ enum itrace_period_type {
* @calls: limit branch samples to calls (can be combined with @returns)
* @returns: limit branch samples to returns (can be combined with @calls)
* @callchain: add callchain to 'instructions' events
* @last_branch: add branch context to 'instruction' events
* @callchain_sz: maximum callchain size
* @last_branch_sz: branch context size
* @period: 'instructions' events period
* @period_type: 'instructions' events period type
*/
......@@ -79,7 +81,9 @@ struct itrace_synth_opts {
bool calls;
bool returns;
bool callchain;
bool last_branch;
unsigned int callchain_sz;
unsigned int last_branch_sz;
unsigned long long period;
enum itrace_period_type period_type;
};
......
......@@ -378,7 +378,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
for (pos = maps__first(maps); pos; pos = map__next(pos)) {
size_t size;
if (pos->dso->kernel)
if (__map__is_kernel(pos))
continue;
size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
......
......@@ -257,6 +257,7 @@ struct events_stats {
u64 total_non_filtered_period;
u64 total_lost;
u64 total_lost_samples;
u64 total_aux_lost;
u64 total_invalid_chains;
u32 nr_events[PERF_RECORD_HEADER_MAX];
u32 nr_non_filtered_samples;
......
......@@ -165,6 +165,13 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
__perf_evlist__propagate_maps(evlist, entry);
}
void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel)
{
evsel->evlist = NULL;
list_del_init(&evsel->node);
evlist->nr_entries -= 1;
}
void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
struct list_head *list)
{
......@@ -617,6 +624,21 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
return NULL;
}
struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
u64 id)
{
struct perf_sample_id *sid;
if (!id)
return NULL;
sid = perf_evlist__id2sid(evlist, id);
if (sid)
return sid->evsel;
return NULL;
}
static int perf_evlist__event2id(struct perf_evlist *evlist,
union perf_event *event, u64 *id)
{
......
......@@ -73,6 +73,7 @@ void perf_evlist__exit(struct perf_evlist *evlist);
void perf_evlist__delete(struct perf_evlist *evlist);
void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry);
void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel);
int perf_evlist__add_default(struct perf_evlist *evlist);
int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
struct perf_event_attr *attrs, size_t nr_attrs);
......@@ -104,6 +105,8 @@ int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mas
int perf_evlist__poll(struct perf_evlist *evlist, int timeout);
struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
u64 id);
struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id);
......
......@@ -695,7 +695,7 @@ iter_finish_normal_entry(struct hist_entry_iter *iter,
}
static int
iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused,
iter_prepare_cumulative_entry(struct hist_entry_iter *iter,
struct addr_location *al __maybe_unused)
{
struct hist_entry **he_cache;
......@@ -707,7 +707,7 @@ iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused,
* cumulated only one time to prevent entries more than 100%
* overhead.
*/
he_cache = malloc(sizeof(*he_cache) * (PERF_MAX_STACK_DEPTH + 1));
he_cache = malloc(sizeof(*he_cache) * (iter->max_stack + 1));
if (he_cache == NULL)
return -ENOMEM;
......@@ -868,6 +868,8 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
if (err)
return err;
iter->max_stack = max_stack_depth;
err = iter->ops->prepare_entry(iter, al);
if (err)
goto out;
......
......@@ -90,6 +90,7 @@ struct hist_entry_iter {
int curr;
bool hide_unresolved;
int max_stack;
struct perf_evsel *evsel;
struct perf_sample *sample;
......
......@@ -650,7 +650,7 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
if (data->from_mtc && timestamp < data->timestamp &&
data->timestamp - timestamp < decoder->tsc_slip)
return 1;
while (timestamp < data->timestamp)
if (timestamp < data->timestamp)
timestamp += (1ULL << 56);
if (pkt_info->last_packet_type != INTEL_PT_CYC) {
if (data->from_mtc)
......@@ -1191,7 +1191,7 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
timestamp);
timestamp = decoder->timestamp;
}
while (timestamp < decoder->timestamp) {
if (timestamp < decoder->timestamp) {
intel_pt_log_to("Wraparound timestamp", timestamp);
timestamp += (1ULL << 56);
decoder->tsc_timestamp = timestamp;
......
......@@ -29,18 +29,18 @@
static FILE *f;
static char log_name[MAX_LOG_NAME];
static bool enable_logging;
bool intel_pt_enable_logging;
void intel_pt_log_enable(void)
{
enable_logging = true;
intel_pt_enable_logging = true;
}
void intel_pt_log_disable(void)
{
if (f)
fflush(f);
enable_logging = false;
intel_pt_enable_logging = false;
}
void intel_pt_log_set_name(const char *name)
......@@ -80,7 +80,7 @@ static void intel_pt_print_no_data(uint64_t pos, int indent)
static int intel_pt_log_open(void)
{
if (!enable_logging)
if (!intel_pt_enable_logging)
return -1;
if (f)
......@@ -91,14 +91,14 @@ static int intel_pt_log_open(void)
f = fopen(log_name, "w+");
if (!f) {
enable_logging = false;
intel_pt_enable_logging = false;
return -1;
}
return 0;
}
void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
uint64_t pos, const unsigned char *buf)
{
char desc[INTEL_PT_PKT_DESC_MAX];
......@@ -111,7 +111,7 @@ void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
fprintf(f, "%s\n", desc);
}
void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
{
char desc[INTEL_PT_INSN_DESC_MAX];
size_t len = intel_pt_insn->length;
......@@ -128,7 +128,8 @@ void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
fprintf(f, "Bad instruction!\n");
}
void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
uint64_t ip)
{
char desc[INTEL_PT_INSN_DESC_MAX];
......@@ -142,7 +143,7 @@ void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
fprintf(f, "Bad instruction!\n");
}
void intel_pt_log(const char *fmt, ...)
void __intel_pt_log(const char *fmt, ...)
{
va_list args;
......
......@@ -25,20 +25,46 @@ void intel_pt_log_enable(void);
void intel_pt_log_disable(void);
void intel_pt_log_set_name(const char *name);
void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
uint64_t pos, const unsigned char *buf);
struct intel_pt_insn;
void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip);
void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip);
void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
uint64_t ip);
__attribute__((format(printf, 1, 2)))
void intel_pt_log(const char *fmt, ...);
void __intel_pt_log(const char *fmt, ...);
#define intel_pt_log(fmt, ...) \
do { \
if (intel_pt_enable_logging) \
__intel_pt_log(fmt, ##__VA_ARGS__); \
} while (0)
#define intel_pt_log_packet(arg, ...) \
do { \
if (intel_pt_enable_logging) \
__intel_pt_log_packet(arg, ##__VA_ARGS__); \
} while (0)
#define intel_pt_log_insn(arg, ...) \
do { \
if (intel_pt_enable_logging) \
__intel_pt_log_insn(arg, ##__VA_ARGS__); \
} while (0)
#define intel_pt_log_insn_no_data(arg, ...) \
do { \
if (intel_pt_enable_logging) \
__intel_pt_log_insn_no_data(arg, ##__VA_ARGS__); \
} while (0)
#define x64_fmt "0x%" PRIx64
extern bool intel_pt_enable_logging;
static inline void intel_pt_log_at(const char *msg, uint64_t u)
{
intel_pt_log("%s at " x64_fmt "\n", msg, u);
......
......@@ -22,6 +22,7 @@
#include "../perf.h"
#include "session.h"
#include "machine.h"
#include "sort.h"
#include "tool.h"
#include "event.h"
#include "evlist.h"
......@@ -63,6 +64,7 @@ struct intel_pt {
bool data_queued;
bool est_tsc;
bool sync_switch;
bool mispred_all;
int have_sched_switch;
u32 pmu_type;
u64 kernel_start;
......@@ -115,6 +117,9 @@ struct intel_pt_queue {
void *decoder;
const struct intel_pt_state *state;
struct ip_callchain *chain;
struct branch_stack *last_branch;
struct branch_stack *last_branch_rb;
size_t last_branch_pos;
union perf_event *event_buf;
bool on_heap;
bool stop;
......@@ -675,6 +680,19 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
goto out_free;
}
if (pt->synth_opts.last_branch) {
size_t sz = sizeof(struct branch_stack);
sz += pt->synth_opts.last_branch_sz *
sizeof(struct branch_entry);
ptq->last_branch = zalloc(sz);
if (!ptq->last_branch)
goto out_free;
ptq->last_branch_rb = zalloc(sz);
if (!ptq->last_branch_rb)
goto out_free;
}
ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
if (!ptq->event_buf)
goto out_free;
......@@ -720,7 +738,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
if (!params.period) {
params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
params.period = 1000;
params.period = 1;
}
}
......@@ -732,6 +750,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
out_free:
zfree(&ptq->event_buf);
zfree(&ptq->last_branch);
zfree(&ptq->last_branch_rb);
zfree(&ptq->chain);
free(ptq);
return NULL;
......@@ -746,6 +766,8 @@ static void intel_pt_free_queue(void *priv)
thread__zput(ptq->thread);
intel_pt_decoder_free(ptq->decoder);
zfree(&ptq->event_buf);
zfree(&ptq->last_branch);
zfree(&ptq->last_branch_rb);
zfree(&ptq->chain);
free(ptq);
}
......@@ -876,6 +898,58 @@ static int intel_pt_setup_queues(struct intel_pt *pt)
return 0;
}
static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
{
struct branch_stack *bs_src = ptq->last_branch_rb;
struct branch_stack *bs_dst = ptq->last_branch;
size_t nr = 0;
bs_dst->nr = bs_src->nr;
if (!bs_src->nr)
return;
nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
memcpy(&bs_dst->entries[0],
&bs_src->entries[ptq->last_branch_pos],
sizeof(struct branch_entry) * nr);
if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
memcpy(&bs_dst->entries[nr],
&bs_src->entries[0],
sizeof(struct branch_entry) * ptq->last_branch_pos);
}
}
static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
{
ptq->last_branch_pos = 0;
ptq->last_branch_rb->nr = 0;
}
static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
{
const struct intel_pt_state *state = ptq->state;
struct branch_stack *bs = ptq->last_branch_rb;
struct branch_entry *be;
if (!ptq->last_branch_pos)
ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;
ptq->last_branch_pos -= 1;
be = &bs->entries[ptq->last_branch_pos];
be->from = state->from_ip;
be->to = state->to_ip;
be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
/* No support for mispredict */
be->flags.mispred = ptq->pt->mispred_all;
if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
bs->nr += 1;
}
static int intel_pt_inject_event(union perf_event *event,
struct perf_sample *sample, u64 type,
bool swapped)
......@@ -890,6 +964,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
struct perf_sample sample = { .ip = 0, };
struct dummy_branch_stack {
u64 nr;
struct branch_entry entries;
} dummy_bs;
if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
return 0;
event->sample.header.type = PERF_RECORD_SAMPLE;
event->sample.header.misc = PERF_RECORD_MISC_USER;
......@@ -909,8 +990,20 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
sample.flags = ptq->flags;
sample.insn_len = ptq->insn_len;
if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
return 0;
/*
* perf report cannot handle events without a branch stack when using
* SORT_MODE__BRANCH so make a dummy one.
*/
if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
dummy_bs = (struct dummy_branch_stack){
.nr = 1,
.entries = {
.from = sample.ip,
.to = sample.addr,
},
};
sample.branch_stack = (struct branch_stack *)&dummy_bs;
}
if (pt->synth_opts.inject) {
ret = intel_pt_inject_event(event, &sample,
......@@ -961,6 +1054,11 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
sample.callchain = ptq->chain;
}
if (pt->synth_opts.last_branch) {
intel_pt_copy_last_branch_rb(ptq);
sample.branch_stack = ptq->last_branch;
}
if (pt->synth_opts.inject) {
ret = intel_pt_inject_event(event, &sample,
pt->instructions_sample_type,
......@@ -974,6 +1072,9 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
ret);
if (pt->synth_opts.last_branch)
intel_pt_reset_last_branch_rb(ptq);
return ret;
}
......@@ -1008,6 +1109,11 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
sample.callchain = ptq->chain;
}
if (pt->synth_opts.last_branch) {
intel_pt_copy_last_branch_rb(ptq);
sample.branch_stack = ptq->last_branch;
}
if (pt->synth_opts.inject) {
ret = intel_pt_inject_event(event, &sample,
pt->transactions_sample_type,
......@@ -1021,6 +1127,9 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
ret);
if (pt->synth_opts.callchain)
intel_pt_reset_last_branch_rb(ptq);
return ret;
}
......@@ -1116,6 +1225,9 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
return err;
}
if (pt->synth_opts.last_branch)
intel_pt_update_last_branch_rb(ptq);
if (!pt->sync_switch)
return 0;
......@@ -1763,6 +1875,8 @@ static int intel_pt_synth_events(struct intel_pt *pt,
pt->instructions_sample_period = attr.sample_period;
if (pt->synth_opts.callchain)
attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
if (pt->synth_opts.last_branch)
attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
id, (u64)attr.sample_type);
err = intel_pt_synth_event(session, &attr, id);
......@@ -1782,6 +1896,8 @@ static int intel_pt_synth_events(struct intel_pt *pt,
attr.sample_period = 1;
if (pt->synth_opts.callchain)
attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
if (pt->synth_opts.last_branch)
attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
id, (u64)attr.sample_type);
err = intel_pt_synth_event(session, &attr, id);
......@@ -1808,6 +1924,7 @@ static int intel_pt_synth_events(struct intel_pt *pt,
attr.sample_period = 1;
attr.sample_type |= PERF_SAMPLE_ADDR;
attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK;
pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
id, (u64)attr.sample_type);
err = intel_pt_synth_event(session, &attr, id);
......@@ -1852,6 +1969,16 @@ static bool intel_pt_find_switch(struct perf_evlist *evlist)
return false;
}
static int intel_pt_perf_config(const char *var, const char *value, void *data)
{
struct intel_pt *pt = data;
if (!strcmp(var, "intel-pt.mispred-all"))
pt->mispred_all = perf_config_bool(var, value);
return 0;
}
static const char * const intel_pt_info_fmts[] = {
[INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
[INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
......@@ -1896,6 +2023,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
if (!pt)
return -ENOMEM;
perf_config(intel_pt_perf_config, pt);
err = auxtrace_queues__init(&pt->queues);
if (err)
goto err_free;
......
......@@ -27,6 +27,8 @@
extern int parse_events_debug;
#endif
int parse_events_parse(void *data, void *scanner);
static int get_config_terms(struct list_head *head_config,
struct list_head *head_terms __maybe_unused);
static struct perf_pmu_event_symbol *perf_pmu_events_list;
/*
......@@ -416,7 +418,8 @@ static void tracepoint_error(struct parse_events_error *error, int err,
static int add_tracepoint(struct list_head *list, int *idx,
char *sys_name, char *evt_name,
struct parse_events_error *error __maybe_unused)
struct parse_events_error *error __maybe_unused,
struct list_head *head_config)
{
struct perf_evsel *evsel;
......@@ -426,13 +429,22 @@ static int add_tracepoint(struct list_head *list, int *idx,
return PTR_ERR(evsel);
}
if (head_config) {
LIST_HEAD(config_terms);
if (get_config_terms(head_config, &config_terms))
return -ENOMEM;
list_splice(&config_terms, &evsel->config_terms);
}
list_add_tail(&evsel->node, list);
return 0;
}
static int add_tracepoint_multi_event(struct list_head *list, int *idx,
char *sys_name, char *evt_name,
struct parse_events_error *error)
struct parse_events_error *error,
struct list_head *head_config)
{
char evt_path[MAXPATHLEN];
struct dirent *evt_ent;
......@@ -456,7 +468,8 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx,
if (!strglobmatch(evt_ent->d_name, evt_name))
continue;
ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name, error);
ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name,
error, head_config);
}
closedir(evt_dir);
......@@ -465,16 +478,20 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx,
static int add_tracepoint_event(struct list_head *list, int *idx,
char *sys_name, char *evt_name,
struct parse_events_error *error)
struct parse_events_error *error,
struct list_head *head_config)
{
return strpbrk(evt_name, "*?") ?
add_tracepoint_multi_event(list, idx, sys_name, evt_name, error) :
add_tracepoint(list, idx, sys_name, evt_name, error);
add_tracepoint_multi_event(list, idx, sys_name, evt_name,
error, head_config) :
add_tracepoint(list, idx, sys_name, evt_name,
error, head_config);
}
static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
char *sys_name, char *evt_name,
struct parse_events_error *error)
struct parse_events_error *error,
struct list_head *head_config)
{
struct dirent *events_ent;
DIR *events_dir;
......@@ -498,23 +515,13 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
continue;
ret = add_tracepoint_event(list, idx, events_ent->d_name,
evt_name, error);
evt_name, error, head_config);
}
closedir(events_dir);
return ret;
}
int parse_events_add_tracepoint(struct list_head *list, int *idx,
char *sys, char *event,
struct parse_events_error *error)
{
if (strpbrk(sys, "*?"))
return add_tracepoint_multi_sys(list, idx, sys, event, error);
else
return add_tracepoint_event(list, idx, sys, event, error);
}
static int
parse_breakpoint_type(const char *type, struct perf_event_attr *attr)
{
......@@ -599,7 +606,11 @@ static int check_type_val(struct parse_events_term *term,
return -EINVAL;
}
static int config_term(struct perf_event_attr *attr,
typedef int config_term_func_t(struct perf_event_attr *attr,
struct parse_events_term *term,
struct parse_events_error *err);
static int config_term_common(struct perf_event_attr *attr,
struct parse_events_term *term,
struct parse_events_error *err)
{
......@@ -610,12 +621,6 @@ do { \
} while (0)
switch (term->type_term) {
case PARSE_EVENTS__TERM_TYPE_USER:
/*
* Always succeed for sysfs terms, as we dont know
* at this point what type they need to have.
*/
return 0;
case PARSE_EVENTS__TERM_TYPE_CONFIG:
CHECK_TYPE_VAL(NUM);
attr->config = term->val.num;
......@@ -658,6 +663,9 @@ do { \
CHECK_TYPE_VAL(STR);
break;
default:
err->str = strdup("unknown term");
err->idx = term->err_term;
err->help = parse_events_formats_error_string(NULL);
return -EINVAL;
}
......@@ -665,9 +673,44 @@ do { \
#undef CHECK_TYPE_VAL
}
static int config_term_pmu(struct perf_event_attr *attr,
struct parse_events_term *term,
struct parse_events_error *err)
{
if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER)
/*
* Always succeed for sysfs terms, as we dont know
* at this point what type they need to have.
*/
return 0;
else
return config_term_common(attr, term, err);
}
static int config_term_tracepoint(struct perf_event_attr *attr,
struct parse_events_term *term,
struct parse_events_error *err)
{
switch (term->type_term) {
case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
return config_term_common(attr, term, err);
default:
if (err) {
err->idx = term->err_term;
err->str = strdup("unknown term");
err->help = strdup("valid terms: call-graph,stack-size\n");
}
return -EINVAL;
}
return 0;
}
static int config_attr(struct perf_event_attr *attr,
struct list_head *head,
struct parse_events_error *err)
struct parse_events_error *err,
config_term_func_t config_term)
{
struct parse_events_term *term;
......@@ -722,6 +765,27 @@ do { \
return 0;
}
int parse_events_add_tracepoint(struct list_head *list, int *idx,
char *sys, char *event,
struct parse_events_error *error,
struct list_head *head_config)
{
if (head_config) {
struct perf_event_attr attr;
if (config_attr(&attr, head_config, error,
config_term_tracepoint))
return -EINVAL;
}
if (strpbrk(sys, "*?"))
return add_tracepoint_multi_sys(list, idx, sys, event,
error, head_config);
else
return add_tracepoint_event(list, idx, sys, event,
error, head_config);
}
int parse_events_add_numeric(struct parse_events_evlist *data,
struct list_head *list,
u32 type, u64 config,
......@@ -735,7 +799,8 @@ int parse_events_add_numeric(struct parse_events_evlist *data,
attr.config = config;
if (head_config) {
if (config_attr(&attr, head_config, data->error))
if (config_attr(&attr, head_config, data->error,
config_term_common))
return -EINVAL;
if (get_config_terms(head_config, &config_terms))
......@@ -795,7 +860,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data,
* Configure hardcoded terms first, no need to check
* return value when called with fail == 0 ;)
*/
if (config_attr(&attr, head_config, data->error))
if (config_attr(&attr, head_config, data->error, config_term_pmu))
return -EINVAL;
if (get_config_terms(head_config, &config_terms))
......@@ -1861,3 +1926,29 @@ void parse_events_evlist_error(struct parse_events_evlist *data,
err->str = strdup(str);
WARN_ONCE(!err->str, "WARNING: failed to allocate error string");
}
/*
* Return string contains valid config terms of an event.
* @additional_terms: For terms such as PMU sysfs terms.
*/
char *parse_events_formats_error_string(char *additional_terms)
{
char *str;
static const char *static_terms = "config,config1,config2,name,"
"period,freq,branch_type,time,"
"call-graph,stack-size\n";
/* valid terms */
if (additional_terms) {
if (!asprintf(&str, "valid terms: %s,%s",
additional_terms, static_terms))
goto fail;
} else {
if (!asprintf(&str, "valid terms: %s", static_terms))
goto fail;
}
return str;
fail:
return NULL;
}
......@@ -119,7 +119,8 @@ int parse_events__modifier_group(struct list_head *list, char *event_mod);
int parse_events_name(struct list_head *list, char *name);
int parse_events_add_tracepoint(struct list_head *list, int *idx,
char *sys, char *event,
struct parse_events_error *error);
struct parse_events_error *error,
struct list_head *head_config);
int parse_events_add_numeric(struct parse_events_evlist *data,
struct list_head *list,
u32 type, u64 config,
......@@ -156,5 +157,6 @@ int print_hwcache_events(const char *event_glob, bool name_only);
extern int is_valid_tracepoint(const char *event_string);
int valid_event_mount(const char *eventfs);
char *parse_events_formats_error_string(char *additional_terms);
#endif /* __PERF_PARSE_EVENTS_H */
......@@ -174,7 +174,7 @@ modifier_bp [rwx]{1,3}
<config>{
/*
* Please update formats_error_string any time
* Please update parse_events_formats_error_string any time
* new static term is added.
*/
config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
......
......@@ -67,6 +67,7 @@ static inc_group_count(struct list_head *list,
%type <head> event_legacy_cache
%type <head> event_legacy_mem
%type <head> event_legacy_tracepoint
%type <tracepoint_name> tracepoint_name
%type <head> event_legacy_numeric
%type <head> event_legacy_raw
%type <head> event_def
......@@ -84,6 +85,10 @@ static inc_group_count(struct list_head *list,
u64 num;
struct list_head *head;
struct parse_events_term *term;
struct tracepoint_name {
char *sys;
char *event;
} tracepoint_name;
}
%%
......@@ -368,38 +373,60 @@ PE_PREFIX_MEM PE_VALUE sep_dc
}
event_legacy_tracepoint:
PE_NAME '-' PE_NAME ':' PE_NAME
tracepoint_name
{
struct parse_events_evlist *data = _data;
struct parse_events_error *error = data->error;
struct list_head *list;
char sys_name[128];
snprintf(&sys_name, 128, "%s-%s", $1, $3);
ALLOC_LIST(list);
if (parse_events_add_tracepoint(list, &data->idx, &sys_name, $5, error)) {
if (error)
error->idx = @1.first_column;
if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event,
error, NULL))
return -1;
}
$$ = list;
}
|
PE_NAME ':' PE_NAME
tracepoint_name '/' event_config '/'
{
struct parse_events_evlist *data = _data;
struct parse_events_error *error = data->error;
struct list_head *list;
ALLOC_LIST(list);
if (parse_events_add_tracepoint(list, &data->idx, $1, $3, error)) {
if (error)
error->idx = @1.first_column;
if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event,
error, $3))
return -1;
}
$$ = list;
}
tracepoint_name:
PE_NAME '-' PE_NAME ':' PE_NAME
{
char sys_name[128];
struct tracepoint_name tracepoint;
snprintf(&sys_name, 128, "%s-%s", $1, $3);
tracepoint.sys = &sys_name;
tracepoint.event = $5;
$$ = tracepoint;
}
|
PE_NAME ':' PE_NAME
{
struct tracepoint_name tracepoint = {$1, $3};
$$ = tracepoint;
}
event_legacy_numeric:
PE_VALUE ':' PE_VALUE
{
......
......@@ -626,38 +626,26 @@ static int pmu_resolve_param_term(struct parse_events_term *term,
return -1;
}
static char *formats_error_string(struct list_head *formats)
static char *pmu_formats_string(struct list_head *formats)
{
struct perf_pmu_format *format;
char *err, *str;
static const char *static_terms = "config,config1,config2,name,"
"period,freq,branch_type,time,"
"call-graph,stack-size\n";
char *str;
struct strbuf buf;
unsigned i = 0;
if (!asprintf(&str, "valid terms:"))
if (!formats)
return NULL;
strbuf_init(&buf, 0);
/* sysfs exported terms */
list_for_each_entry(format, formats, list) {
char c = i++ ? ',' : ' ';
err = str;
if (!asprintf(&str, "%s%c%s", err, c, format->name))
goto fail;
free(err);
}
list_for_each_entry(format, formats, list)
strbuf_addf(&buf, i++ ? ",%s" : "%s",
format->name);
/* static terms */
err = str;
if (!asprintf(&str, "%s,%s", err, static_terms))
goto fail;
str = strbuf_detach(&buf, NULL);
strbuf_release(&buf);
free(err);
return str;
fail:
free(err);
return NULL;
}
/*
......@@ -693,9 +681,12 @@ static int pmu_config_term(struct list_head *formats,
if (verbose)
printf("Invalid event/parameter '%s'\n", term->config);
if (err) {
char *pmu_term = pmu_formats_string(formats);
err->idx = term->err_term;
err->str = strdup("unknown term");
err->help = formats_error_string(formats);
err->help = parse_events_formats_error_string(pmu_term);
free(pmu_term);
}
return -EINVAL;
}
......
......@@ -319,7 +319,7 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
if (thread__resolve_callchain(al->thread, evsel,
sample, NULL, NULL,
PERF_MAX_STACK_DEPTH) != 0) {
scripting_max_stack) != 0) {
pr_err("Failed to resolve callchain. Skipping\n");
goto exit;
}
......
......@@ -1101,6 +1101,9 @@ static int machines__deliver_event(struct machines *machines,
case PERF_RECORD_UNTHROTTLE:
return tool->unthrottle(tool, event, sample, machine);
case PERF_RECORD_AUX:
if (tool->aux == perf_event__process_aux &&
(event->aux.flags & PERF_AUX_FLAG_TRUNCATED))
evlist->stats.total_aux_lost += 1;
return tool->aux(tool, event, sample, machine);
case PERF_RECORD_ITRACE_START:
return tool->itrace_start(tool, event, sample, machine);
......@@ -1346,6 +1349,13 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
}
}
if (session->tool->aux == perf_event__process_aux &&
stats->total_aux_lost != 0) {
ui__warning("AUX data lost %" PRIu64 " times out of %u!\n\n",
stats->total_aux_lost,
stats->nr_events[PERF_RECORD_AUX]);
}
if (stats->nr_unknown_events != 0) {
ui__warning("Found %u unknown events!\n\n"
"Is this an older tool processing a perf.data "
......@@ -1790,7 +1800,7 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample,
if (thread__resolve_callchain(al->thread, evsel,
sample, NULL, NULL,
PERF_MAX_STACK_DEPTH) != 0) {
stack_depth) != 0) {
if (verbose)
error("Failed to resolve callchain. Skipping\n");
return;
......
......@@ -78,6 +78,8 @@ struct scripting_ops {
int (*generate_script) (struct pevent *pevent, const char *outfile);
};
extern unsigned int scripting_max_stack;
int script_spec_register(const char *spec, struct scripting_ops *ops);
void setup_perl_scripting(void);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment