Commit 510457ec authored by Ingo Molnar

Merge tag 'perf-core-for-mingo-4.13-20170718' of...

Merge tag 'perf-core-for-mingo-4.13-20170718' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

- Initial support for namespaces, using setns to access files in
  namespaces, grabbing their build-ids, etc. We still need to work
  more to deal with namespaces that vanish before we can get the
  needed data to do analysis, but this should be as good as what is
  in bcc now (Krister Johansen)

- Add header record types to pipe-mode, now this command:

  $ perf record -o - -e cycles sleep 1 | perf report --stdio --header

  Will show the same as in non-pipe mode, i.e. involving a perf.data
  file (David Carrillo-Cisneros)

- Implement a visual marker for fused x86 instructions in the annotate
  TUI browser, available now in 'perf report', more work needed to have
  it available as well in 'perf top' (Jin Yao)

  Further explanation from one of Jin's patches:

       │   ┌──cmpl   $0x0,argp_program_version_hook
 81.93 │   ├──je     20
       │   │  lock   cmpxchg %esi,0x38a9a4(%rip)
       │   │↓ jne    29
       │   │↓ jmp    43
 11.47 │20:└─→cmpxch %esi,0x38a999(%rip)

  That means the cmpl+je is a fused instruction pair and they should be
  considered together.

- Record the branch type and then show statistics and info about
  it in callchain entries (Jin Yao)

  Example from one of Jin's patches:

  # perf record -g -j any,save_type
  # perf report --branch-history --stdio --no-children

  38.50%  div.c:45                [.] main                    div
          |
          ---main div.c:42 (RET CROSS_2M cycles:2)
             compute_flag div.c:28 (cycles:2)
             compute_flag div.c:27 (RET CROSS_2M cycles:1)
             rand rand.c:28 (cycles:1)
             rand rand.c:28 (RET CROSS_2M cycles:1)
             __random random.c:298 (cycles:1)
             __random random.c:297 (COND_BWD CROSS_2M cycles:1)
             __random random.c:295 (cycles:1)
             __random random.c:295 (COND_BWD CROSS_2M cycles:1)
             __random random.c:295 (cycles:1)
             __random random.c:295 (RET CROSS_2M cycles:9)

- Beautify the fcntl syscall, which is an interesting one in the sense
  that infrastructure had to be put in place to change the formatters of
  some arguments according to the value in a previous one, i.e. cmd
  dictates how arg and the syscall return will be formatted.
  (Arnaldo Carvalho de Melo)

Infrastructure changes:

- 'perf test attr' fixes (Jiri Olsa)

Vendor events changes:

- Add POWER9 PMU events (Sukadev Bhattiprolu)

- Support additional POWER8+ PVR in PMU mapfile (Shriya)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents 3bda69c1 b851dd49
......@@ -109,6 +109,9 @@ enum {
X86_BR_ZERO_CALL = 1 << 15,/* zero length call */
X86_BR_CALL_STACK = 1 << 16,/* call stack */
X86_BR_IND_JMP = 1 << 17,/* indirect jump */
X86_BR_TYPE_SAVE = 1 << 18,/* indicate to save branch type */
};
#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
......@@ -510,6 +513,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
cpuc->lbr_entries[i].in_tx = 0;
cpuc->lbr_entries[i].abort = 0;
cpuc->lbr_entries[i].cycles = 0;
cpuc->lbr_entries[i].type = 0;
cpuc->lbr_entries[i].reserved = 0;
}
cpuc->lbr_stack.nr = i;
......@@ -596,6 +600,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
cpuc->lbr_entries[out].in_tx = in_tx;
cpuc->lbr_entries[out].abort = abort;
cpuc->lbr_entries[out].cycles = cycles;
cpuc->lbr_entries[out].type = 0;
cpuc->lbr_entries[out].reserved = 0;
out++;
}
......@@ -673,6 +678,10 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
if (br_type & PERF_SAMPLE_BRANCH_CALL)
mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
mask |= X86_BR_TYPE_SAVE;
/*
* stash actual user request into reg, it may
* be used by fixup code for some CPU
......@@ -926,6 +935,43 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
return ret;
}
#define X86_BR_TYPE_MAP_MAX	16

/*
 * Translation table from the x86 software branch classification to the
 * generic PERF_BR_* branch types.
 *
 * Entry N belongs to X86_BR bit (N + 2): the two privilege level bits
 * (X86_BR_USER and X86_BR_KERNEL) are shifted out before the lookup, so
 * X86_BR_CALL (bit 2) lands on entry 0 and X86_BR_IND_JMP (bit 17) on
 * entry 15. The table is only ever read, hence const.
 */
static const int branch_map[X86_BR_TYPE_MAP_MAX] = {
	PERF_BR_CALL,		/* X86_BR_CALL */
	PERF_BR_RET,		/* X86_BR_RET */
	PERF_BR_SYSCALL,	/* X86_BR_SYSCALL */
	PERF_BR_SYSRET,		/* X86_BR_SYSRET */
	PERF_BR_UNKNOWN,	/* X86_BR_INT */
	PERF_BR_UNKNOWN,	/* X86_BR_IRET */
	PERF_BR_COND,		/* X86_BR_JCC */
	PERF_BR_UNCOND,		/* X86_BR_JMP */
	PERF_BR_UNKNOWN,	/* X86_BR_IRQ */
	PERF_BR_IND_CALL,	/* X86_BR_IND_CALL */
	PERF_BR_UNKNOWN,	/* X86_BR_ABORT */
	PERF_BR_UNKNOWN,	/* X86_BR_IN_TX */
	PERF_BR_UNKNOWN,	/* X86_BR_NO_TX */
	PERF_BR_CALL,		/* X86_BR_ZERO_CALL */
	PERF_BR_UNKNOWN,	/* X86_BR_CALL_STACK */
	PERF_BR_IND,		/* X86_BR_IND_JMP */
};

/*
 * Convert an X86_BR_* bit mask into a single PERF_BR_* value.
 *
 * The privilege level bits are discarded and the lowest remaining set bit
 * selects the table entry. PERF_BR_UNKNOWN is returned when no bit is left
 * or when the bit lies beyond the end of the table.
 */
static int
common_branch_type(int type)
{
	int i;

	type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
	if (!type)
		return PERF_BR_UNKNOWN;

	i = __ffs(type);
	if (i >= X86_BR_TYPE_MAP_MAX)
		return PERF_BR_UNKNOWN;

	return branch_map[i];
}
/*
* implement actual branch filter based on user demand.
* Hardware may not exactly satisfy that request, thus
......@@ -942,7 +988,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
bool compress = false;
/* if sampling all branches, then nothing to filter */
if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
return;
for (i = 0; i < cpuc->lbr_stack.nr; i++) {
......@@ -963,6 +1010,9 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
cpuc->lbr_entries[i].from = 0;
compress = true;
}
if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
cpuc->lbr_entries[i].type = common_branch_type(type);
}
if (!compress)
......
......@@ -174,6 +174,8 @@ enum perf_branch_sample_type_shift {
PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT = 14, /* no flags */
PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT = 15, /* no cycles */
PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */
PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
};
......@@ -198,9 +200,30 @@ enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
PERF_SAMPLE_BRANCH_TYPE_SAVE =
1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
/*
* Common flow change classification
*
* Generic branch type values. PERF_BR_MAX comes right after the last
* listed type, so it marks the end of the list (11 with the entries
* below).
*/
enum {
PERF_BR_UNKNOWN = 0, /* unknown */
PERF_BR_COND = 1, /* conditional */
PERF_BR_UNCOND = 2, /* unconditional */
PERF_BR_IND = 3, /* indirect */
PERF_BR_CALL = 4, /* function call */
PERF_BR_IND_CALL = 5, /* indirect function call */
PERF_BR_RET = 6, /* function return */
PERF_BR_SYSCALL = 7, /* syscall */
PERF_BR_SYSRET = 8, /* syscall return */
PERF_BR_COND_CALL = 9, /* conditional function call */
PERF_BR_COND_RET = 10, /* conditional function return */
PERF_BR_MAX,
};
#define PERF_SAMPLE_BRANCH_PLM_ALL \
(PERF_SAMPLE_BRANCH_USER|\
PERF_SAMPLE_BRANCH_KERNEL|\
......@@ -1015,6 +1038,7 @@ union perf_mem_data_src {
* in_tx: running in a hardware transaction
* abort: aborting a hardware transaction
* cycles: cycles from last branch (or 0 if not supported)
* type: branch type
*/
struct perf_branch_entry {
__u64 from;
......@@ -1024,7 +1048,8 @@ struct perf_branch_entry {
in_tx:1, /* in transaction */
abort:1, /* transaction abort */
cycles:16, /* cycle count to last branch */
reserved:44;
type:4, /* branch type */
reserved:40;
};
#endif /* _UAPI_LINUX_PERF_EVENT_H */
......@@ -10,3 +10,6 @@
#ifndef __NR_getcpu
# define __NR_getcpu 318
#endif
#ifndef __NR_setns
# define __NR_setns 346
#endif
......@@ -10,3 +10,6 @@
#ifndef __NR_getcpu
# define __NR_getcpu 309
#endif
#ifndef __NR_setns
#define __NR_setns 308
#endif
#ifndef _UAPI_ASM_X86_UNISTD_H
#define _UAPI_ASM_X86_UNISTD_H
/* x32 syscall flag bit */
#define __X32_SYSCALL_BIT 0x40000000
#ifndef __KERNEL__
# ifdef __i386__
# include <asm/unistd_32.h>
# elif defined(__ILP32__)
# include <asm/unistd_x32.h>
# else
# include <asm/unistd_64.h>
# endif
#endif
#endif /* _UAPI_ASM_X86_UNISTD_H */
......@@ -64,7 +64,8 @@ FEATURE_TESTS_BASIC := \
get_cpuid \
bpf \
sched_getcpu \
sdt
sdt \
setns
# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
# of all feature tests
......
......@@ -49,7 +49,8 @@ FILES= \
test-sdt.bin \
test-cxx.bin \
test-jvmti.bin \
test-sched_getcpu.bin
test-sched_getcpu.bin \
test-setns.bin
FILES := $(addprefix $(OUTPUT),$(FILES))
......@@ -95,6 +96,9 @@ $(OUTPUT)test-glibc.bin:
$(OUTPUT)test-sched_getcpu.bin:
$(BUILD)
$(OUTPUT)test-setns.bin:
$(BUILD)
DWARFLIBS := -ldw
ifeq ($(findstring -static,${LDFLAGS}),-static)
DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
......
......@@ -153,6 +153,10 @@
# include "test-sdt.c"
#undef main
#define main main_test_setns
# include "test-setns.c"
#undef main
int main(int argc, char *argv[])
{
main_test_libpython();
......@@ -188,6 +192,7 @@ int main(int argc, char *argv[])
main_test_libcrypto();
main_test_sched_getcpu();
main_test_sdt();
main_test_setns();
return 0;
}
#define _GNU_SOURCE
#include <sched.h>
/*
 * Minimal program for the 'setns' build feature test: it calls setns()
 * and hands the result back as the exit status.
 */
int main(void)
{
	int ret = setns(0, 0);

	return ret;
}
#ifndef _ASM_GENERIC_FCNTL_H
#define _ASM_GENERIC_FCNTL_H
#include <linux/types.h>
/*
* FMODE_EXEC is 0x20
* FMODE_NONOTIFY is 0x4000000
* These cannot be used by userspace O_* until internal and external open
* flags are split.
* -Eric Paris
*/
/*
* When introducing new O_* bits, please check its uniqueness in fcntl_init().
*/
#define O_ACCMODE 00000003
#define O_RDONLY 00000000
#define O_WRONLY 00000001
#define O_RDWR 00000002
#ifndef O_CREAT
#define O_CREAT 00000100 /* not fcntl */
#endif
#ifndef O_EXCL
#define O_EXCL 00000200 /* not fcntl */
#endif
#ifndef O_NOCTTY
#define O_NOCTTY 00000400 /* not fcntl */
#endif
#ifndef O_TRUNC
#define O_TRUNC 00001000 /* not fcntl */
#endif
#ifndef O_APPEND
#define O_APPEND 00002000
#endif
#ifndef O_NONBLOCK
#define O_NONBLOCK 00004000
#endif
#ifndef O_DSYNC
#define O_DSYNC 00010000 /* used to be O_SYNC, see below */
#endif
#ifndef FASYNC
#define FASYNC 00020000 /* fcntl, for BSD compatibility */
#endif
#ifndef O_DIRECT
#define O_DIRECT 00040000 /* direct disk access hint */
#endif
#ifndef O_LARGEFILE
#define O_LARGEFILE 00100000
#endif
#ifndef O_DIRECTORY
#define O_DIRECTORY 00200000 /* must be a directory */
#endif
#ifndef O_NOFOLLOW
#define O_NOFOLLOW 00400000 /* don't follow links */
#endif
#ifndef O_NOATIME
#define O_NOATIME 01000000
#endif
#ifndef O_CLOEXEC
#define O_CLOEXEC 02000000 /* set close_on_exec */
#endif
/*
* Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using
* the O_SYNC flag. We continue to use the existing numerical value
* for O_DSYNC semantics now, but using the correct symbolic name for it.
* This new value is used to request true Posix O_SYNC semantics. It is
* defined in this strange way to make sure applications compiled against
* new headers get at least O_DSYNC semantics on older kernels.
*
* This has the nice side-effect that we can simply test for O_DSYNC
* wherever we do not care if O_DSYNC or O_SYNC is used.
*
* Note: __O_SYNC must never be used directly.
*/
#ifndef O_SYNC
#define __O_SYNC 04000000
#define O_SYNC (__O_SYNC|O_DSYNC)
#endif
#ifndef O_PATH
#define O_PATH 010000000
#endif
#ifndef __O_TMPFILE
#define __O_TMPFILE 020000000
#endif
/* a horrid kludge trying to make sure that this will fail on old kernels */
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
#define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT)
#ifndef O_NDELAY
#define O_NDELAY O_NONBLOCK
#endif
#define F_DUPFD 0 /* dup */
#define F_GETFD 1 /* get close_on_exec */
#define F_SETFD 2 /* set/clear close_on_exec */
#define F_GETFL 3 /* get file->f_flags */
#define F_SETFL 4 /* set file->f_flags */
#ifndef F_GETLK
#define F_GETLK 5
#define F_SETLK 6
#define F_SETLKW 7
#endif
#ifndef F_SETOWN
#define F_SETOWN 8 /* for sockets. */
#define F_GETOWN 9 /* for sockets. */
#endif
#ifndef F_SETSIG
#define F_SETSIG 10 /* for sockets. */
#define F_GETSIG 11 /* for sockets. */
#endif
#ifndef CONFIG_64BIT
#ifndef F_GETLK64
#define F_GETLK64 12 /* using 'struct flock64' */
#define F_SETLK64 13
#define F_SETLKW64 14
#endif
#endif
#ifndef F_SETOWN_EX
#define F_SETOWN_EX 15
#define F_GETOWN_EX 16
#endif
#ifndef F_GETOWNER_UIDS
#define F_GETOWNER_UIDS 17
#endif
/*
* Open File Description Locks
*
* Usually record locks held by a process are released on *any* close and are
* not inherited across a fork().
*
* These cmd values will set locks that conflict with process-associated
* record locks, but are "owned" by the open file description, not the
* process. This means that they are inherited across fork() like BSD (flock)
* locks, and they are only released automatically when the last reference to
* the open file against which they were acquired is put.
*/
#define F_OFD_GETLK 36
#define F_OFD_SETLK 37
#define F_OFD_SETLKW 38
#define F_OWNER_TID 0
#define F_OWNER_PID 1
#define F_OWNER_PGRP 2
/*
* Owner description: a type plus a pid value.
* NOTE(review): presumably the argument of F_SETOWN_EX/F_GETOWN_EX, with
* type being one of F_OWNER_TID, F_OWNER_PID or F_OWNER_PGRP - confirm.
*/
struct f_owner_ex {
int type;
__kernel_pid_t pid;
};
/* for F_[GET|SET]FD */
#define FD_CLOEXEC 1 /* actually anything with low bit set goes */
/* for posix fcntl() and lockf() */
#ifndef F_RDLCK
#define F_RDLCK 0
#define F_WRLCK 1
#define F_UNLCK 2
#endif
/* for old implementation of bsd flock () */
#ifndef F_EXLCK
#define F_EXLCK 4 /* or 3 */
#define F_SHLCK 8 /* or 4 */
#endif
/* operations for bsd flock(), also used by the kernel implementation */
#define LOCK_SH 1 /* shared lock */
#define LOCK_EX 2 /* exclusive lock */
#define LOCK_NB 4 /* or'd with one of the above to prevent
blocking */
#define LOCK_UN 8 /* remove lock */
#define LOCK_MAND 32 /* This is a mandatory flock ... */
#define LOCK_READ 64 /* which allows concurrent read operations */
#define LOCK_WRITE 128 /* which allows concurrent write operations */
#define LOCK_RW 192 /* which allows concurrent read & write ops */
#define F_LINUX_SPECIFIC_BASE 1024
#ifndef HAVE_ARCH_STRUCT_FLOCK
#ifndef __ARCH_FLOCK_PAD
#define __ARCH_FLOCK_PAD
#endif
/*
* Default lock description. It is skipped when HAVE_ARCH_STRUCT_FLOCK is
* already defined. __ARCH_FLOCK_PAD is placed after the last member and
* expands to nothing unless it was defined before this point.
* NOTE(review): l_type presumably takes F_RDLCK, F_WRLCK or F_UNLCK -
* confirm against the fcntl() documentation.
*/
struct flock {
short l_type;
short l_whence;
__kernel_off_t l_start;
__kernel_off_t l_len;
__kernel_pid_t l_pid;
__ARCH_FLOCK_PAD
};
#endif
#ifndef HAVE_ARCH_STRUCT_FLOCK64
#ifndef __ARCH_FLOCK64_PAD
#define __ARCH_FLOCK64_PAD
#endif
/*
* Same members as struct flock, except that l_start and l_len use
* __kernel_loff_t instead of __kernel_off_t. It is skipped when
* HAVE_ARCH_STRUCT_FLOCK64 is already defined. __ARCH_FLOCK64_PAD is
* placed after the last member and expands to nothing unless it was
* defined before this point.
*/
struct flock64 {
short l_type;
short l_whence;
__kernel_loff_t l_start;
__kernel_loff_t l_len;
__kernel_pid_t l_pid;
__ARCH_FLOCK64_PAD
};
#endif
#endif /* _ASM_GENERIC_FCNTL_H */
......@@ -42,6 +42,27 @@
#define F_SEAL_WRITE 0x0008 /* prevent writes */
/* (1U << 31) is reserved for signed error codes */
/*
* Set/Get write life time hints. {GET,SET}_RW_HINT operate on the
* underlying inode, while {GET,SET}_FILE_RW_HINT operate only on
* the specific file.
*/
#define F_GET_RW_HINT (F_LINUX_SPECIFIC_BASE + 11)
#define F_SET_RW_HINT (F_LINUX_SPECIFIC_BASE + 12)
#define F_GET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 13)
#define F_SET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 14)
/*
* Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
* used to clear any hints previously set.
*/
#define RWF_WRITE_LIFE_NOT_SET 0
#define RWH_WRITE_LIFE_NONE 1
#define RWH_WRITE_LIFE_SHORT 2
#define RWH_WRITE_LIFE_MEDIUM 3
#define RWH_WRITE_LIFE_LONG 4
#define RWH_WRITE_LIFE_EXTREME 5
/*
* Types of directory notifications that may be requested.
*/
......
......@@ -174,6 +174,8 @@ enum perf_branch_sample_type_shift {
PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT = 14, /* no flags */
PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT = 15, /* no cycles */
PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */
PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
};
......@@ -198,9 +200,30 @@ enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
PERF_SAMPLE_BRANCH_TYPE_SAVE =
1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
/*
* Common flow change classification
*
* Generic branch type values. PERF_BR_MAX comes right after the last
* listed type, so it marks the end of the list (11 with the entries
* below).
*/
enum {
PERF_BR_UNKNOWN = 0, /* unknown */
PERF_BR_COND = 1, /* conditional */
PERF_BR_UNCOND = 2, /* unconditional */
PERF_BR_IND = 3, /* indirect */
PERF_BR_CALL = 4, /* function call */
PERF_BR_IND_CALL = 5, /* indirect function call */
PERF_BR_RET = 6, /* function return */
PERF_BR_SYSCALL = 7, /* syscall */
PERF_BR_SYSRET = 8, /* syscall return */
PERF_BR_COND_CALL = 9, /* conditional function call */
PERF_BR_COND_RET = 10, /* conditional function return */
PERF_BR_MAX,
};
#define PERF_SAMPLE_BRANCH_PLM_ALL \
(PERF_SAMPLE_BRANCH_USER|\
PERF_SAMPLE_BRANCH_KERNEL|\
......@@ -1015,6 +1038,7 @@ union perf_mem_data_src {
* in_tx: running in a hardware transaction
* abort: aborting a hardware transaction
* cycles: cycles from last branch (or 0 if not supported)
* type: branch type
*/
struct perf_branch_entry {
__u64 from;
......@@ -1024,7 +1048,8 @@ struct perf_branch_entry {
in_tx:1, /* in transaction */
abort:1, /* transaction abort */
cycles:16, /* cycle count to last branch */
reserved:44;
type:4, /* branch type */
reserved:40;
};
#endif /* _UAPI_LINUX_PERF_EVENT_H */
......@@ -50,6 +50,6 @@ libperf-y += util/
libperf-y += arch/
libperf-y += ui/
libperf-y += scripts/
libperf-y += trace/beauty/
libperf-$(CONFIG_AUDIT) += trace/beauty/
gtk-y += ui/gtk/
......@@ -61,6 +61,11 @@ OPTIONS
--verbose::
Be more verbose.
--target-ns=PID::
Obtain mount namespace information from the target pid. This is
used when creating a uprobe for a process that resides in a
different mount namespace from the perf(1) utility.
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-buildid-list[1]
......@@ -130,6 +130,11 @@ OPTIONS
--max-probes=NUM::
Set the maximum number of probe points for an event. Default is 128.
--target-ns=PID::
Obtain mount namespace information from the target pid. This is
used when creating a uprobe for a process that resides in a
different mount namespace from the perf(1) utility.
-x::
--exec=PATH::
Specify path to the executable or shared library file for user
......@@ -264,6 +269,15 @@ Add probes at malloc() function on libc
./perf probe -x /lib/libc.so.6 malloc or ./perf probe /lib/libc.so.6 malloc
Add a uprobe to a target process running in a different mount namespace
./perf probe --target-ns <target pid> -x /lib64/libc.so.6 malloc
Add a USDT probe to a target process running in a different mount namespace
./perf probe --target-ns <target pid> -x /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.121-0.b13.el7_3.x86_64/jre/lib/amd64/server/libjvm.so %sdt_hotspot:thread__sleep__end
SEE ALSO
--------
linkperf:perf-trace[1], linkperf:perf-record[1], linkperf:perf-buildid-cache[1]
......@@ -332,6 +332,7 @@ following filters are defined:
- no_tx: only when the target is not in a hardware transaction
- abort_tx: only when the target is a hardware transaction abort
- cond: conditional branches
- save_type: save branch type during sampling in case binary is not available later
+
The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
......
......@@ -398,6 +398,11 @@ struct auxtrace_error_event {
char msg[MAX_AUXTRACE_ERROR_MSG];
};
PERF_RECORD_HEADER_FEATURE = 80,
Describes a header feature. These are records used in pipe-mode that
contain information that otherwise would be in perf.data file's header.
Event types
Define the event attributes with their IDs.
......@@ -422,8 +427,9 @@ struct perf_pipe_file_header {
};
The information about attrs, data, and event_types is instead in the
synthesized events PERF_RECORD_ATTR, PERF_RECORD_HEADER_TRACING_DATA and
PERF_RECORD_HEADER_EVENT_TYPE that are generated by perf record in pipe-mode.
synthesized events PERF_RECORD_ATTR, PERF_RECORD_HEADER_TRACING_DATA,
PERF_RECORD_HEADER_EVENT_TYPE, and PERF_RECORD_HEADER_FEATURE
that are generated by perf record in pipe-mode.
References:
......
......@@ -330,6 +330,11 @@ ifeq ($(feature-sched_getcpu), 1)
CFLAGS += -DHAVE_SCHED_GETCPU_SUPPORT
endif
ifeq ($(feature-setns), 1)
CFLAGS += -DHAVE_SETNS_SUPPORT
$(call detected,CONFIG_SETNS)
endif
ifndef NO_LIBELF
CFLAGS += -DHAVE_LIBELF_SUPPORT
EXTLIBS += -lelf
......
......@@ -126,7 +126,7 @@ void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
struct rb_node *tmp;
int i = 0;
map = get_target_map(pev->target, pev->uprobes);
map = get_target_map(pev->target, pev->nsi, pev->uprobes);
if (!map || map__load(map) < 0)
return;
......
......@@ -76,3 +76,49 @@ static struct ins x86__instructions[] = {
{ .name = "xbeginq", .ops = &jump_ops, },
{ .name = "retq", .ops = &ret_ops, },
};
/*
 * Tell whether the instruction pair @ins1, @ins2 is to be shown as fused.
 *
 * Only family 6 CPUs with model >= 0x1e are considered, and a pair whose
 * second instruction contains "jmp" never counts. On every accepted model
 * a first instruction containing "cmp" (but not "xchg") or "test"
 * qualifies; on models other than 0x1e (Nehalem) "add", "sub", "and",
 * "inc" and "dec" qualify as well.
 */
static bool x86__ins_is_fused(struct arch *arch, const char *ins1,
			      const char *ins2)
{
	static const char * const newer_only[] = {
		"add", "sub", "and", "inc", "dec",
	};
	unsigned int k;

	if (arch->family != 6)
		return false;
	if (arch->model < 0x1e)
		return false;
	if (strstr(ins2, "jmp"))
		return false;

	/* Accepted on Nehalem and on everything newer */
	if (strstr(ins1, "cmp") && !strstr(ins1, "xchg"))
		return true;
	if (strstr(ins1, "test"))
		return true;

	/* Nehalem: nothing else qualifies */
	if (arch->model == 0x1e)
		return false;

	/* Newer platform */
	for (k = 0; k < sizeof(newer_only) / sizeof(newer_only[0]); k++) {
		if (strstr(ins1, newer_only[k]))
			return true;
	}

	return false;
}
/*
 * Fill arch->family and arch->model from a cpuid string.
 *
 * The string is expected to look like "GenuineIntel,family,model,stepping";
 * the leading field is skipped and the stepping is parsed but not stored.
 * Returns 0 when all three numbers were read, -1 otherwise.
 */
static int x86__cpuid_parse(struct arch *arch, char *cpuid)
{
	unsigned int fam, mod, step;

	/*
	 * cpuid = "GenuineIntel,family,model,stepping"
	 */
	if (sscanf(cpuid, "%*[^,],%u,%u,%u", &fam, &mod, &step) != 3)
		return -1;

	arch->family = fam;
	arch->model = mod;

	return 0;
}
......@@ -397,6 +397,7 @@ int cmd_annotate(int argc, const char **argv)
.namespaces = perf_event__process_namespaces,
.attr = perf_event__process_attr,
.build_id = perf_event__process_build_id,
.feature = perf_event__process_feature,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
......
......@@ -14,6 +14,7 @@
#include <unistd.h>
#include "builtin.h"
#include "perf.h"
#include "namespaces.h"
#include "util/cache.h"
#include "util/debug.h"
#include "util/header.h"
......@@ -165,33 +166,41 @@ static int build_id_cache__add_kcore(const char *filename, bool force)
return 0;
}
static int build_id_cache__add_file(const char *filename)
static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi)
{
char sbuild_id[SBUILD_ID_SIZE];
u8 build_id[BUILD_ID_SIZE];
int err;
struct nscookie nsc;
if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) {
nsinfo__mountns_enter(nsi, &nsc);
err = filename__read_build_id(filename, &build_id, sizeof(build_id));
nsinfo__mountns_exit(&nsc);
if (err < 0) {
pr_debug("Couldn't read a build-id in %s\n", filename);
return -1;
}
build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
err = build_id_cache__add_s(sbuild_id, filename,
err = build_id_cache__add_s(sbuild_id, filename, nsi,
false, false);
pr_debug("Adding %s %s: %s\n", sbuild_id, filename,
err ? "FAIL" : "Ok");
return err;
}
static int build_id_cache__remove_file(const char *filename)
static int build_id_cache__remove_file(const char *filename, struct nsinfo *nsi)
{
u8 build_id[BUILD_ID_SIZE];
char sbuild_id[SBUILD_ID_SIZE];
struct nscookie nsc;
int err;
if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) {
nsinfo__mountns_enter(nsi, &nsc);
err = filename__read_build_id(filename, &build_id, sizeof(build_id));
nsinfo__mountns_exit(&nsc);
if (err < 0) {
pr_debug("Couldn't read a build-id in %s\n", filename);
return -1;
}
......@@ -204,13 +213,13 @@ static int build_id_cache__remove_file(const char *filename)
return err;
}
static int build_id_cache__purge_path(const char *pathname)
static int build_id_cache__purge_path(const char *pathname, struct nsinfo *nsi)
{
struct strlist *list;
struct str_node *pos;
int err;
err = build_id_cache__list_build_ids(pathname, &list);
err = build_id_cache__list_build_ids(pathname, nsi, &list);
if (err)
goto out;
......@@ -234,7 +243,7 @@ static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused)
char filename[PATH_MAX];
u8 build_id[BUILD_ID_SIZE];
if (dso__build_id_filename(dso, filename, sizeof(filename)) &&
if (dso__build_id_filename(dso, filename, sizeof(filename), false) &&
filename__read_build_id(filename, build_id,
sizeof(build_id)) != sizeof(build_id)) {
if (errno == ENOENT)
......@@ -256,24 +265,30 @@ static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *f
return 0;
}
static int build_id_cache__update_file(const char *filename)
static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi)
{
u8 build_id[BUILD_ID_SIZE];
char sbuild_id[SBUILD_ID_SIZE];
struct nscookie nsc;
int err = 0;
int err;
if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) {
nsinfo__mountns_enter(nsi, &nsc);
err = filename__read_build_id(filename, &build_id, sizeof(build_id));
nsinfo__mountns_exit(&nsc);
if (err < 0) {
pr_debug("Couldn't read a build-id in %s\n", filename);
return -1;
}
err = 0;
build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
if (build_id_cache__cached(sbuild_id))
err = build_id_cache__remove_s(sbuild_id);
if (!err)
err = build_id_cache__add_s(sbuild_id, filename, false, false);
err = build_id_cache__add_s(sbuild_id, filename, nsi, false,
false);
pr_debug("Updating %s %s: %s\n", sbuild_id, filename,
err ? "FAIL" : "Ok");
......@@ -286,6 +301,7 @@ int cmd_buildid_cache(int argc, const char **argv)
struct strlist *list;
struct str_node *pos;
int ret = 0;
int ns_id = -1;
bool force = false;
char const *add_name_list_str = NULL,
*remove_name_list_str = NULL,
......@@ -299,6 +315,7 @@ int cmd_buildid_cache(int argc, const char **argv)
.mode = PERF_DATA_MODE_READ,
};
struct perf_session *session = NULL;
struct nsinfo *nsi = NULL;
const struct option buildid_cache_options[] = {
OPT_STRING('a', "add", &add_name_list_str,
......@@ -315,6 +332,7 @@ int cmd_buildid_cache(int argc, const char **argv)
OPT_STRING('u', "update", &update_name_list_str, "file list",
"file(s) to update"),
OPT_INCR('v', "verbose", &verbose, "be more verbose"),
OPT_INTEGER(0, "target-ns", &ns_id, "target pid for namespace context"),
OPT_END()
};
const char * const buildid_cache_usage[] = {
......@@ -330,6 +348,9 @@ int cmd_buildid_cache(int argc, const char **argv)
!missing_filename && !update_name_list_str))
usage_with_options(buildid_cache_usage, buildid_cache_options);
if (ns_id > 0)
nsi = nsinfo__new(ns_id);
if (missing_filename) {
file.path = missing_filename;
file.force = force;
......@@ -348,7 +369,7 @@ int cmd_buildid_cache(int argc, const char **argv)
list = strlist__new(add_name_list_str, NULL);
if (list) {
strlist__for_each_entry(pos, list)
if (build_id_cache__add_file(pos->s)) {
if (build_id_cache__add_file(pos->s, nsi)) {
if (errno == EEXIST) {
pr_debug("%s already in the cache\n",
pos->s);
......@@ -366,7 +387,7 @@ int cmd_buildid_cache(int argc, const char **argv)
list = strlist__new(remove_name_list_str, NULL);
if (list) {
strlist__for_each_entry(pos, list)
if (build_id_cache__remove_file(pos->s)) {
if (build_id_cache__remove_file(pos->s, nsi)) {
if (errno == ENOENT) {
pr_debug("%s wasn't in the cache\n",
pos->s);
......@@ -384,7 +405,7 @@ int cmd_buildid_cache(int argc, const char **argv)
list = strlist__new(purge_name_list_str, NULL);
if (list) {
strlist__for_each_entry(pos, list)
if (build_id_cache__purge_path(pos->s)) {
if (build_id_cache__purge_path(pos->s, nsi)) {
if (errno == ENOENT) {
pr_debug("%s wasn't in the cache\n",
pos->s);
......@@ -405,7 +426,7 @@ int cmd_buildid_cache(int argc, const char **argv)
list = strlist__new(update_name_list_str, NULL);
if (list) {
strlist__for_each_entry(pos, list)
if (build_id_cache__update_file(pos->s)) {
if (build_id_cache__update_file(pos->s, nsi)) {
if (errno == ENOENT) {
pr_debug("%s wasn't in the cache\n",
pos->s);
......@@ -424,6 +445,7 @@ int cmd_buildid_cache(int argc, const char **argv)
out:
perf_session__delete(session);
nsinfo__zput(nsi);
return ret;
}
......@@ -770,6 +770,7 @@ int cmd_inject(int argc, const char **argv)
.finished_round = perf_event__repipe_oe_synth,
.build_id = perf_event__repipe_op2_synth,
.id_index = perf_event__repipe_op2_synth,
.feature = perf_event__repipe_op2_synth,
},
.input_name = "-",
.samples = LIST_HEAD_INIT(inject.samples),
......
......@@ -58,6 +58,7 @@ static struct {
struct line_range line_range;
char *target;
struct strfilter *filter;
struct nsinfo *nsi;
} params;
/* Parse an event definition. Note that any error must die. */
......@@ -80,6 +81,9 @@ static int parse_probe_event(const char *str)
params.target_used = true;
}
if (params.nsi)
pev->nsi = nsinfo__get(params.nsi);
/* Parse a perf-probe command into event */
ret = parse_perf_probe_command(str, pev);
pr_debug("%d arguments\n", pev->nargs);
......@@ -189,7 +193,7 @@ static int opt_set_target(const struct option *opt, const char *str,
/* Expand given path to absolute path, except for modulename */
if (params.uprobes || strchr(str, '/')) {
tmp = realpath(str, NULL);
tmp = nsinfo__realpath(str, params.nsi);
if (!tmp) {
pr_warning("Failed to get the absolute path of %s: %m\n", str);
return ret;
......@@ -208,6 +212,34 @@ static int opt_set_target(const struct option *opt, const char *str,
return ret;
}
/*
 * Option callback for --target-ns: parse @str as a pid, build an nsinfo
 * for it and, when that nsinfo has need_setns set, keep a reference to it
 * in params.nsi.
 *
 * Returns 0 on success, -ENOENT when no argument was given, the negated
 * errno reported by strtol(), or -EINVAL when @str is not a plain decimal
 * number or does not fit in pid_t.
 */
static int opt_set_target_ns(const struct option *opt __maybe_unused,
			     const char *str, int unset __maybe_unused)
{
	struct nsinfo *nsip;
	pid_t ns_pid;
	char *end;
	long val;
	int ret;

	if (!str)
		return -ENOENT;

	errno = 0;
	val = strtol(str, &end, 10);
	if (errno != 0) {
		/* save errno before anything else can overwrite it */
		ret = -errno;
		pr_warning("Failed to parse %s as a pid: %s\n", str,
			   strerror(errno));
		return ret;
	}

	/*
	 * strtol() does not flag "no digits" or trailing garbage through
	 * errno, and the cast below would silently truncate a value that
	 * does not fit in pid_t; reject both instead of probing a bogus pid.
	 */
	ns_pid = (pid_t)val;
	if (end == str || *end != '\0' || (long)ns_pid != val) {
		pr_warning("Failed to parse %s as a pid: %s\n", str,
			   strerror(EINVAL));
		return -EINVAL;
	}

	nsip = nsinfo__new(ns_pid);
	if (nsip && nsip->need_setns)
		params.nsi = nsinfo__get(nsip);
	/* drop the reference taken by nsinfo__new() */
	nsinfo__put(nsip);

	return 0;
}
/* Command option callbacks */
#ifdef HAVE_DWARF_SUPPORT
......@@ -299,6 +331,7 @@ static void cleanup_params(void)
line_range__clear(&params.line_range);
free(params.target);
strfilter__delete(params.filter);
nsinfo__put(params.nsi);
memset(&params, 0, sizeof(params));
}
......@@ -383,7 +416,7 @@ static int del_perf_probe_caches(struct strfilter *filter)
}
strlist__for_each_entry(nd, bidlist) {
cache = probe_cache__new(nd->s);
cache = probe_cache__new(nd->s, NULL);
if (!cache)
continue;
if (probe_cache__filter_purge(cache, filter) < 0 ||
......@@ -554,6 +587,8 @@ __cmd_probe(int argc, const char **argv)
OPT_BOOLEAN(0, "cache", &probe_conf.cache, "Manipulate probe cache"),
OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
"Look for files with symbols relative to this directory"),
OPT_CALLBACK(0, "target-ns", NULL, "pid",
"target pid for namespace contexts", opt_set_target_ns),
OPT_END()
};
int ret;
......@@ -634,15 +669,15 @@ __cmd_probe(int argc, const char **argv)
pr_err_with_code(" Error: Failed to show event list.", ret);
return ret;
case 'F':
ret = show_available_funcs(params.target, params.filter,
params.uprobes);
ret = show_available_funcs(params.target, params.nsi,
params.filter, params.uprobes);
if (ret < 0)
pr_err_with_code(" Error: Failed to show functions.", ret);
return ret;
#ifdef HAVE_DWARF_SUPPORT
case 'L':
ret = show_line_range(&params.line_range, params.target,
params.uprobes);
params.nsi, params.uprobes);
if (ret < 0)
pr_err_with_code(" Error: Failed to show lines.", ret);
return ret;
......
......@@ -799,6 +799,13 @@ static int record__synthesize(struct record *rec, bool tail)
return 0;
if (file->is_pipe) {
err = perf_event__synthesize_features(
tool, session, rec->evlist, process_synthesized_event);
if (err < 0) {
pr_err("Couldn't synthesize features.\n");
return err;
}
err = perf_event__synthesize_attrs(tool, session,
process_synthesized_event);
if (err < 0) {
......@@ -1821,7 +1828,7 @@ int cmd_record(int argc, const char **argv)
record.opts.tail_synthesize = true;
if (rec->evlist->nr_entries == 0 &&
perf_evlist__add_default(rec->evlist) < 0) {
__perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
pr_err("Not enough memory for event selector list\n");
goto out;
}
......
......@@ -38,6 +38,7 @@
#include "util/time-utils.h"
#include "util/auxtrace.h"
#include "util/units.h"
#include "util/branch.h"
#include <dlfcn.h>
#include <errno.h>
......@@ -73,6 +74,7 @@ struct report {
u64 queue_size;
int socket_filter;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
struct branch_type_stat brtype_stat;
};
static int report__config(const char *var, const char *value, void *cb)
......@@ -150,6 +152,22 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
return err;
}
/*
 * add_entry callback used for branch-stack samples: feeds the branch
 * recorded on the hist entry held by @iter (flags plus from/to addresses)
 * into the branch type statistics kept in the report passed as @arg.
 *
 * @al and @single are unused. Always returns 0.
 */
static int hist_iter__branch_callback(struct hist_entry_iter *iter,
				      struct addr_location *al __maybe_unused,
				      bool single __maybe_unused,
				      void *arg)
{
	struct report *rep = arg;
	struct branch_info *bi = iter->he->branch_info;

	branch_type_count(&rep->brtype_stat, &bi->flags,
			  bi->from.addr, bi->to.addr);

	return 0;
}
static int process_sample_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
......@@ -188,6 +206,8 @@ static int process_sample_event(struct perf_tool *tool,
*/
if (!sample->branch_stack)
goto out_put;
iter.add_entry_cb = hist_iter__branch_callback;
iter.ops = &hist_iter_branch;
} else if (rep->mem_mode) {
iter.ops = &hist_iter_mem;
......@@ -410,6 +430,9 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
perf_read_values_destroy(&rep->show_threads_values);
}
if (sort__mode == SORT_MODE__BRANCH)
branch_type_stat_display(stdout, &rep->brtype_stat);
return 0;
}
......@@ -718,6 +741,7 @@ int cmd_report(int argc, const char **argv)
.id_index = perf_event__process_id_index,
.auxtrace_info = perf_event__process_auxtrace_info,
.auxtrace = perf_event__process_auxtrace,
.feature = perf_event__process_feature,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
......@@ -943,6 +967,8 @@ int cmd_report(int argc, const char **argv)
if (has_br_stack && branch_call_mode)
symbol_conf.show_branchflag_count = true;
memset(&report.brtype_stat, 0, sizeof(struct branch_type_stat));
/*
* Branch mode is a tristate:
* -1 means default, so decide based on the file having branch data.
......@@ -988,6 +1014,10 @@ int cmd_report(int argc, const char **argv)
/* Force tty output for header output and per-thread stat. */
if (report.header || report.header_only || report.show_threads)
use_browser = 0;
if (report.header || report.header_only)
report.tool.show_feat_hdr = SHOW_FEAT_HEADER;
if (report.show_full_info)
report.tool.show_feat_hdr = SHOW_FEAT_HEADER_FULL_INFO;
if (strcmp(input_name, "-") != 0)
setup_browser(true);
......
......@@ -2682,6 +2682,7 @@ int cmd_script(int argc, const char **argv)
.attr = process_attr,
.event_update = perf_event__process_event_update,
.tracing_data = perf_event__process_tracing_data,
.feature = perf_event__process_feature,
.build_id = perf_event__process_build_id,
.id_index = perf_event__process_id_index,
.auxtrace_info = perf_event__process_auxtrace_info,
......@@ -2972,10 +2973,13 @@ int cmd_script(int argc, const char **argv)
return -1;
if (header || header_only) {
script.tool.show_feat_hdr = SHOW_FEAT_HEADER;
perf_session__fprintf_info(session, stdout, show_full_info);
if (header_only)
goto out_delete;
}
if (show_full_info)
script.tool.show_feat_hdr = SHOW_FEAT_HEADER_FULL_INFO;
if (symbol__init(&session->header.env) < 0)
goto out_delete;
......
......@@ -134,7 +134,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
return err;
}
err = symbol__disassemble(sym, map, NULL, 0, NULL);
err = symbol__disassemble(sym, map, NULL, 0, NULL, NULL);
if (err == 0) {
out_assign:
top->sym_filter_entry = he;
......
This diff is collapsed.
......@@ -16,6 +16,7 @@ arch/x86/include/uapi/asm/perf_regs.h
arch/x86/include/uapi/asm/kvm.h
arch/x86/include/uapi/asm/kvm_perf.h
arch/x86/include/uapi/asm/svm.h
arch/x86/include/uapi/asm/unistd.h
arch/x86/include/uapi/asm/vmx.h
arch/powerpc/include/uapi/asm/kvm.h
arch/s390/include/uapi/asm/kvm.h
......
......@@ -7,6 +7,7 @@
#include <linux/perf_event.h>
extern bool test_attr__enabled;
void test_attr__ready(void);
void test_attr__init(void);
void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
int fd, int group_fd, unsigned long flags);
......
......@@ -19,3 +19,7 @@
004d0000,1,power8.json,core
004d0100,1,power8.json,core
004d0200,1,power8.json,core
004c0100,1,power8.json,core
004e0100,1,power9.json,core
004e0200,1,power9.json,core
004e1200,1,power9.json,core
[
{,
"EventCode": "0x1002A",
"EventName": "PM_CMPLU_STALL_LARX",
"BriefDescription": "Finish stall because the NTF instruction was a larx waiting to be satisfied",
"PublicDescription": ""
},
{,
"EventCode": "0x1003C",
"EventName": "PM_CMPLU_STALL_DMISS_L2L3",
"BriefDescription": "Completion stall by Dcache miss which resolved in L2/L3",
"PublicDescription": ""
},
{,
"EventCode": "0x14048",
"EventName": "PM_INST_FROM_ON_CHIP_CACHE",
"BriefDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to an instruction fetch (not prefetch)",
"PublicDescription": ""
},
{,
"EventCode": "0x3E054",
"EventName": "PM_LD_MISS_L1",
"BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load.",
"PublicDescription": ""
},
{,
"EventCode": "0x400F0",
"EventName": "PM_LD_MISS_L1",
"BriefDescription": "Load Missed L1, at execution time (not gated by finish, which means this counter can be greater than loads finished)",
"PublicDescription": ""
},
{,
"EventCode": "0x1404A",
"EventName": "PM_INST_FROM_RL2L3_SHR",
"BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to an instruction fetch (not prefetch)",
"PublicDescription": ""
},
{,
"EventCode": "0x1C058",
"EventName": "PM_DTLB_MISS_16G",
"BriefDescription": "Data TLB Miss page size 16G",
"PublicDescription": ""
},
{,
"EventCode": "0x1D15C",
"EventName": "PM_MRK_DTLB_MISS_1G",
"BriefDescription": "Marked Data TLB reload (after a miss) page size 2M. Implies radix translation was used",
"PublicDescription": ""
},
{,
"EventCode": "0x1E056",
"EventName": "PM_CMPLU_STALL_FLUSH_ANY_THREAD",
"BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because any of the 4 threads in the same core suffered a flush, which blocks completion",
"PublicDescription": ""
},
{,
"EventCode": "0x101E6",
"EventName": "PM_THRESH_EXC_4096",
"BriefDescription": "Threshold counter exceed a count of 4096",
"PublicDescription": ""
},
{,
"EventCode": "0x2C01A",
"EventName": "PM_CMPLU_STALL_LHS",
"BriefDescription": "Finish stall because the NTF instruction was a load that hit on an older store and it was waiting for store data",
"PublicDescription": ""
},
{,
"EventCode": "0x2D016",
"EventName": "PM_CMPLU_STALL_FXU",
"BriefDescription": "Finish stall due to a scalar fixed point or CR instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes",
"PublicDescription": ""
},
{,
"EventCode": "0x24046",
"EventName": "PM_INST_FROM_RL2L3_MOD",
"BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to an instruction fetch (not prefetch)",
"PublicDescription": ""
},
{,
"EventCode": "0x2404A",
"EventName": "PM_INST_FROM_RL4",
"BriefDescription": "The processor's Instruction cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to an instruction fetch (not prefetch)",
"PublicDescription": ""
},
{,
"EventCode": "0x2F140",
"EventName": "PM_MRK_DPTEG_FROM_L2_MEPF",
"BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included",
"PublicDescription": ""
},
{,
"EventCode": "0x2D15E",
"EventName": "PM_MRK_DTLB_MISS_16G",
"BriefDescription": "Marked Data TLB Miss page size 16G",
"PublicDescription": ""
},
{,
"EventCode": "0x3F14A",
"EventName": "PM_MRK_DPTEG_FROM_RMEM",
"BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included",
"PublicDescription": ""
},
{,
"EventCode": "0x3D156",
"EventName": "PM_MRK_DTLB_MISS_64K",
"BriefDescription": "Marked Data TLB Miss page size 64K",
"PublicDescription": ""
},
{,
"EventCode": "0x3006C",
"EventName": "PM_RUN_CYC_SMT2_MODE",
"BriefDescription": "Cycles in which this thread's run latch is set and the core is in SMT2 mode",
"PublicDescription": ""
},
{,
"EventCode": "0x300F4",
"EventName": "PM_THRD_CONC_RUN_INST",
"BriefDescription": "PPC Instructions Finished by this thread when all threads in the core had the run-latch set",
"PublicDescription": ""
},
{,
"EventCode": "0x4C014",
"EventName": "PM_CMPLU_STALL_LMQ_FULL",
"BriefDescription": "Finish stall because the NTF instruction was a load that missed in the L1 and the LMQ was unable to accept this load miss request because it was full",
"PublicDescription": ""
},
{,
"EventCode": "0x4C016",
"EventName": "PM_CMPLU_STALL_DMISS_L2L3_CONFLICT",
"BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 with a conflict",
"PublicDescription": ""
},
{,
"EventCode": "0x4D014",
"EventName": "PM_CMPLU_STALL_LOAD_FINISH",
"BriefDescription": "Finish stall because the NTF instruction was a load instruction with all its dependencies satisfied just going through the LSU pipe to finish",
"PublicDescription": ""
},
{,
"EventCode": "0x4D016",
"EventName": "PM_CMPLU_STALL_FXLONG",
"BriefDescription": "Completion stall due to a long latency scalar fixed point instruction (division, square root)",
"PublicDescription": ""
},
{,
"EventCode": "0x4D12A",
"EventName": "PM_MRK_DATA_FROM_RL4_CYC",
"BriefDescription": "Duration in cycles to reload from another chip's L4 on the same Node or Group ( Remote) due to a marked load",
"PublicDescription": ""
},
{,
"EventCode": "0x4C15E",
"EventName": "PM_MRK_DTLB_MISS_16M",
"BriefDescription": "Marked Data TLB Miss page size 16M",
"PublicDescription": ""
},
{,
"EventCode": "0x401E4",
"EventName": "PM_MRK_DTLB_MISS",
"BriefDescription": "Marked dtlb miss",
"PublicDescription": ""
},
{,
"EventCode": "0x401EA",
"EventName": "PM_THRESH_EXC_128",
"BriefDescription": "Threshold counter exceeded a value of 128",
"PublicDescription": ""
},
{,
"EventCode": "0x400F6",
"EventName": "PM_BR_MPRED_CMPL",
"BriefDescription": "Number of Branch Mispredicts",
"PublicDescription": ""
}
]
[
{,
"EventCode": "0x10058",
"EventName": "PM_MEM_LOC_THRESH_IFU",
"BriefDescription": "Local Memory above threshold for IFU speculation control",
"PublicDescription": ""
},
{,
"EventCode": "0x4505E",
"EventName": "PM_FLOP_CMPL",
"BriefDescription": "Floating Point Operation Finished",
"PublicDescription": ""
},
{,
"EventCode": "0x1415A",
"EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST_CYC",
"BriefDescription": "Duration in cycles to reload from local core's L2 with load hit store conflict due to a marked load",
"PublicDescription": ""
},
{,
"EventCode": "0x2D028",
"EventName": "PM_RADIX_PWC_L2_PDE_FROM_L2",
"BriefDescription": "A Page Directory Entry was reloaded to a level 2 page walk cache from the core's L2 data cache",
"PublicDescription": ""
},
{,
"EventCode": "0x2D154",
"EventName": "PM_MRK_DERAT_MISS_64K",
"BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 64K",
"PublicDescription": ""
},
{,
"EventCode": "0x30012",
"EventName": "PM_FLUSH_COMPLETION",
"BriefDescription": "The instruction that was next to complete did not complete because it suffered a flush",
"PublicDescription": ""
},
{,
"EventCode": "0x4016E",
"EventName": "PM_THRESH_NOT_MET",
"BriefDescription": "Threshold counter did not meet threshold",
"PublicDescription": ""
}
]
This diff is collapsed.
This diff is collapsed.
[
{,
"EventCode": "0x10008",
"EventName": "PM_RUN_SPURR",
"BriefDescription": "Run SPURR",
"PublicDescription": ""
},
{,
"EventCode": "0x1000A",
"EventName": "PM_PMC3_REWIND",
"BriefDescription": "PMC3 rewind event. A rewind happens when a speculative event (such as latency or CPI stack) is selected on PMC3 and the stall reason or reload source did not match the one programmed in PMC3. When this occurs, the count in PMC3 will not change.",
"PublicDescription": ""
},
{,
"EventCode": "0x1C040",
"EventName": "PM_DATA_FROM_L2_NO_CONFLICT",
"BriefDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to a demand load",
"PublicDescription": ""
},
{,
"EventCode": "0x1C050",
"EventName": "PM_DATA_CHIP_PUMP_CPRED",
"BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for a demand load",
"PublicDescription": ""
},
{,
"EventCode": "0x1D15E",
"EventName": "PM_MRK_RUN_CYC",
"BriefDescription": "Run cycles in which a marked instruction is in the pipeline",
"PublicDescription": ""
},
{,
"EventCode": "0x15158",
"EventName": "PM_SYNC_MRK_L2HIT",
"BriefDescription": "Marked L2 Hits that can throw a synchronous interrupt",
"PublicDescription": ""
},
{,
"EventCode": "0x20010",
"EventName": "PM_PMC1_OVERFLOW",
"BriefDescription": "Overflow from counter 1",
"PublicDescription": ""
},
{,
"EventCode": "0x2C040",
"EventName": "PM_DATA_FROM_L2_MEPF",
"BriefDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to a demand load",
"PublicDescription": ""
},
{,
"EventCode": "0x2005A",
"EventName": "PM_DARQ1_7_9_ENTRIES",
"BriefDescription": "Cycles in which 7 to 9 DARQ1 entries (out of 12) are in use",
"PublicDescription": ""
},
{,
"EventCode": "0x2C05C",
"EventName": "PM_INST_GRP_PUMP_CPRED",
"BriefDescription": "Initial and Final Pump Scope was group pump (prediction=correct) for an instruction fetch (demand only)",
"PublicDescription": ""
},
{,
"EventCode": "0x2D156",
"EventName": "PM_MRK_DTLB_MISS_4K",
"BriefDescription": "Marked Data TLB Miss page size 4k",
"PublicDescription": ""
},
{,
"EventCode": "0x2E05A",
"EventName": "PM_LRQ_REJECT",
"BriefDescription": "Internal LSU reject from LRQ. Rejects cause the load to go back to LRQ, but it stays contained within the LSU once it gets issued. This event counts the number of times the LRQ attempts to relaunch an instruction after a reject. Any load can suffer multiple rejects",
"PublicDescription": ""
},
{,
"EventCode": "0x2E05C",
"EventName": "PM_LSU_REJECT_ERAT_MISS",
"BriefDescription": "LSU Reject due to ERAT (up to 4 per cycles)",
"PublicDescription": ""
},
{,
"EventCode": "0x200F6",
"EventName": "PM_LSU_DERAT_MISS",
"BriefDescription": "DERAT Reloaded due to a DERAT miss",
"PublicDescription": ""
},
{,
"EventCode": "0x3C048",
"EventName": "PM_DATA_FROM_DL2L3_SHR",
"BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load",
"PublicDescription": ""
},
{,
"EventCode": "0x3404A",
"EventName": "PM_INST_FROM_RMEM",
"BriefDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to an instruction fetch (not prefetch)",
"PublicDescription": ""
},
{,
"EventCode": "0x3C058",
"EventName": "PM_LARX_FIN",
"BriefDescription": "Larx finished",
"PublicDescription": ""
},
{,
"EventCode": "0x3E050",
"EventName": "PM_DARQ1_4_6_ENTRIES",
"BriefDescription": "Cycles in which 4, 5, or 6 DARQ1 entries (out of 12) are in use",
"PublicDescription": ""
},
{,
"EventCode": "0x3006E",
"EventName": "PM_NEST_REF_CLK",
"BriefDescription": "Multiply by 4 to obtain the number of PB cycles",
"PublicDescription": ""
},
{,
"EventCode": "0x301E2",
"EventName": "PM_MRK_ST_CMPL",
"BriefDescription": "Marked store completed and sent to nest",
"PublicDescription": ""
},
{,
"EventCode": "0x4D02C",
"EventName": "PM_PMC1_REWIND",
"BriefDescription": "",
"PublicDescription": ""
},
{,
"EventCode": "0x4003E",
"EventName": "PM_LD_CMPL",
"BriefDescription": "count of Loads completed",
"PublicDescription": ""
},
{,
"EventCode": "0x4C040",
"EventName": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER",
"BriefDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a demand load",
"PublicDescription": ""
},
{,
"EventCode": "0x4C042",
"EventName": "PM_DATA_FROM_L3",
"BriefDescription": "The processor's data cache was reloaded from local core's L3 due to a demand load",
"PublicDescription": ""
},
{,
"EventCode": "0x4C048",
"EventName": "PM_DATA_FROM_DL2L3_MOD",
"BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load",
"PublicDescription": ""
},
{,
"EventCode": "0x4D056",
"EventName": "PM_NON_FMA_FLOP_CMPL",
"BriefDescription": "Non FMA instruction completed",
"PublicDescription": ""
}
]
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
[
{,
"EventCode": "0x0",
"EventName": "PM_SUSPENDED",
"BriefDescription": "Counter OFF",
"PublicDescription": ""
},
{,
"EventCode": "0x10026",
"EventName": "PM_TABLEWALK_CYC",
"BriefDescription": "Cycles when an instruction tablewalk is active",
"PublicDescription": ""
},
{,
"EventCode": "0x1E04C",
"EventName": "PM_DPTEG_FROM_LL4",
"BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included",
"PublicDescription": ""
},
{,
"EventCode": "0x1F14E",
"EventName": "PM_MRK_DPTEG_FROM_L2MISS",
"BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included",
"PublicDescription": ""
},
{,
"EventCode": "0x10060",
"EventName": "PM_TM_TRANS_RUN_CYC",
"BriefDescription": "run cycles in transactional state",
"PublicDescription": ""
},
{,
"EventCode": "0x2C012",
"EventName": "PM_CMPLU_STALL_DCACHE_MISS",
"BriefDescription": "Finish stall because the NTF instruction was a load that missed the L1 and was waiting for the data to return from the nest",
"PublicDescription": ""
},
{,
"EventCode": "0x2E04C",
"EventName": "PM_DPTEG_FROM_MEMORY",
"BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included",
"PublicDescription": ""
},
{,
"EventCode": "0x2C056",
"EventName": "PM_DTLB_MISS_4K",
"BriefDescription": "Data TLB Miss page size 4k",
"PublicDescription": ""
},
{,
"EventCode": "0x3000C",
"EventName": "PM_FREQ_DOWN",
"BriefDescription": "Power Management: Below Threshold B",
"PublicDescription": ""
},
{,
"EventCode": "0x3D142",
"EventName": "PM_MRK_DATA_FROM_LMEM",
"BriefDescription": "The processor's data cache was reloaded from the local chip's Memory due to a marked load",
"PublicDescription": ""
},
{,
"EventCode": "0x3F142",
"EventName": "PM_MRK_DPTEG_FROM_L3_DISP_CONFLICT",
"BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included",
"PublicDescription": ""
},
{,
"EventCode": "0x301E8",
"EventName": "PM_THRESH_EXC_64",
"BriefDescription": "Threshold counter exceeded a value of 64",
"PublicDescription": ""
},
{,
"EventCode": "0x40118",
"EventName": "PM_MRK_DCACHE_RELOAD_INTV",
"BriefDescription": "Combined Intervention event",
"PublicDescription": ""
},
{,
"EventCode": "0x4C01E",
"EventName": "PM_CMPLU_STALL_CRYPTO",
"BriefDescription": "Finish stall because the NTF instruction was routed to the crypto execution pipe and was waiting to finish",
"PublicDescription": ""
},
{,
"EventCode": "0x4D018",
"EventName": "PM_CMPLU_STALL_BRU",
"BriefDescription": "Completion stall due to a Branch Unit",
"PublicDescription": ""
},
{,
"EventCode": "0x4D128",
"EventName": "PM_MRK_DATA_FROM_LMEM_CYC",
"BriefDescription": "Duration in cycles to reload from the local chip's Memory due to a marked load",
"PublicDescription": ""
},
{,
"EventCode": "0x4E04E",
"EventName": "PM_DPTEG_FROM_L3MISS",
"BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included",
"PublicDescription": ""
},
{,
"EventCode": "0x4F142",
"EventName": "PM_MRK_DPTEG_FROM_L3",
"BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included",
"PublicDescription": ""
},
{,
"EventCode": "0x4F148",
"EventName": "PM_MRK_DPTEG_FROM_DL2L3_MOD",
"BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included",
"PublicDescription": ""
},
{,
"EventCode": "0x40050",
"EventName": "PM_SYS_PUMP_MPRED_RTY",
"BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)",
"PublicDescription": ""
},
{,
"EventCode": "0x40056",
"EventName": "PM_MEM_LOC_THRESH_LSU_HIGH",
"BriefDescription": "Local memory above threshold for LSU medium",
"PublicDescription": ""
},
{,
"EventCode": "0x4D054",
"EventName": "PM_8FLOP_CMPL",
"BriefDescription": "8 FLOP instruction completed",
"PublicDescription": ""
},
{,
"EventCode": "0x45050",
"EventName": "PM_1FLOP_CMPL",
"BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed",
"PublicDescription": ""
},
{,
"EventCode": "0x45052",
"EventName": "PM_4FLOP_CMPL",
"BriefDescription": "4 FLOP instruction completed",
"PublicDescription": ""
}
]
This diff is collapsed.
......@@ -36,6 +36,7 @@
#define ENV "PERF_TEST_ATTR"
static char *dir;
static bool ready;
void test_attr__init(void)
{
......@@ -67,6 +68,9 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu,
FILE *file;
char path[PATH_MAX];
if (!ready)
return 0;
snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir,
attr->type, attr->config, fd);
......@@ -136,7 +140,7 @@ void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
{
int errno_saved = errno;
if (store_event(attr, pid, cpu, fd, group_fd, flags)) {
if ((fd != -1) && store_event(attr, pid, cpu, fd, group_fd, flags)) {
pr_err("test attr FAILED");
exit(128);
}
......@@ -144,6 +148,12 @@ void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
errno = errno_saved;
}
/*
 * Flip the file-local 'ready' flag once attr testing is enabled.
 * store_event() returns early while the flag is still false, so no event
 * is stored until this has been called with test_attr__enabled set.
 * Calling it again after the flag is set is a no-op.
 */
void test_attr__ready(void)
{
	if (ready)
		return;

	if (unlikely(test_attr__enabled))
		ready = true;
}
static int run_dir(const char *d, const char *perf)
{
char v[] = "-vvvvv";
......
......@@ -9,6 +9,20 @@ import logging
import shutil
import ConfigParser
def data_equal(a, b):
    """Return True when two '|'-separated value strings are compatible.

    Each argument may list several alternatives separated by '|'.  The two
    are considered equal when either side contains the wildcard '*', or
    when at least one alternative appears on both sides.
    """
    left = a.split('|')
    right = b.split('|')

    # str.split() never returns an empty list, so a wildcard on either
    # side is always paired with something on the other side.
    if '*' in left or '*' in right:
        return True

    return any(item in right for item in left)
class Fail(Exception):
def __init__(self, test, msg):
self.msg = msg
......@@ -82,34 +96,25 @@ class Event(dict):
self.add(base)
self.add(data)
def compare_data(self, a, b):
# Allow multiple values in assignment separated by '|'
a_list = a.split('|')
b_list = b.split('|')
for a_item in a_list:
for b_item in b_list:
if (a_item == b_item):
return True
elif (a_item == '*') or (b_item == '*'):
return True
return False
def equal(self, other):
for t in Event.terms:
log.debug(" [%s] %s %s" % (t, self[t], other[t]));
if not self.has_key(t) or not other.has_key(t):
return False
if not self.compare_data(self[t], other[t]):
if not data_equal(self[t], other[t]):
return False
return True
def optional(self):
    """Tell whether this expected event is marked as optional.

    Returns True only when the event carries an 'optional' key whose
    value is the string '1'; a missing key or any other value gives False.
    """
    # dict.has_key() is deprecated and no longer exists in Python 3.
    # .get() returns None for a missing key, which never equals '1', so the
    # result is the same boolean the has_key() form produced.
    return self.get('optional') == '1'
def diff(self, other):
for t in Event.terms:
if not self.has_key(t) or not other.has_key(t):
continue
if not self.compare_data(self[t], other[t]):
if not data_equal(self[t], other[t]):
log.warning("expected %s=%s, got %s" % (t, self[t], other[t]))
# Test file description needs to have following sections:
......@@ -218,9 +223,9 @@ class Test(object):
self.perf, self.command, tempdir, self.args)
ret = os.WEXITSTATUS(os.system(cmd))
log.info(" '%s' ret %d " % (cmd, ret))
log.info(" '%s' ret '%s', expected '%s'" % (cmd, str(ret), str(self.ret)))
if ret != int(self.ret):
if not data_equal(str(ret), str(self.ret)):
raise Unsup(self)
def compare(self, expect, result):
......@@ -244,7 +249,10 @@ class Test(object):
log.debug(" match: [%s] matches %s" % (exp_name, str(exp_list)))
# we did not find any matching event - fail
if (not exp_list):
if not exp_list:
if exp_event.optional():
log.debug(" %s does not match, but is optional" % exp_name)
else:
exp_event.diff(res_event)
raise Fail(self, 'match failure');
......
......@@ -7,7 +7,7 @@ cpu=*
type=0|1
size=112
config=0
sample_period=4000
sample_period=*
sample_type=263
read_format=0
disabled=1
......@@ -15,7 +15,7 @@ inherit=1
pinned=0
exclusive=0
exclude_user=0
exclude_kernel=0
exclude_kernel=0|1
exclude_hv=0
exclude_idle=0
mmap=1
......@@ -25,7 +25,7 @@ inherit_stat=0
enable_on_exec=1
task=0
watermark=0
precise_ip=0
precise_ip=0|1|2|3
mmap_data=0
sample_id_all=1
exclude_host=0|1
......
......@@ -8,14 +8,14 @@ type=0
size=112
config=0
sample_period=0
sample_type=0
sample_type=65536
read_format=3
disabled=1
inherit=1
pinned=0
exclusive=0
exclude_user=0
exclude_kernel=0
exclude_kernel=0|1
exclude_hv=0
exclude_idle=0
mmap=0
......
[config]
command = record
args = -C 0 kill >/dev/null 2>&1
ret = 1
[event:base-record]
cpu=0
......
[config]
command = record
args = kill >/dev/null 2>&1
ret = 1
[event:base-record]
[config]
command = record
args = -b kill >/dev/null 2>&1
ret = 1
[event:base-record]
sample_period=4000
sample_type=2311
branch_sample_type=8
[config]
command = record
args = -j any kill >/dev/null 2>&1
ret = 1
[event:base-record]
sample_period=4000
sample_type=2311
branch_sample_type=8
[config]
command = record
args = -j any_call kill >/dev/null 2>&1
ret = 1
[event:base-record]
sample_period=4000
sample_type=2311
branch_sample_type=16
[config]
command = record
args = -j any_ret kill >/dev/null 2>&1
ret = 1
[event:base-record]
sample_period=4000
sample_type=2311
branch_sample_type=32
[config]
command = record
args = -j hv kill >/dev/null 2>&1
ret = 1
[event:base-record]
sample_period=4000
sample_type=2311
branch_sample_type=8
[config]
command = record
args = -j ind_call kill >/dev/null 2>&1
ret = 1
[event:base-record]
sample_period=4000
sample_type=2311
branch_sample_type=64
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment