Commit 4a98be82 authored by Thomas Gleixner

Merge tag 'perf-core-for-mingo-5.1-20190311' of...

Merge tag 'perf-core-for-mingo-5.1-20190311' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Pull perf/core improvements and fixes from Arnaldo:

kernel:

  Stephane Eranian :

  - Restore mmap record type correctly when handling PERF_RECORD_MMAP2
    events, as the same template is used for all the threads interested
    in mmap events, some may want just PERF_RECORD_MMAP, while some
    may want the extra info in MMAP2 records.

perf probe:

  Adrian Hunter:

  - Fix getting the kernel map: since the changes related to x86 PTI
    entry trampoline handling, there is more than one kernel map.

perf script:

  Andi Kleen:

  - Support insn output for normal samples, i.e.:

    perf script -F ip,sym,insn --xed

    Will fetch the sample IP from the thread address space and feed it
    to Intel's XED disassembler, producing lines such as:

      ffffffffa4068804 native_write_msr            wrmsr
      ffffffffa415b95e __hrtimer_next_event_base   movq  0x18(%rax), %rdx

    That matches 'perf annotate's output.

  - Make the --cpu filter apply to  PERF_RECORD_COMM/FORK/... events, in
    addition to PERF_RECORD_SAMPLE.

perf report:

  - Add a new --samples option to save a small random number of samples
    per hist entry, using a reservoir technique to select a representative
    number of samples.

    Then allow browsing the samples using 'perf script' as part of the hist
    entry context menu. This automatically adds the right filters, so only
    the thread or CPU of the sample is displayed. Then we use less' search
    functionality to directly jump to the time stamp of the selected sample.

    It uses different menus for assembler and source display.  Assembler
    needs xed installed and source needs debuginfo.

  - Fix the UI browser scripts pop up menu when there are many scripts
    available.

perf report:

  Andi Kleen:

  - Add 'time' sort option. E.g.:

    % perf report --sort time,overhead,symbol --time-quantum 1ms --stdio
    ...
         0.67%  277061.87300  [.] _dl_start
         0.50%  277061.87300  [.] f1
         0.50%  277061.87300  [.] f2
         0.33%  277061.87300  [.] main
         0.29%  277061.87300  [.] _dl_lookup_symbol_x
         0.29%  277061.87300  [.] dl_main
         0.29%  277061.87300  [.] do_lookup_x
         0.17%  277061.87300  [.] _dl_debug_initialize
         0.17%  277061.87300  [.] _dl_init_paths
         0.08%  277061.87300  [.] check_match
         0.04%  277061.87300  [.] _dl_count_modids
         1.33%  277061.87400  [.] f1
         1.33%  277061.87400  [.] f2
         1.33%  277061.87400  [.] main
         1.17%  277061.87500  [.] main
         1.08%  277061.87500  [.] f1
         1.08%  277061.87500  [.] f2
         1.00%  277061.87600  [.] main
         0.83%  277061.87600  [.] f1
         0.83%  277061.87600  [.] f2
         1.00%  277061.87700  [.] main

tools headers:

  Arnaldo Carvalho de Melo:

  - Update x86's syscall_64.tbl, no change in tools/perf behaviour.

  - Sync copies of asm-generic/unistd.h and linux/in.h with the kernel sources.

perf data:

  Jiri Olsa:

  - Prep work to support having perf.data stored as a directory, with one
    file per CPU, that ultimately will allow having one ring buffer reading
    thread per CPU.

Vendor events:

  Martin Liška:

  - perf PMU events for AMD Family 17h.

perf script python:

  Tony Jones:

  - Add python3 support for the remaining Intel PT related scripts, with
    these we should have a clean build of perf with python3 while still
    supporting the build with python2.

libbpf:

  Arnaldo Carvalho de Melo:

  - Fix the build on uClibc, adding the missing stdarg.h since we use
    va_list in one typedef.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parents c634dc6b dfcbc2f2
......@@ -7189,6 +7189,7 @@ static void perf_event_mmap_output(struct perf_event *event,
struct perf_output_handle handle;
struct perf_sample_data sample;
int size = mmap_event->event_id.header.size;
u32 type = mmap_event->event_id.header.type;
int ret;
if (!perf_event_mmap_match(event, data))
......@@ -7232,6 +7233,7 @@ static void perf_event_mmap_output(struct perf_event *event,
perf_output_end(&handle);
out:
mmap_event->event_id.header.size = size;
mmap_event->event_id.header.type = type;
}
static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
......
......@@ -17,5 +17,7 @@
#define __ARCH_WANT_RENAMEAT
#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_SET_GET_RLIMIT
#define __ARCH_WANT_TIME32_SYSCALLS
#include <asm-generic/unistd.h>
......@@ -38,8 +38,10 @@ __SYSCALL(__NR_io_destroy, sys_io_destroy)
__SC_COMP(__NR_io_submit, sys_io_submit, compat_sys_io_submit)
#define __NR_io_cancel 3
__SYSCALL(__NR_io_cancel, sys_io_cancel)
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_io_getevents 4
__SC_COMP(__NR_io_getevents, sys_io_getevents, compat_sys_io_getevents)
__SC_3264(__NR_io_getevents, sys_io_getevents_time32, sys_io_getevents)
#endif
/* fs/xattr.c */
#define __NR_setxattr 5
......@@ -179,7 +181,7 @@ __SYSCALL(__NR_fchownat, sys_fchownat)
#define __NR_fchown 55
__SYSCALL(__NR_fchown, sys_fchown)
#define __NR_openat 56
__SC_COMP(__NR_openat, sys_openat, compat_sys_openat)
__SYSCALL(__NR_openat, sys_openat)
#define __NR_close 57
__SYSCALL(__NR_close, sys_close)
#define __NR_vhangup 58
......@@ -222,10 +224,12 @@ __SC_COMP(__NR_pwritev, sys_pwritev, compat_sys_pwritev)
__SYSCALL(__NR3264_sendfile, sys_sendfile64)
/* fs/select.c */
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_pselect6 72
__SC_COMP(__NR_pselect6, sys_pselect6, compat_sys_pselect6)
__SC_COMP_3264(__NR_pselect6, sys_pselect6_time32, sys_pselect6, compat_sys_pselect6_time32)
#define __NR_ppoll 73
__SC_COMP(__NR_ppoll, sys_ppoll, compat_sys_ppoll)
__SC_COMP_3264(__NR_ppoll, sys_ppoll_time32, sys_ppoll, compat_sys_ppoll_time32)
#endif
/* fs/signalfd.c */
#define __NR_signalfd4 74
......@@ -269,16 +273,20 @@ __SC_COMP(__NR_sync_file_range, sys_sync_file_range, \
/* fs/timerfd.c */
#define __NR_timerfd_create 85
__SYSCALL(__NR_timerfd_create, sys_timerfd_create)
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_timerfd_settime 86
__SC_COMP(__NR_timerfd_settime, sys_timerfd_settime, \
compat_sys_timerfd_settime)
__SC_3264(__NR_timerfd_settime, sys_timerfd_settime32, \
sys_timerfd_settime)
#define __NR_timerfd_gettime 87
__SC_COMP(__NR_timerfd_gettime, sys_timerfd_gettime, \
compat_sys_timerfd_gettime)
__SC_3264(__NR_timerfd_gettime, sys_timerfd_gettime32, \
sys_timerfd_gettime)
#endif
/* fs/utimes.c */
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_utimensat 88
__SC_COMP(__NR_utimensat, sys_utimensat, compat_sys_utimensat)
__SC_3264(__NR_utimensat, sys_utimensat_time32, sys_utimensat)
#endif
/* kernel/acct.c */
#define __NR_acct 89
......@@ -309,8 +317,10 @@ __SYSCALL(__NR_set_tid_address, sys_set_tid_address)
__SYSCALL(__NR_unshare, sys_unshare)
/* kernel/futex.c */
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_futex 98
__SC_COMP(__NR_futex, sys_futex, compat_sys_futex)
__SC_3264(__NR_futex, sys_futex_time32, sys_futex)
#endif
#define __NR_set_robust_list 99
__SC_COMP(__NR_set_robust_list, sys_set_robust_list, \
compat_sys_set_robust_list)
......@@ -319,8 +329,10 @@ __SC_COMP(__NR_get_robust_list, sys_get_robust_list, \
compat_sys_get_robust_list)
/* kernel/hrtimer.c */
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_nanosleep 101
__SC_COMP(__NR_nanosleep, sys_nanosleep, compat_sys_nanosleep)
__SC_3264(__NR_nanosleep, sys_nanosleep_time32, sys_nanosleep)
#endif
/* kernel/itimer.c */
#define __NR_getitimer 102
......@@ -341,23 +353,29 @@ __SYSCALL(__NR_delete_module, sys_delete_module)
/* kernel/posix-timers.c */
#define __NR_timer_create 107
__SC_COMP(__NR_timer_create, sys_timer_create, compat_sys_timer_create)
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_timer_gettime 108
__SC_COMP(__NR_timer_gettime, sys_timer_gettime, compat_sys_timer_gettime)
__SC_3264(__NR_timer_gettime, sys_timer_gettime32, sys_timer_gettime)
#endif
#define __NR_timer_getoverrun 109
__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun)
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_timer_settime 110
__SC_COMP(__NR_timer_settime, sys_timer_settime, compat_sys_timer_settime)
__SC_3264(__NR_timer_settime, sys_timer_settime32, sys_timer_settime)
#endif
#define __NR_timer_delete 111
__SYSCALL(__NR_timer_delete, sys_timer_delete)
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_clock_settime 112
__SC_COMP(__NR_clock_settime, sys_clock_settime, compat_sys_clock_settime)
__SC_3264(__NR_clock_settime, sys_clock_settime32, sys_clock_settime)
#define __NR_clock_gettime 113
__SC_COMP(__NR_clock_gettime, sys_clock_gettime, compat_sys_clock_gettime)
__SC_3264(__NR_clock_gettime, sys_clock_gettime32, sys_clock_gettime)
#define __NR_clock_getres 114
__SC_COMP(__NR_clock_getres, sys_clock_getres, compat_sys_clock_getres)
__SC_3264(__NR_clock_getres, sys_clock_getres_time32, sys_clock_getres)
#define __NR_clock_nanosleep 115
__SC_COMP(__NR_clock_nanosleep, sys_clock_nanosleep, \
compat_sys_clock_nanosleep)
__SC_3264(__NR_clock_nanosleep, sys_clock_nanosleep_time32, \
sys_clock_nanosleep)
#endif
/* kernel/printk.c */
#define __NR_syslog 116
......@@ -388,9 +406,11 @@ __SYSCALL(__NR_sched_yield, sys_sched_yield)
__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max)
#define __NR_sched_get_priority_min 126
__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min)
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_sched_rr_get_interval 127
__SC_COMP(__NR_sched_rr_get_interval, sys_sched_rr_get_interval, \
compat_sys_sched_rr_get_interval)
__SC_3264(__NR_sched_rr_get_interval, sys_sched_rr_get_interval_time32, \
sys_sched_rr_get_interval)
#endif
/* kernel/signal.c */
#define __NR_restart_syscall 128
......@@ -411,9 +431,11 @@ __SC_COMP(__NR_rt_sigaction, sys_rt_sigaction, compat_sys_rt_sigaction)
__SC_COMP(__NR_rt_sigprocmask, sys_rt_sigprocmask, compat_sys_rt_sigprocmask)
#define __NR_rt_sigpending 136
__SC_COMP(__NR_rt_sigpending, sys_rt_sigpending, compat_sys_rt_sigpending)
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_rt_sigtimedwait 137
__SC_COMP(__NR_rt_sigtimedwait, sys_rt_sigtimedwait, \
compat_sys_rt_sigtimedwait)
__SC_COMP_3264(__NR_rt_sigtimedwait, sys_rt_sigtimedwait_time32, \
sys_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time32)
#endif
#define __NR_rt_sigqueueinfo 138
__SC_COMP(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo, \
compat_sys_rt_sigqueueinfo)
......@@ -467,10 +489,15 @@ __SYSCALL(__NR_uname, sys_newuname)
__SYSCALL(__NR_sethostname, sys_sethostname)
#define __NR_setdomainname 162
__SYSCALL(__NR_setdomainname, sys_setdomainname)
#ifdef __ARCH_WANT_SET_GET_RLIMIT
/* getrlimit and setrlimit are superseded with prlimit64 */
#define __NR_getrlimit 163
__SC_COMP(__NR_getrlimit, sys_getrlimit, compat_sys_getrlimit)
#define __NR_setrlimit 164
__SC_COMP(__NR_setrlimit, sys_setrlimit, compat_sys_setrlimit)
#endif
#define __NR_getrusage 165
__SC_COMP(__NR_getrusage, sys_getrusage, compat_sys_getrusage)
#define __NR_umask 166
......@@ -481,12 +508,14 @@ __SYSCALL(__NR_prctl, sys_prctl)
__SYSCALL(__NR_getcpu, sys_getcpu)
/* kernel/time.c */
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_gettimeofday 169
__SC_COMP(__NR_gettimeofday, sys_gettimeofday, compat_sys_gettimeofday)
#define __NR_settimeofday 170
__SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday)
#define __NR_adjtimex 171
__SC_COMP(__NR_adjtimex, sys_adjtimex, compat_sys_adjtimex)
__SC_3264(__NR_adjtimex, sys_adjtimex_time32, sys_adjtimex)
#endif
/* kernel/timer.c */
#define __NR_getpid 172
......@@ -511,11 +540,13 @@ __SC_COMP(__NR_sysinfo, sys_sysinfo, compat_sys_sysinfo)
__SC_COMP(__NR_mq_open, sys_mq_open, compat_sys_mq_open)
#define __NR_mq_unlink 181
__SYSCALL(__NR_mq_unlink, sys_mq_unlink)
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_mq_timedsend 182
__SC_COMP(__NR_mq_timedsend, sys_mq_timedsend, compat_sys_mq_timedsend)
__SC_3264(__NR_mq_timedsend, sys_mq_timedsend_time32, sys_mq_timedsend)
#define __NR_mq_timedreceive 183
__SC_COMP(__NR_mq_timedreceive, sys_mq_timedreceive, \
compat_sys_mq_timedreceive)
__SC_3264(__NR_mq_timedreceive, sys_mq_timedreceive_time32, \
sys_mq_timedreceive)
#endif
#define __NR_mq_notify 184
__SC_COMP(__NR_mq_notify, sys_mq_notify, compat_sys_mq_notify)
#define __NR_mq_getsetattr 185
......@@ -536,8 +567,10 @@ __SC_COMP(__NR_msgsnd, sys_msgsnd, compat_sys_msgsnd)
__SYSCALL(__NR_semget, sys_semget)
#define __NR_semctl 191
__SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl)
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_semtimedop 192
__SC_COMP(__NR_semtimedop, sys_semtimedop, compat_sys_semtimedop)
__SC_COMP(__NR_semtimedop, sys_semtimedop, sys_semtimedop_time32)
#endif
#define __NR_semop 193
__SYSCALL(__NR_semop, sys_semop)
......@@ -658,8 +691,10 @@ __SC_COMP(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo, \
__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
#define __NR_accept4 242
__SYSCALL(__NR_accept4, sys_accept4)
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_recvmmsg 243
__SC_COMP(__NR_recvmmsg, sys_recvmmsg, compat_sys_recvmmsg)
__SC_COMP_3264(__NR_recvmmsg, sys_recvmmsg_time32, sys_recvmmsg, compat_sys_recvmmsg_time32)
#endif
/*
* Architectures may provide up to 16 syscalls of their own
......@@ -667,8 +702,10 @@ __SC_COMP(__NR_recvmmsg, sys_recvmmsg, compat_sys_recvmmsg)
*/
#define __NR_arch_specific_syscall 244
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_wait4 260
__SC_COMP(__NR_wait4, sys_wait4, compat_sys_wait4)
#endif
#define __NR_prlimit64 261
__SYSCALL(__NR_prlimit64, sys_prlimit64)
#define __NR_fanotify_init 262
......@@ -678,10 +715,11 @@ __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
#define __NR_name_to_handle_at 264
__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
#define __NR_open_by_handle_at 265
__SC_COMP(__NR_open_by_handle_at, sys_open_by_handle_at, \
compat_sys_open_by_handle_at)
__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_clock_adjtime 266
__SC_COMP(__NR_clock_adjtime, sys_clock_adjtime, compat_sys_clock_adjtime)
__SC_3264(__NR_clock_adjtime, sys_clock_adjtime32, sys_clock_adjtime)
#endif
#define __NR_syncfs 267
__SYSCALL(__NR_syncfs, sys_syncfs)
#define __NR_setns 268
......@@ -734,15 +772,60 @@ __SYSCALL(__NR_pkey_alloc, sys_pkey_alloc)
__SYSCALL(__NR_pkey_free, sys_pkey_free)
#define __NR_statx 291
__SYSCALL(__NR_statx, sys_statx)
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_io_pgetevents 292
__SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
__SC_COMP_3264(__NR_io_pgetevents, sys_io_pgetevents_time32, sys_io_pgetevents, compat_sys_io_pgetevents)
#endif
#define __NR_rseq 293
__SYSCALL(__NR_rseq, sys_rseq)
#define __NR_kexec_file_load 294
__SYSCALL(__NR_kexec_file_load, sys_kexec_file_load)
/* 295 through 402 are unassigned to sync up with generic numbers, don't use */
#if __BITS_PER_LONG == 32
#define __NR_clock_gettime64 403
__SYSCALL(__NR_clock_gettime64, sys_clock_gettime)
#define __NR_clock_settime64 404
__SYSCALL(__NR_clock_settime64, sys_clock_settime)
#define __NR_clock_adjtime64 405
__SYSCALL(__NR_clock_adjtime64, sys_clock_adjtime)
#define __NR_clock_getres_time64 406
__SYSCALL(__NR_clock_getres_time64, sys_clock_getres)
#define __NR_clock_nanosleep_time64 407
__SYSCALL(__NR_clock_nanosleep_time64, sys_clock_nanosleep)
#define __NR_timer_gettime64 408
__SYSCALL(__NR_timer_gettime64, sys_timer_gettime)
#define __NR_timer_settime64 409
__SYSCALL(__NR_timer_settime64, sys_timer_settime)
#define __NR_timerfd_gettime64 410
__SYSCALL(__NR_timerfd_gettime64, sys_timerfd_gettime)
#define __NR_timerfd_settime64 411
__SYSCALL(__NR_timerfd_settime64, sys_timerfd_settime)
#define __NR_utimensat_time64 412
__SYSCALL(__NR_utimensat_time64, sys_utimensat)
#define __NR_pselect6_time64 413
__SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64)
#define __NR_ppoll_time64 414
__SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64)
#define __NR_io_pgetevents_time64 416
__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents)
#define __NR_recvmmsg_time64 417
__SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64)
#define __NR_mq_timedsend_time64 418
__SYSCALL(__NR_mq_timedsend_time64, sys_mq_timedsend)
#define __NR_mq_timedreceive_time64 419
__SYSCALL(__NR_mq_timedreceive_time64, sys_mq_timedreceive)
#define __NR_semtimedop_time64 420
__SYSCALL(__NR_semtimedop_time64, sys_semtimedop)
#define __NR_rt_sigtimedwait_time64 421
__SC_COMP(__NR_rt_sigtimedwait_time64, sys_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time64)
#define __NR_futex_time64 422
__SYSCALL(__NR_futex_time64, sys_futex)
#define __NR_sched_rr_get_interval_time64 423
__SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval)
#endif
#undef __NR_syscalls
#define __NR_syscalls 295
#define __NR_syscalls 424
/*
* 32 bit systems traditionally used different
......
......@@ -292,10 +292,11 @@ struct sockaddr_in {
#define IN_LOOPBACK(a) ((((long int) (a)) & 0xff000000) == 0x7f000000)
/* Defines for Multicast INADDR */
#define INADDR_UNSPEC_GROUP 0xe0000000U /* 224.0.0.0 */
#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */
#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */
#define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */
#define INADDR_UNSPEC_GROUP 0xe0000000U /* 224.0.0.0 */
#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */
#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */
#define INADDR_ALLSNOOPERS_GROUP 0xe000006aU /* 224.0.0.106 */
#define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */
#endif
/* <asm/byteorder.h> contains the htonl type stuff.. */
......
......@@ -10,6 +10,7 @@
#ifndef __LIBBPF_LIBBPF_H
#define __LIBBPF_LIBBPF_H
#include <stdarg.h>
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
......
......@@ -584,6 +584,20 @@ llvm.*::
llvm.opts::
Options passed to llc.
samples.*::
samples.context::
Define how many ns worth of time to show
around samples in perf report sample context browser.
scripts.*::
Any option defines a script that is added to the scripts menu
in the interactive perf browser and whose output is displayed.
The name of the option is the name, the value is a script command line.
The script gets the same options passed as a full perf script,
in particular -i perfdata file, --cpu, --tid
SEE ALSO
--------
linkperf:perf[1]
......@@ -105,6 +105,8 @@ OPTIONS
guest machine
- sample: Number of sample
- period: Raw number of event count of sample
- time: Separate the samples by time stamp with the resolution specified by
--time-quantum (default 100ms). Specify with overhead and before it.
By default, comm, dso and symbol keys are used.
(i.e. --sort comm,dso,symbol)
......@@ -459,6 +461,10 @@ include::itrace.txt[]
--socket-filter::
Only report the samples on the processor socket that match with this filter
--samples=N::
Save N individual samples for each histogram entry to show context in perf
report tui browser.
--raw-trace::
When displaying traceevent output, do not use print fmt or plugins.
......@@ -477,6 +483,9 @@ include::itrace.txt[]
Please note that not all mmaps are stored, options affecting which ones
are include 'perf record --data', for instance.
--ns::
Show time stamps in nanoseconds.
--stats::
Display overall events statistics without any further processing.
(like the one at the end of the perf report -D command)
......@@ -494,6 +503,10 @@ include::itrace.txt[]
The period/hits keywords set the base the percentage is computed
on - the samples period or the number of samples (hits).
--time-quantum::
Configure time quantum for time sort key. Default 100ms.
Accepts s, us, ms, ns units.
include::callchain-overhead-calculation.txt[]
SEE ALSO
......
......@@ -15,6 +15,7 @@ To see callchains in a more compact form: perf report -g folded
Show individual samples with: perf script
Limit to show entries above 5% only: perf report --percent-limit 5
Profiling branch (mis)predictions with: perf record -b / perf report
To show assembler sample contexts use perf record -b / perf script -F +brstackinsn --xed
Treat branches as callchains: perf report --branch-history
To count events in every 1000 msec: perf stat -I 1000
Print event counts in CSV format with: perf stat -x,
......@@ -34,3 +35,9 @@ Show current config key-value pairs: perf config --list
Show user configuration overrides: perf config --user --list
To add Node.js USDT(User-Level Statically Defined Tracing): perf buildid-cache --add `which node`
To report cacheline events from previous recording: perf c2c report
To browse sample contexts use perf report --samples 10 and select in context menu
To separate samples by time use perf report --sort time,overhead,sym
To set sample time separation other than 100ms with --sort time use --time-quantum
Add -I to perf record to sample register values, which will be visible in perf report sample context.
To show IPC for sampling periods use perf record -e '{cycles,instructions}:S' and then browse context
To show context switches in perf report sample context add --switch-events to perf record.
......@@ -343,6 +343,8 @@
332 common statx __x64_sys_statx
333 common io_pgetevents __x64_sys_io_pgetevents
334 common rseq __x64_sys_rseq
# don't use numbers 387 through 423, add new calls after the last
# 'common' entry
#
# x32-specific system call numbers start at 512 to avoid cache impact
......@@ -361,7 +363,7 @@
520 x32 execve __x32_compat_sys_execve/ptregs
521 x32 ptrace __x32_compat_sys_ptrace
522 x32 rt_sigpending __x32_compat_sys_rt_sigpending
523 x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait
523 x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait_time64
524 x32 rt_sigqueueinfo __x32_compat_sys_rt_sigqueueinfo
525 x32 sigaltstack __x32_compat_sys_sigaltstack
526 x32 timer_create __x32_compat_sys_timer_create
......@@ -375,7 +377,7 @@
534 x32 preadv __x32_compat_sys_preadv64
535 x32 pwritev __x32_compat_sys_pwritev64
536 x32 rt_tgsigqueueinfo __x32_compat_sys_rt_tgsigqueueinfo
537 x32 recvmmsg __x32_compat_sys_recvmmsg
537 x32 recvmmsg __x32_compat_sys_recvmmsg_time64
538 x32 sendmmsg __x32_compat_sys_sendmmsg
539 x32 process_vm_readv __x32_compat_sys_process_vm_readv
540 x32 process_vm_writev __x32_compat_sys_process_vm_writev
......
......@@ -14,5 +14,6 @@ perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
perf-$(CONFIG_AUXTRACE) += auxtrace.o
perf-$(CONFIG_AUXTRACE) += archinsn.o
perf-$(CONFIG_AUXTRACE) += intel-pt.o
perf-$(CONFIG_AUXTRACE) += intel-bts.o
// SPDX-License-Identifier: GPL-2.0
#include "perf.h"
#include "archinsn.h"
#include "util/intel-pt-decoder/insn.h"
#include "machine.h"
#include "thread.h"
#include "symbol.h"
/*
 * Fill in sample->insn / sample->insn_len for a sample that carries no
 * instruction bytes of its own.
 *
 * @sample:  sample to update; sample->ip selects the address to read from
 * @thread:  thread handed to thread__memcpy() for the read
 * @machine: machine handed to thread__memcpy() for the read
 *
 * Up to sizeof(sample->insn) bytes are copied into sample->insn and handed
 * to the instruction decoder. sample->insn_len is only set when the decoder
 * reports a complete instruction that fits in the bytes actually read;
 * otherwise the sample's insn_len is left untouched.
 */
void arch_fetch_insn(struct perf_sample *sample,
		     struct thread *thread,
		     struct machine *machine)
{
	bool is64bit = false;
	struct insn insn;
	int nbytes;

	/* Nothing to fetch without an instruction pointer. */
	if (sample->ip == 0)
		return;

	nbytes = thread__memcpy(thread, machine, sample->insn, sample->ip,
				sizeof(sample->insn), &is64bit);
	if (nbytes < 1)
		return;

	insn_init(&insn, sample->insn, nbytes, is64bit);
	insn_get_length(&insn);

	/* Ignore truncated decodes and lengths beyond what was read. */
	if (!insn_complete(&insn) || insn.length > nbytes)
		return;

	sample->insn_len = insn.length;
}
......@@ -392,7 +392,7 @@ static int record__process_auxtrace(struct perf_tool *tool,
size_t padding;
u8 pad[8] = {0};
if (!perf_data__is_pipe(data)) {
if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) {
off_t file_offset;
int fd = perf_data__fd(data);
int err;
......@@ -837,6 +837,8 @@ static void record__init_features(struct record *rec)
if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
perf_header__clear_feat(&session->header, HEADER_CLOCKID);
perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
perf_header__clear_feat(&session->header, HEADER_STAT);
}
......
......@@ -47,9 +47,11 @@
#include <errno.h>
#include <inttypes.h>
#include <regex.h>
#include "sane_ctype.h"
#include <signal.h>
#include <linux/bitmap.h>
#include <linux/stringify.h>
#include <linux/time64.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
......@@ -926,6 +928,43 @@ report_parse_callchain_opt(const struct option *opt, const char *arg, int unset)
return parse_callchain_report_opt(arg);
}
/*
 * Option callback for --time-quantum: parse "<number>[ ][s|ms|us|ns]" and
 * store the result in nanoseconds.
 *
 * @opt:   option descriptor; opt->value points to the unsigned long that
 *         receives the quantum
 * @arg:   user supplied string; a number without a unit is stored as is
 *         (i.e. it is taken as nanoseconds)
 * @unset: unused
 *
 * Returns 0 on success. Returns -1, after printing an error, when no number
 * could be parsed, the number is 0, the unit is unknown, or the value does
 * not fit in an unsigned long once scaled to nanoseconds. The destination
 * is only written on success, so a rejected argument cannot leave a bogus
 * quantum behind.
 */
static int
parse_time_quantum(const struct option *opt, const char *arg,
		   int unset __maybe_unused)
{
	unsigned long *time_q = opt->value;
	unsigned long val, unit;
	char *end;

	val = strtoul(arg, &end, 0);
	if (end == arg)
		goto parse_err;
	if (val == 0) {
		pr_err("time quantum cannot be 0\n");
		return -1;
	}

	/* Allow blanks between the number and its unit. */
	while (isspace(*end))
		end++;

	if (*end == 0 || !strcmp(end, "ns"))
		unit = 1;
	else if (!strcmp(end, "s"))
		unit = NSEC_PER_SEC;
	else if (!strcmp(end, "ms"))
		unit = NSEC_PER_MSEC;
	else if (!strcmp(end, "us"))
		unit = NSEC_PER_USEC;
	else
		goto parse_err;

	/* Reject values that would wrap when scaled to nanoseconds. */
	if (val > ~0UL / unit)
		goto parse_err;

	*time_q = val * unit;
	return 0;

parse_err:
	pr_err("Cannot parse time quantum `%s'\n", arg);
	return -1;
}
int
report_parse_ignore_callees_opt(const struct option *opt __maybe_unused,
const char *arg, int unset __maybe_unused)
......@@ -1120,6 +1159,8 @@ int cmd_report(int argc, const char **argv)
OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
"Enable kernel symbol demangling"),
OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
OPT_INTEGER(0, "samples", &symbol_conf.res_sample,
"Number of samples to save per histogram entry for individual browsing"),
OPT_CALLBACK(0, "percent-limit", &report, "percent",
"Don't show entries under that percent", parse_percent_limit),
OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
......@@ -1147,6 +1188,10 @@ int cmd_report(int argc, const char **argv)
OPT_CALLBACK(0, "percent-type", &report.annotation_opts, "local-period",
"Set percent type local/global-period/hits",
annotate_parse_percent_type),
OPT_BOOLEAN(0, "ns", &symbol_conf.nanosecs, "Show times in nanosecs"),
OPT_CALLBACK(0, "time-quantum", &symbol_conf.time_quantum, "time (ms|us|ns|s)",
"Set time quantum for time sort key (default 100ms)",
parse_time_quantum),
OPT_END()
};
struct perf_data data = {
......
......@@ -29,10 +29,12 @@
#include "util/time-utils.h"
#include "util/path.h"
#include "print_binary.h"
#include "archinsn.h"
#include <linux/bitmap.h>
#include <linux/kernel.h>
#include <linux/stringify.h>
#include <linux/time64.h>
#include <sys/utsname.h>
#include "asm/bug.h"
#include "util/mem-events.h"
#include "util/dump-insn.h"
......@@ -58,11 +60,11 @@ static bool no_callchain;
static bool latency_format;
static bool system_wide;
static bool print_flags;
static bool nanosecs;
static const char *cpu_list;
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
static struct perf_stat_config stat_config;
static int max_blocks;
static bool native_arch;
unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
......@@ -688,7 +690,7 @@ static int perf_sample__fprintf_start(struct perf_sample *sample,
secs = nsecs / NSEC_PER_SEC;
nsecs -= secs * NSEC_PER_SEC;
if (nanosecs)
if (symbol_conf.nanosecs)
printed += fprintf(fp, "%5lu.%09llu: ", secs, nsecs);
else {
char sample_time[32];
......@@ -1227,6 +1229,12 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
return len + dlen;
}
/*
 * Fallback arch_fetch_insn(): does nothing, so sample->insn_len is left as
 * it was. It is declared __weak; presumably an architecture specific,
 * non-weak definition replaces it at link time where one is built (confirm
 * against the arch Build files).
 */
__weak void arch_fetch_insn(struct perf_sample *sample __maybe_unused,
struct thread *thread __maybe_unused,
struct machine *machine __maybe_unused)
{
}
static int perf_sample__fprintf_insn(struct perf_sample *sample,
struct perf_event_attr *attr,
struct thread *thread,
......@@ -1234,9 +1242,12 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample,
{
int printed = 0;
if (sample->insn_len == 0 && native_arch)
arch_fetch_insn(sample, thread, machine);
if (PRINT_FIELD(INSNLEN))
printed += fprintf(fp, " ilen: %d", sample->insn_len);
if (PRINT_FIELD(INSN)) {
if (PRINT_FIELD(INSN) && sample->insn_len) {
int i;
printed += fprintf(fp, " insn:");
......@@ -1922,6 +1933,13 @@ static int cleanup_scripting(void)
return scripting_ops ? scripting_ops->stop_script() : 0;
}
static bool filter_cpu(struct perf_sample *sample)
{
if (cpu_list)
return !test_bit(sample->cpu, cpu_bitmap);
return false;
}
static int process_sample_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
......@@ -1956,7 +1974,7 @@ static int process_sample_event(struct perf_tool *tool,
if (al.filtered)
goto out_put;
if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
if (filter_cpu(sample))
goto out_put;
if (scripting_ops)
......@@ -2041,9 +2059,11 @@ static int process_comm_event(struct perf_tool *tool,
sample->tid = event->comm.tid;
sample->pid = event->comm.pid;
}
perf_sample__fprintf_start(sample, thread, evsel,
if (!filter_cpu(sample)) {
perf_sample__fprintf_start(sample, thread, evsel,
PERF_RECORD_COMM, stdout);
perf_event__fprintf(event, stdout);
perf_event__fprintf(event, stdout);
}
ret = 0;
out:
thread__put(thread);
......@@ -2077,9 +2097,11 @@ static int process_namespaces_event(struct perf_tool *tool,
sample->tid = event->namespaces.tid;
sample->pid = event->namespaces.pid;
}
perf_sample__fprintf_start(sample, thread, evsel,
PERF_RECORD_NAMESPACES, stdout);
perf_event__fprintf(event, stdout);
if (!filter_cpu(sample)) {
perf_sample__fprintf_start(sample, thread, evsel,
PERF_RECORD_NAMESPACES, stdout);
perf_event__fprintf(event, stdout);
}
ret = 0;
out:
thread__put(thread);
......@@ -2111,9 +2133,11 @@ static int process_fork_event(struct perf_tool *tool,
sample->tid = event->fork.tid;
sample->pid = event->fork.pid;
}
perf_sample__fprintf_start(sample, thread, evsel,
PERF_RECORD_FORK, stdout);
perf_event__fprintf(event, stdout);
if (!filter_cpu(sample)) {
perf_sample__fprintf_start(sample, thread, evsel,
PERF_RECORD_FORK, stdout);
perf_event__fprintf(event, stdout);
}
thread__put(thread);
return 0;
......@@ -2141,9 +2165,11 @@ static int process_exit_event(struct perf_tool *tool,
sample->tid = event->fork.tid;
sample->pid = event->fork.pid;
}
perf_sample__fprintf_start(sample, thread, evsel,
PERF_RECORD_EXIT, stdout);
perf_event__fprintf(event, stdout);
if (!filter_cpu(sample)) {
perf_sample__fprintf_start(sample, thread, evsel,
PERF_RECORD_EXIT, stdout);
perf_event__fprintf(event, stdout);
}
if (perf_event__process_exit(tool, event, sample, machine) < 0)
err = -1;
......@@ -2177,9 +2203,11 @@ static int process_mmap_event(struct perf_tool *tool,
sample->tid = event->mmap.tid;
sample->pid = event->mmap.pid;
}
perf_sample__fprintf_start(sample, thread, evsel,
PERF_RECORD_MMAP, stdout);
perf_event__fprintf(event, stdout);
if (!filter_cpu(sample)) {
perf_sample__fprintf_start(sample, thread, evsel,
PERF_RECORD_MMAP, stdout);
perf_event__fprintf(event, stdout);
}
thread__put(thread);
return 0;
}
......@@ -2209,9 +2237,11 @@ static int process_mmap2_event(struct perf_tool *tool,
sample->tid = event->mmap2.tid;
sample->pid = event->mmap2.pid;
}
perf_sample__fprintf_start(sample, thread, evsel,
PERF_RECORD_MMAP2, stdout);
perf_event__fprintf(event, stdout);
if (!filter_cpu(sample)) {
perf_sample__fprintf_start(sample, thread, evsel,
PERF_RECORD_MMAP2, stdout);
perf_event__fprintf(event, stdout);
}
thread__put(thread);
return 0;
}
......@@ -2236,9 +2266,11 @@ static int process_switch_event(struct perf_tool *tool,
return -1;
}
perf_sample__fprintf_start(sample, thread, evsel,
PERF_RECORD_SWITCH, stdout);
perf_event__fprintf(event, stdout);
if (!filter_cpu(sample)) {
perf_sample__fprintf_start(sample, thread, evsel,
PERF_RECORD_SWITCH, stdout);
perf_event__fprintf(event, stdout);
}
thread__put(thread);
return 0;
}
......@@ -2259,9 +2291,11 @@ process_lost_event(struct perf_tool *tool,
if (thread == NULL)
return -1;
perf_sample__fprintf_start(sample, thread, evsel,
PERF_RECORD_LOST, stdout);
perf_event__fprintf(event, stdout);
if (!filter_cpu(sample)) {
perf_sample__fprintf_start(sample, thread, evsel,
PERF_RECORD_LOST, stdout);
perf_event__fprintf(event, stdout);
}
thread__put(thread);
return 0;
}
......@@ -2948,7 +2982,8 @@ static int check_ev_match(char *dir_name, char *scriptname,
* will list all statically runnable scripts, select one, execute it and
* show the output in a perf browser.
*/
int find_scripts(char **scripts_array, char **scripts_path_array)
int find_scripts(char **scripts_array, char **scripts_path_array, int num,
int pathlen)
{
struct dirent *script_dirent, *lang_dirent;
char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN];
......@@ -2993,7 +3028,10 @@ int find_scripts(char **scripts_array, char **scripts_path_array)
/* Skip those real time scripts: xxxtop.p[yl] */
if (strstr(script_dirent->d_name, "top."))
continue;
sprintf(scripts_path_array[i], "%s/%s", lang_path,
if (i >= num)
break;
snprintf(scripts_path_array[i], pathlen, "%s/%s",
lang_path,
script_dirent->d_name);
temp = strchr(script_dirent->d_name, '.');
snprintf(scripts_array[i],
......@@ -3232,7 +3270,7 @@ static int parse_insn_trace(const struct option *opt __maybe_unused,
{
parse_output_fields(NULL, "+insn,-event,-period", 0);
itrace_parse_synth_opts(opt, "i0ns", 0);
nanosecs = true;
symbol_conf.nanosecs = true;
return 0;
}
......@@ -3250,7 +3288,7 @@ static int parse_call_trace(const struct option *opt __maybe_unused,
{
parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent", 0);
itrace_parse_synth_opts(opt, "cewp", 0);
nanosecs = true;
symbol_conf.nanosecs = true;
return 0;
}
......@@ -3260,7 +3298,7 @@ static int parse_callret_trace(const struct option *opt __maybe_unused,
{
parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent,+flags", 0);
itrace_parse_synth_opts(opt, "crewp", 0);
nanosecs = true;
symbol_conf.nanosecs = true;
return 0;
}
......@@ -3277,6 +3315,7 @@ int cmd_script(int argc, const char **argv)
.set = false,
.default_no_sample = true,
};
struct utsname uts;
char *script_path = NULL;
const char **__argv;
int i, j, err = 0;
......@@ -3395,7 +3434,7 @@ int cmd_script(int argc, const char **argv)
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
OPT_INTEGER(0, "max-blocks", &max_blocks,
"Maximum number of code blocks to dump with brstackinsn"),
OPT_BOOLEAN(0, "ns", &nanosecs,
OPT_BOOLEAN(0, "ns", &symbol_conf.nanosecs,
"Use 9 decimal places when displaying time"),
OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
"Instruction Tracing options\n" ITRACE_HELP,
......@@ -3615,6 +3654,12 @@ int cmd_script(int argc, const char **argv)
if (symbol__init(&session->header.env) < 0)
goto out_delete;
uname(&uts);
if (!strcmp(uts.machine, session->header.env.arch) ||
(!strcmp(uts.machine, "x86_64") &&
!strcmp(session->header.env.arch, "i386")))
native_arch = true;
script.session = session;
script__setup_sample_type(&script);
......
......@@ -40,5 +40,6 @@ int cmd_mem(int argc, const char **argv);
int cmd_data(int argc, const char **argv);
int cmd_ftrace(int argc, const char **argv);
int find_scripts(char **scripts_array, char **scripts_path_array);
int find_scripts(char **scripts_array, char **scripts_path_array, int num,
int pathlen);
#endif
[
{
"EventName": "bp_l1_btb_correct",
"EventCode": "0x8a",
"BriefDescription": "L1 BTB Correction."
},
{
"EventName": "bp_l2_btb_correct",
"EventCode": "0x8b",
"BriefDescription": "L2 BTB Correction."
}
]
[
{
"EventName": "ic_fw32",
"EventCode": "0x80",
"BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)."
},
{
"EventName": "ic_fw32_miss",
"EventCode": "0x81",
"BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag."
},
{
"EventName": "ic_cache_fill_l2",
"EventCode": "0x82",
"BriefDescription": "The number of 64 byte instruction cache lines fulfilled from the L2 cache."
},
{
"EventName": "ic_cache_fill_sys",
"EventCode": "0x83",
"BriefDescription": "The number of 64 byte instruction cache lines fulfilled from system memory or another cache."
},
{
"EventName": "bp_l1_tlb_miss_l2_hit",
"EventCode": "0x84",
"BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB."
},
{
"EventName": "bp_l1_tlb_miss_l2_miss",
"EventCode": "0x85",
"BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs."
},
{
"EventName": "bp_snp_re_sync",
"EventCode": "0x86",
"BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event."
},
{
"EventName": "ic_fetch_stall.ic_stall_any",
"EventCode": "0x87",
"BriefDescription": "IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).",
"PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).",
"UMask": "0x4"
},
{
"EventName": "ic_fetch_stall.ic_stall_dq_empty",
"EventCode": "0x87",
"BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.",
"PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.",
"UMask": "0x2"
},
{
"EventName": "ic_fetch_stall.ic_stall_back_pressure",
"EventCode": "0x87",
"BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.",
"PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.",
"UMask": "0x1"
},
{
"EventName": "ic_cache_inval.l2_invalidating_probe",
"EventCode": "0x8c",
"BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS).",
"PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to L2 invalidating probe (external or LS).",
"UMask": "0x2"
},
{
"EventName": "ic_cache_inval.fill_invalidated",
"EventCode": "0x8c",
"BriefDescription": "IC line invalidated due to overwriting fill response.",
"PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to overwriting fill response.",
"UMask": "0x1"
},
{
"EventName": "bp_tlb_rel",
"EventCode": "0x99",
"BriefDescription": "The number of ITLB reload requests."
},
{
"EventName": "l2_request_g1.rd_blk_l",
"EventCode": "0x60",
"BriefDescription": "Requests to L2 Group1.",
"PublicDescription": "Requests to L2 Group1.",
"UMask": "0x80"
},
{
"EventName": "l2_request_g1.rd_blk_x",
"EventCode": "0x60",
"BriefDescription": "Requests to L2 Group1.",
"PublicDescription": "Requests to L2 Group1.",
"UMask": "0x40"
},
{
"EventName": "l2_request_g1.ls_rd_blk_c_s",
"EventCode": "0x60",
"BriefDescription": "Requests to L2 Group1.",
"PublicDescription": "Requests to L2 Group1.",
"UMask": "0x20"
},
{
"EventName": "l2_request_g1.cacheable_ic_read",
"EventCode": "0x60",
"BriefDescription": "Requests to L2 Group1.",
"PublicDescription": "Requests to L2 Group1.",
"UMask": "0x10"
},
{
"EventName": "l2_request_g1.change_to_x",
"EventCode": "0x60",
"BriefDescription": "Requests to L2 Group1.",
"PublicDescription": "Requests to L2 Group1.",
"UMask": "0x8"
},
{
"EventName": "l2_request_g1.prefetch_l2",
"EventCode": "0x60",
"BriefDescription": "Requests to L2 Group1.",
"PublicDescription": "Requests to L2 Group1.",
"UMask": "0x4"
},
{
"EventName": "l2_request_g1.l2_hw_pf",
"EventCode": "0x60",
"BriefDescription": "Requests to L2 Group1.",
"PublicDescription": "Requests to L2 Group1.",
"UMask": "0x2"
},
{
"EventName": "l2_request_g1.other_requests",
"EventCode": "0x60",
"BriefDescription": "Events covered by l2_request_g2.",
"PublicDescription": "Requests to L2 Group1. Events covered by l2_request_g2.",
"UMask": "0x1"
},
{
"EventName": "l2_request_g2.group1",
"EventCode": "0x61",
"BriefDescription": "All Group 1 commands not in unit0.",
"PublicDescription": "Multi-events in that LS and IF requests can be received simultaneously. All Group 1 commands not in unit0.",
"UMask": "0x80"
},
{
"EventName": "l2_request_g2.ls_rd_sized",
"EventCode": "0x61",
"BriefDescription": "RdSized, RdSized32, RdSized64.",
"PublicDescription": "Multi-events in that LS and IF requests can be received simultaneously. RdSized, RdSized32, RdSized64.",
"UMask": "0x40"
},
{
"EventName": "l2_request_g2.ls_rd_sized_nc",
"EventCode": "0x61",
"BriefDescription": "RdSizedNC, RdSized32NC, RdSized64NC.",
"PublicDescription": "Multi-events in that LS and IF requests can be received simultaneously. RdSizedNC, RdSized32NC, RdSized64NC.",
"UMask": "0x20"
},
{
"EventName": "l2_request_g2.ic_rd_sized",
"EventCode": "0x61",
"BriefDescription": "Multi-events in that LS and IF requests can be received simultaneously.",
"PublicDescription": "Multi-events in that LS and IF requests can be received simultaneously.",
"UMask": "0x10"
},
{
"EventName": "l2_request_g2.ic_rd_sized_nc",
"EventCode": "0x61",
"BriefDescription": "Multi-events in that LS and IF requests can be received simultaneously.",
"PublicDescription": "Multi-events in that LS and IF requests can be received simultaneously.",
"UMask": "0x8"
},
{
"EventName": "l2_request_g2.smc_inval",
"EventCode": "0x61",
"BriefDescription": "Multi-events in that LS and IF requests can be received simultaneously.",
"PublicDescription": "Multi-events in that LS and IF requests can be received simultaneously.",
"UMask": "0x4"
},
{
"EventName": "l2_request_g2.bus_locks_originator",
"EventCode": "0x61",
"BriefDescription": "Multi-events in that LS and IF requests can be received simultaneously.",
"PublicDescription": "Multi-events in that LS and IF requests can be received simultaneously.",
"UMask": "0x2"
},
{
"EventName": "l2_request_g2.bus_locks_responses",
"EventCode": "0x61",
"BriefDescription": "Multi-events in that LS and IF requests can be received simultaneously.",
"PublicDescription": "Multi-events in that LS and IF requests can be received simultaneously.",
"UMask": "0x1"
},
{
"EventName": "l2_latency.l2_cycles_waiting_on_fills",
"EventCode": "0x62",
"BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.",
"PublicDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.",
"UMask": "0x1"
},
{
"EventName": "l2_wcb_req.wcb_write",
"EventCode": "0x63",
"PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.",
"BriefDescription": "LS to L2 WCB write requests.",
"UMask": "0x40"
},
{
"EventName": "l2_wcb_req.wcb_close",
"EventCode": "0x63",
"BriefDescription": "LS to L2 WCB close requests.",
"PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.",
"UMask": "0x20"
},
{
"EventName": "l2_wcb_req.zero_byte_store",
"EventCode": "0x63",
"BriefDescription": "LS to L2 WCB zero byte store requests.",
"PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.",
"UMask": "0x4"
},
{
"EventName": "l2_wcb_req.cl_zero",
"EventCode": "0x63",
"BriefDescription": "LS to L2 WCB cache line zeroing requests.",
"PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.",
"UMask": "0x1"
},
{
"EventName": "l2_cache_req_stat.ls_rd_blk_cs",
"EventCode": "0x64",
"BriefDescription": "LS ReadBlock C/S Hit.",
"PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS ReadBlock C/S Hit.",
"UMask": "0x80"
},
{
"EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x",
"EventCode": "0x64",
"BriefDescription": "LS Read Block L Hit X.",
"PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block L Hit X.",
"UMask": "0x40"
},
{
"EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s",
"EventCode": "0x64",
"BriefDescription": "LsRdBlkL Hit Shared.",
"PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkL Hit Shared.",
"UMask": "0x20"
},
{
"EventName": "l2_cache_req_stat.ls_rd_blk_x",
"EventCode": "0x64",
"BriefDescription": "LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.",
"PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.",
"UMask": "0x10"
},
{
"EventName": "l2_cache_req_stat.ls_rd_blk_c",
"EventCode": "0x64",
"BriefDescription": "LS Read Block C S L X Change to X Miss.",
"PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block C S L X Change to X Miss.",
"UMask": "0x8"
},
{
"EventName": "l2_cache_req_stat.ic_fill_hit_x",
"EventCode": "0x64",
"BriefDescription": "IC Fill Hit Exclusive Stale.",
"PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Exclusive Stale.",
"UMask": "0x4"
},
{
"EventName": "l2_cache_req_stat.ic_fill_hit_s",
"EventCode": "0x64",
"BriefDescription": "IC Fill Hit Shared.",
"PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Shared.",
"UMask": "0x2"
},
{
"EventName": "l2_cache_req_stat.ic_fill_miss",
"EventCode": "0x64",
"BriefDescription": "IC Fill Miss.",
"PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Miss.",
"UMask": "0x1"
},
{
"EventName": "l2_fill_pending.l2_fill_busy",
"EventCode": "0x6d",
"BriefDescription": "Total cycles spent with one or more fill requests in flight from L2.",
"PublicDescription": "Total cycles spent with one or more fill requests in flight from L2.",
"UMask": "0x1"
}
]
[
{
"EventName": "ex_ret_instr",
"EventCode": "0xc0",
"BriefDescription": "Retired Instructions."
},
{
"EventName": "ex_ret_cops",
"EventCode": "0xc1",
"BriefDescription": "Retired Uops.",
"PublicDescription": "The number of uOps retired. This includes all processor activity (instructions, exceptions, interrupts, microcode assists, etc.). The number of events logged per cycle can vary from 0 to 4."
},
{
"EventName": "ex_ret_brn",
"EventCode": "0xc2",
"BriefDescription": "Retired Branch Instructions.",
"PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts."
},
{
"EventName": "ex_ret_brn_misp",
"EventCode": "0xc3",
"BriefDescription": "Retired Branch Instructions Mispredicted.",
"PublicDescription": "The number of branch instructions retired, of any type, that were not correctly predicted. This includes those for which prediction is not attempted (far control transfers, exceptions and interrupts)."
},
{
"EventName": "ex_ret_brn_tkn",
"EventCode": "0xc4",
"BriefDescription": "Retired Taken Branch Instructions.",
"PublicDescription": "The number of taken branches that were retired. This includes all types of architectural control flow changes, including exceptions and interrupts."
},
{
"EventName": "ex_ret_brn_tkn_misp",
"EventCode": "0xc5",
"BriefDescription": "Retired Taken Branch Instructions Mispredicted.",
"PublicDescription": "The number of retired taken branch instructions that were mispredicted."
},
{
"EventName": "ex_ret_brn_far",
"EventCode": "0xc6",
"BriefDescription": "Retired Far Control Transfers.",
"PublicDescription": "The number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction."
},
{
"EventName": "ex_ret_brn_resync",
"EventCode": "0xc7",
"BriefDescription": "Retired Branch Resyncs.",
"PublicDescription": "The number of resync branches. These reflect pipeline restarts due to certain microcode assists and events such as writes to the active instruction stream, among other things. Each occurrence reflects a restart penalty similar to a branch mispredict. This is relatively rare."
},
{
"EventName": "ex_ret_near_ret",
"EventCode": "0xc8",
"BriefDescription": "Retired Near Returns.",
"PublicDescription": "The number of near return instructions (RET or RET Iw) retired."
},
{
"EventName": "ex_ret_near_ret_mispred",
"EventCode": "0xc9",
"BriefDescription": "Retired Near Returns Mispredicted.",
"PublicDescription": "The number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction."
},
{
"EventName": "ex_ret_brn_ind_misp",
"EventCode": "0xca",
"BriefDescription": "Retired Indirect Branch Instructions Mispredicted.",
"PublicDescription": "Retired Indirect Branch Instructions Mispredicted."
},
{
"EventName": "ex_ret_mmx_fp_instr.sse_instr",
"EventCode": "0xcb",
"BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).",
"PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).",
"UMask": "0x4"
},
{
"EventName": "ex_ret_mmx_fp_instr.mmx_instr",
"EventCode": "0xcb",
"BriefDescription": "MMX instructions.",
"PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.",
"UMask": "0x2"
},
{
"EventName": "ex_ret_mmx_fp_instr.x87_instr",
"EventCode": "0xcb",
"BriefDescription": "x87 instructions.",
"PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.",
"UMask": "0x1"
},
{
"EventName": "ex_ret_cond",
"EventCode": "0xd1",
"BriefDescription": "Retired Conditional Branch Instructions."
},
{
"EventName": "ex_ret_cond_misp",
"EventCode": "0xd2",
"BriefDescription": "Retired Conditional Branch Instructions Mispredicted."
},
{
"EventName": "ex_div_busy",
"EventCode": "0xd3",
"BriefDescription": "Div Cycles Busy count."
},
{
"EventName": "ex_div_count",
"EventCode": "0xd4",
"BriefDescription": "Div Op Count."
},
{
"EventName": "ex_tagged_ibs_ops.ibs_count_rollover",
"EventCode": "0x1cf",
"BriefDescription": "Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.",
"PublicDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.",
"UMask": "0x4"
},
{
"EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret",
"EventCode": "0x1cf",
"BriefDescription": "Number of Ops tagged by IBS that retired.",
"PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.",
"UMask": "0x2"
},
{
"EventName": "ex_tagged_ibs_ops.ibs_tagged_ops",
"EventCode": "0x1cf",
"BriefDescription": "Number of Ops tagged by IBS.",
"PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.",
"UMask": "0x1"
},
{
"EventName": "ex_ret_fus_brnch_inst",
"EventCode": "0x1d0",
"BriefDescription": "The number of fused retired branch instructions retired per cycle. The number of events logged per cycle can vary from 0 to 3."
}
]
[
{
"EventName": "fpu_pipe_assignment.dual",
"EventCode": "0x00",
"BriefDescription": "Total number multi-pipe uOps.",
"PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to Pipe 3.",
"UMask": "0xf0"
},
{
"EventName": "fpu_pipe_assignment.total",
"EventCode": "0x00",
"BriefDescription": "Total number uOps.",
"PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to Pipe 3.",
"UMask": "0xf"
},
{
"EventName": "fp_sched_empty",
"EventCode": "0x01",
"BriefDescription": "This is a speculative event. The number of cycles in which the FPU scheduler is empty. Note that some Ops like FP loads bypass the scheduler."
},
{
"EventName": "fp_retx87_fp_ops.all",
"EventCode": "0x02",
"BriefDescription": "All Ops.",
"PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8.",
"UMask": "0x7"
},
{
"EventName": "fp_retx87_fp_ops.div_sqr_r_ops",
"EventCode": "0x02",
"BriefDescription": "Divide and square root Ops.",
"PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Divide and square root Ops.",
"UMask": "0x4"
},
{
"EventName": "fp_retx87_fp_ops.mul_ops",
"EventCode": "0x02",
"BriefDescription": "Multiply Ops.",
"PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Multiply Ops.",
"UMask": "0x2"
},
{
"EventName": "fp_retx87_fp_ops.add_sub_ops",
"EventCode": "0x02",
"BriefDescription": "Add/subtract Ops.",
"PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Add/subtract Ops.",
"UMask": "0x1"
},
{
"EventName": "fp_ret_sse_avx_ops.all",
"EventCode": "0x03",
"BriefDescription": "All FLOPS.",
"PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.",
"UMask": "0xff"
},
{
"EventName": "fp_ret_sse_avx_ops.dp_mult_add_flops",
"EventCode": "0x03",
"BriefDescription": "Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.",
"PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.",
"UMask": "0x80"
},
{
"EventName": "fp_ret_sse_avx_ops.dp_div_flops",
"EventCode": "0x03",
"BriefDescription": "Double precision divide/square root FLOPS.",
"PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision divide/square root FLOPS.",
"UMask": "0x40"
},
{
"EventName": "fp_ret_sse_avx_ops.dp_mult_flops",
"EventCode": "0x03",
"BriefDescription": "Double precision multiply FLOPS.",
"PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision multiply FLOPS.",
"UMask": "0x20"
},
{
"EventName": "fp_ret_sse_avx_ops.dp_add_sub_flops",
"EventCode": "0x03",
"BriefDescription": "Double precision add/subtract FLOPS.",
"PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision add/subtract FLOPS.",
"UMask": "0x10"
},
{
"EventName": "fp_ret_sse_avx_ops.sp_mult_add_flops",
"EventCode": "0x03",
"BriefDescription": "Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.",
"PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.",
"UMask": "0x8"
},
{
"EventName": "fp_ret_sse_avx_ops.sp_div_flops",
"EventCode": "0x03",
"BriefDescription": "Single-precision divide/square root FLOPS.",
"PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision divide/square root FLOPS.",
"UMask": "0x4"
},
{
"EventName": "fp_ret_sse_avx_ops.sp_mult_flops",
"EventCode": "0x03",
"BriefDescription": "Single-precision multiply FLOPS.",
"PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision multiply FLOPS.",
"UMask": "0x2"
},
{
"EventName": "fp_ret_sse_avx_ops.sp_add_sub_flops",
"EventCode": "0x03",
"BriefDescription": "Single-precision add/subtract FLOPS.",
"PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision add/subtract FLOPS.",
"UMask": "0x1"
},
{
"EventName": "fp_num_mov_elim_scal_op.optimized",
"EventCode": "0x04",
"BriefDescription": "Number of Scalar Ops optimized.",
"PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Scalar Ops optimized.",
"UMask": "0x8"
},
{
"EventName": "fp_num_mov_elim_scal_op.opt_potential",
"EventCode": "0x04",
"BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass).",
"PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Ops that are candidates for optimization (have Z-bit either set or pass).",
"UMask": "0x4"
},
{
"EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim",
"EventCode": "0x04",
"BriefDescription": "Number of SSE Move Ops eliminated.",
"PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops eliminated.",
"UMask": "0x2"
},
{
"EventName": "fp_num_mov_elim_scal_op.sse_mov_ops",
"EventCode": "0x04",
"BriefDescription": "Number of SSE Move Ops.",
"PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops.",
"UMask": "0x1"
},
{
"EventName": "fp_retired_ser_ops.x87_ctrl_ret",
"EventCode": "0x05",
"BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.",
"PublicDescription": "The number of serializing Ops retired. x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.",
"UMask": "0x8"
},
{
"EventName": "fp_retired_ser_ops.x87_bot_ret",
"EventCode": "0x05",
"BriefDescription": "x87 bottom-executing uOps retired.",
"PublicDescription": "The number of serializing Ops retired. x87 bottom-executing uOps retired.",
"UMask": "0x4"
},
{
"EventName": "fp_retired_ser_ops.sse_ctrl_ret",
"EventCode": "0x05",
"BriefDescription": "SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.",
"PublicDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.",
"UMask": "0x2"
},
{
"EventName": "fp_retired_ser_ops.sse_bot_ret",
"EventCode": "0x05",
"BriefDescription": "SSE bottom-executing uOps retired.",
"PublicDescription": "The number of serializing Ops retired. SSE bottom-executing uOps retired.",
"UMask": "0x1"
}
]
[
{
"EventName": "ls_locks.bus_lock",
"EventCode": "0x25",
"BriefDescription": "Bus lock when a locked operation crosses a cache boundary or is done on an uncacheable memory type.",
"PublicDescription": "Bus lock when a locked operation crosses a cache boundary or is done on an uncacheable memory type.",
"UMask": "0x1"
},
{
"EventName": "ls_dispatch.ld_st_dispatch",
"EventCode": "0x29",
"BriefDescription": "Load-op-Stores.",
"PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.",
"UMask": "0x4"
},
{
"EventName": "ls_dispatch.store_dispatch",
"EventCode": "0x29",
"BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
"PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
"UMask": "0x2"
},
{
"EventName": "ls_dispatch.ld_dispatch",
"EventCode": "0x29",
"BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
"PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
"UMask": "0x1"
},
{
"EventName": "ls_stlf",
"EventCode": "0x35",
"BriefDescription": "Number of STLF hits."
},
{
"EventName": "ls_dc_accesses",
"EventCode": "0x40",
"BriefDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event."
},
{
"EventName": "ls_l1_d_tlb_miss.all",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Miss or Reload of all sizes.",
"PublicDescription": "L1 DTLB Miss or Reload of all sizes.",
"UMask": "0xff"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Miss of a page of 1G size.",
"PublicDescription": "L1 DTLB Miss of a page of 1G size.",
"UMask": "0x80"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Miss of a page of 2M size.",
"PublicDescription": "L1 DTLB Miss of a page of 2M size.",
"UMask": "0x40"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_miss",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Miss of a page of 32K size.",
"PublicDescription": "L1 DTLB Miss of a page of 32K size.",
"UMask": "0x20"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Miss of a page of 4K size.",
"PublicDescription": "L1 DTLB Miss of a page of 4K size.",
"UMask": "0x10"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Reload of a page of 1G size.",
"PublicDescription": "L1 DTLB Reload of a page of 1G size.",
"UMask": "0x8"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Reload of a page of 2M size.",
"PublicDescription": "L1 DTLB Reload of a page of 2M size.",
"UMask": "0x4"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_hit",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Reload of a page of 32K size.",
"PublicDescription": "L1 DTLB Reload of a page of 32K size.",
"UMask": "0x2"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Reload of a page of 4K size.",
"PublicDescription": "L1 DTLB Reload of a page of 4K size.",
"UMask": "0x1"
},
{
"EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_iside",
"EventCode": "0x46",
"BriefDescription": "Tablewalker allocation.",
"PublicDescription": "Tablewalker allocation.",
"UMask": "0xc"
},
{
"EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_dside",
"EventCode": "0x46",
"BriefDescription": "Tablewalker allocation.",
"PublicDescription": "Tablewalker allocation.",
"UMask": "0x3"
},
{
"EventName": "ls_misal_accesses",
"EventCode": "0x47",
"BriefDescription": "Misaligned loads."
},
{
"EventName": "ls_pref_instr_disp.prefetch_nta",
"EventCode": "0x4b",
"BriefDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.",
"PublicDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.",
"UMask": "0x4"
},
{
"EventName": "ls_pref_instr_disp.store_prefetch_w",
"EventCode": "0x4b",
"BriefDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.",
"PublicDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.",
"UMask": "0x2"
},
{
"EventName": "ls_pref_instr_disp.load_prefetch_w",
"EventCode": "0x4b",
"BriefDescription": "Prefetch, Prefetch_T0_T1_T2.",
"PublicDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.",
"UMask": "0x1"
},
{
"EventName": "ls_inef_sw_pref.mab_mch_cnt",
"EventCode": "0x52",
"BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.",
"PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.",
"UMask": "0x2"
},
{
"EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit",
"EventCode": "0x52",
"BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.",
"PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.",
"UMask": "0x1"
},
{
"EventName": "ls_not_halted_cyc",
"EventCode": "0x76",
"BriefDescription": "Cycles not in Halt."
}
]
[
{
"EventName": "ic_oc_mode_switch.oc_ic_mode_switch",
"EventCode": "0x28a",
"BriefDescription": "OC to IC mode switch.",
"PublicDescription": "OC Mode Switch. OC to IC mode switch.",
"UMask": "0x2"
},
{
"EventName": "ic_oc_mode_switch.ic_oc_mode_switch",
"EventCode": "0x28a",
"BriefDescription": "IC to OC mode switch.",
"PublicDescription": "OC Mode Switch. IC to OC mode switch.",
"UMask": "0x1"
},
{
"EventName": "de_dis_dispatch_token_stalls0.retire_token_stall",
"EventCode": "0xaf",
"BriefDescription": "RETIRE Tokens unavailable.",
"PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.",
"UMask": "0x40"
},
{
"EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall",
"EventCode": "0xaf",
"BriefDescription": "AGSQ Tokens unavailable.",
"PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.",
"UMask": "0x20"
},
{
"EventName": "de_dis_dispatch_token_stalls0.alu_token_stall",
"EventCode": "0xaf",
"BriefDescription": "ALU tokens total unavailable.",
"PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.",
"UMask": "0x10"
},
{
"EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall",
"EventCode": "0xaf",
"BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.",
"PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.",
"UMask": "0x8"
},
{
"EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall",
"EventCode": "0xaf",
"BriefDescription": "ALSQ 3 Tokens unavailable.",
"PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.",
"UMask": "0x4"
},
{
"EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall",
"EventCode": "0xaf",
"BriefDescription": "ALSQ 2 Tokens unavailable.",
"PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.",
"UMask": "0x2"
},
{
"EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall",
"EventCode": "0xaf",
"BriefDescription": "ALSQ 1 Tokens unavailable.",
"PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.",
"UMask": "0x1"
}
]
......@@ -33,3 +33,4 @@ GenuineIntel-6-25,v2,westmereep-sp,core
GenuineIntel-6-2F,v2,westmereex,core
GenuineIntel-6-55-[01234],v1,skylakex,core
GenuineIntel-6-55-[56789ABCDEF],v1,cascadelakex,core
AuthenticAMD-23-[[:xdigit:]]+,v1,amdfam17h,core
......@@ -10,6 +10,8 @@
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
from __future__ import print_function
import os
import sys
import struct
......@@ -199,6 +201,18 @@ import datetime
from PySide.QtSql import *
# Strings that end up in the binary COPY files (toserverstr) or are passed to
# libpq through ctypes (toclientstr) must be byte strings.  A Python 2 'str'
# already is one; Python 3 text has to be encoded first.
if sys.version_info >= (3, 0):
	# Assume UTF-8 server_encoding and client_encoding
	def toserverstr(str):
		"""Return 'str' encoded as UTF-8 bytes for the server."""
		return bytes(str, "UTF_8")

	def toclientstr(str):
		"""Return 'str' encoded as UTF-8 bytes for the client library."""
		return bytes(str, "UTF_8")
else:
	def toserverstr(str):
		"""Python 2: the string is returned unchanged."""
		return str

	def toclientstr(str):
		"""Python 2: the string is returned unchanged."""
		return str
# Need to access PostgreSQL C library directly to use COPY FROM STDIN
from ctypes import *
libpq = CDLL("libpq.so.5")
......@@ -234,12 +248,17 @@ perf_db_export_mode = True
perf_db_export_calls = False
perf_db_export_callchains = False
def printerr(*args, **kw_args):
	"""Same as print(), except that the output goes to standard error."""
	err_stream = sys.stderr
	print(*args, file=err_stream, **kw_args)
def printdate(*args, **kw_args):
	"""Print the arguments, space separated, after the current date and time."""
	now = datetime.datetime.today()
	print(now, *args, sep=' ', **kw_args)
def usage():
print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]"
print >> sys.stderr, "where: columns 'all' or 'branches'"
print >> sys.stderr, " calls 'calls' => create calls and call_paths table"
print >> sys.stderr, " callchains 'callchains' => create call_paths table"
printerr("Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]")
printerr("where: columns 'all' or 'branches'")
printerr(" calls 'calls' => create calls and call_paths table")
printerr(" callchains 'callchains' => create call_paths table")
raise Exception("Too few arguments")
if (len(sys.argv) < 2):
......@@ -273,7 +292,7 @@ def do_query(q, s):
return
raise Exception("Query failed: " + q.lastError().text())
print datetime.datetime.today(), "Creating database..."
printdate("Creating database...")
db = QSqlDatabase.addDatabase('QPSQL')
query = QSqlQuery(db)
......@@ -506,12 +525,12 @@ do_query(query, 'CREATE VIEW samples_view AS '
' FROM samples')
file_header = struct.pack("!11sii", "PGCOPY\n\377\r\n\0", 0, 0)
file_trailer = "\377\377"
file_header = struct.pack("!11sii", b"PGCOPY\n\377\r\n\0", 0, 0)
file_trailer = b"\377\377"
def open_output_file(file_name):
path_name = output_dir_name + "/" + file_name
file = open(path_name, "w+")
file = open(path_name, "wb+")
file.write(file_header)
return file
......@@ -526,13 +545,13 @@ def copy_output_file_direct(file, table_name):
# Use COPY FROM STDIN because security may prevent postgres from accessing the files directly
def copy_output_file(file, table_name):
conn = PQconnectdb("dbname = " + dbname)
conn = PQconnectdb(toclientstr("dbname = " + dbname))
if (PQstatus(conn)):
raise Exception("COPY FROM STDIN PQconnectdb failed")
file.write(file_trailer)
file.seek(0)
sql = "COPY " + table_name + " FROM STDIN (FORMAT 'binary')"
res = PQexec(conn, sql)
res = PQexec(conn, toclientstr(sql))
if (PQresultStatus(res) != 4):
raise Exception("COPY FROM STDIN PQexec failed")
data = file.read(65536)
......@@ -566,7 +585,7 @@ if perf_db_export_calls:
call_file = open_output_file("call_table.bin")
def trace_begin():
print datetime.datetime.today(), "Writing to intermediate files..."
printdate("Writing to intermediate files...")
# id == 0 means unknown. It is easier to create records for them than replace the zeroes with NULLs
evsel_table(0, "unknown")
machine_table(0, 0, "unknown")
......@@ -582,7 +601,7 @@ def trace_begin():
unhandled_count = 0
def trace_end():
print datetime.datetime.today(), "Copying to database..."
printdate("Copying to database...")
copy_output_file(evsel_file, "selected_events")
copy_output_file(machine_file, "machines")
copy_output_file(thread_file, "threads")
......@@ -597,7 +616,7 @@ def trace_end():
if perf_db_export_calls:
copy_output_file(call_file, "calls")
print datetime.datetime.today(), "Removing intermediate files..."
printdate("Removing intermediate files...")
remove_output_file(evsel_file)
remove_output_file(machine_file)
remove_output_file(thread_file)
......@@ -612,7 +631,7 @@ def trace_end():
if perf_db_export_calls:
remove_output_file(call_file)
os.rmdir(output_dir_name)
print datetime.datetime.today(), "Adding primary keys"
printdate("Adding primary keys")
do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)')
do_query(query, 'ALTER TABLE machines ADD PRIMARY KEY (id)')
do_query(query, 'ALTER TABLE threads ADD PRIMARY KEY (id)')
......@@ -627,7 +646,7 @@ def trace_end():
if perf_db_export_calls:
do_query(query, 'ALTER TABLE calls ADD PRIMARY KEY (id)')
print datetime.datetime.today(), "Adding foreign keys"
printdate("Adding foreign keys")
do_query(query, 'ALTER TABLE threads '
'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),'
'ADD CONSTRAINT processfk FOREIGN KEY (process_id) REFERENCES threads (id)')
......@@ -663,8 +682,8 @@ def trace_end():
do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)')
if (unhandled_count):
print datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events"
print datetime.datetime.today(), "Done"
printdate("Warning: ", unhandled_count, " unhandled events")
printdate("Done")
def trace_unhandled(event_name, context, event_fields_dict):
global unhandled_count
......@@ -674,12 +693,14 @@ def sched__sched_switch(*x):
pass
def evsel_table(evsel_id, evsel_name, *x):
	"""Append one (id, name) record to the selected-events intermediate file.

	The record is packed big-endian as: field count 2, then for each field
	its byte length followed by its value (8-byte id, n-byte name).
	"""
	name_bytes = toserverstr(evsel_name)
	name_len = len(name_bytes)
	record = struct.pack("!hiqi%ds" % name_len, 2, 8, evsel_id, name_len, name_bytes)
	evsel_file.write(record)
def machine_table(machine_id, pid, root_dir, *x):
root_dir = toserverstr(root_dir)
n = len(root_dir)
fmt = "!hiqiii" + str(n) + "s"
value = struct.pack(fmt, 3, 8, machine_id, 4, pid, n, root_dir)
......@@ -690,6 +711,7 @@ def thread_table(thread_id, machine_id, process_id, pid, tid, *x):
thread_file.write(value)
def comm_table(comm_id, comm_str, *x):
comm_str = toserverstr(comm_str)
n = len(comm_str)
fmt = "!hiqi" + str(n) + "s"
value = struct.pack(fmt, 2, 8, comm_id, n, comm_str)
......@@ -701,6 +723,9 @@ def comm_thread_table(comm_thread_id, comm_id, thread_id, *x):
comm_thread_file.write(value)
def dso_table(dso_id, machine_id, short_name, long_name, build_id, *x):
short_name = toserverstr(short_name)
long_name = toserverstr(long_name)
build_id = toserverstr(build_id)
n1 = len(short_name)
n2 = len(long_name)
n3 = len(build_id)
......@@ -709,12 +734,14 @@ def dso_table(dso_id, machine_id, short_name, long_name, build_id, *x):
dso_file.write(value)
def symbol_table(symbol_id, dso_id, sym_start, sym_end, binding, symbol_name, *x):
	"""Append one symbol record (6 fields) to the symbols intermediate file.

	Each field is written as its byte length followed by its value: four
	8-byte integers (id, dso id, start, end), a 4-byte binding and the
	n-byte symbol name.
	"""
	name_bytes = toserverstr(symbol_name)
	name_len = len(name_bytes)
	layout = "!hiqiqiqiqiii%ds" % name_len
	record = struct.pack(layout, 6,
			     8, symbol_id,
			     8, dso_id,
			     8, sym_start,
			     8, sym_end,
			     4, binding,
			     name_len, name_bytes)
	symbol_file.write(record)
def branch_type_table(branch_type, name, *x):
name = toserverstr(name)
n = len(name)
fmt = "!hiii" + str(n) + "s"
value = struct.pack(fmt, 2, 4, branch_type, n, name)
......
......@@ -10,6 +10,8 @@
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
from __future__ import print_function
import os
import sys
import struct
......@@ -60,11 +62,17 @@ perf_db_export_mode = True
perf_db_export_calls = False
perf_db_export_callchains = False
def printerr(*args, **keyword_args):
	"""print() variant whose destination is standard error."""
	destination = sys.stderr
	print(*args, file=destination, **keyword_args)
def printdate(*args, **kw_args):
	"""Print a message preceded by the current date and time."""
	timestamp = datetime.datetime.today()
	print(timestamp, *args, sep=' ', **kw_args)
def usage():
print >> sys.stderr, "Usage is: export-to-sqlite.py <database name> [<columns>] [<calls>] [<callchains>]"
print >> sys.stderr, "where: columns 'all' or 'branches'"
print >> sys.stderr, " calls 'calls' => create calls and call_paths table"
print >> sys.stderr, " callchains 'callchains' => create call_paths table"
printerr("Usage is: export-to-sqlite.py <database name> [<columns>] [<calls>] [<callchains>]");
printerr("where: columns 'all' or 'branches'");
printerr(" calls 'calls' => create calls and call_paths table");
printerr(" callchains 'callchains' => create call_paths table");
raise Exception("Too few arguments")
if (len(sys.argv) < 2):
......@@ -100,7 +108,7 @@ def do_query_(q):
return
raise Exception("Query failed: " + q.lastError().text())
print datetime.datetime.today(), "Creating database..."
printdate("Creating database ...")
db_exists = False
try:
......@@ -378,7 +386,7 @@ if perf_db_export_calls:
call_query.prepare("INSERT INTO calls VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
def trace_begin():
print datetime.datetime.today(), "Writing records..."
printdate("Writing records...")
do_query(query, 'BEGIN TRANSACTION')
# id == 0 means unknown. It is easier to create records for them than replace the zeroes with NULLs
evsel_table(0, "unknown")
......@@ -397,14 +405,14 @@ unhandled_count = 0
def trace_end():
do_query(query, 'END TRANSACTION')
print datetime.datetime.today(), "Adding indexes"
printdate("Adding indexes")
if perf_db_export_calls:
do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)')
do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)')
if (unhandled_count):
print datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events"
print datetime.datetime.today(), "Done"
printdate("Warning: ", unhandled_count, " unhandled events")
printdate("Done")
def trace_unhandled(event_name, context, event_fields_dict):
global unhandled_count
......
......@@ -88,11 +88,20 @@
# 7fab593ea956 48 89 15 3b 13 22 00 movq %rdx, 0x22133b(%rip)
# 8107675243232 2 ls 22011 22011 hardware interrupt No 7fab593ea956 _dl_start+0x26 (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel])
from __future__ import print_function
import sys
import weakref
import threading
import string
import cPickle
try:
# Python2
import cPickle as pickle
# size of pickled integer big enough for record size
glb_nsz = 8
except ImportError:
import pickle
glb_nsz = 16
import re
import os
from PySide.QtCore import *
......@@ -102,6 +111,15 @@ from decimal import *
from ctypes import *
from multiprocessing import Process, Array, Value, Event
# xrange is range in Python3
try:
xrange
except NameError:
xrange = range
def printerr(*args, **keyword_args):
	"""Write the arguments to standard error using print() formatting."""
	target = sys.stderr
	print(*args, file=target, **keyword_args)
# Data formatting helpers
def tohex(ip):
......@@ -1004,10 +1022,6 @@ class ChildDataItemFinder():
glb_chunk_sz = 10000
# size of pickled integer big enough for record size
glb_nsz = 8
# Background process for SQL data fetcher
class SQLFetcherProcess():
......@@ -1066,7 +1080,7 @@ class SQLFetcherProcess():
return True
if space >= glb_nsz:
# Use 0 (or space < glb_nsz) to mean there is no more at the top of the buffer
nd = cPickle.dumps(0, cPickle.HIGHEST_PROTOCOL)
nd = pickle.dumps(0, pickle.HIGHEST_PROTOCOL)
self.buffer[self.local_head : self.local_head + len(nd)] = nd
self.local_head = 0
if self.local_tail - self.local_head > sz:
......@@ -1084,9 +1098,9 @@ class SQLFetcherProcess():
self.wait_event.wait()
def AddToBuffer(self, obj):
d = cPickle.dumps(obj, cPickle.HIGHEST_PROTOCOL)
d = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
n = len(d)
nd = cPickle.dumps(n, cPickle.HIGHEST_PROTOCOL)
nd = pickle.dumps(n, pickle.HIGHEST_PROTOCOL)
sz = n + glb_nsz
self.WaitForSpace(sz)
pos = self.local_head
......@@ -1198,12 +1212,12 @@ class SQLFetcher(QObject):
pos = self.local_tail
if len(self.buffer) - pos < glb_nsz:
pos = 0
n = cPickle.loads(self.buffer[pos : pos + glb_nsz])
n = pickle.loads(self.buffer[pos : pos + glb_nsz])
if n == 0:
pos = 0
n = cPickle.loads(self.buffer[0 : glb_nsz])
n = pickle.loads(self.buffer[0 : glb_nsz])
pos += glb_nsz
obj = cPickle.loads(self.buffer[pos : pos + n])
obj = pickle.loads(self.buffer[pos : pos + n])
self.local_tail = pos + n
return obj
......@@ -2973,7 +2987,7 @@ class DBRef():
def Main():
if (len(sys.argv) < 2):
print >> sys.stderr, "Usage is: exported-sql-viewer.py {<database name> | --help-only}"
printerr("Usage is: exported-sql-viewer.py {<database name> | --help-only}");
raise Exception("Too few arguments")
dbname = sys.argv[1]
......@@ -2986,8 +3000,8 @@ def Main():
is_sqlite3 = False
try:
f = open(dbname)
if f.read(15) == "SQLite format 3":
f = open(dbname, "rb")
if f.read(15) == b'SQLite format 3':
is_sqlite3 = True
f.close()
except:
......
......@@ -611,14 +611,16 @@ void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence)
browser->top = browser->entries;
break;
case SEEK_CUR:
browser->top = browser->top + browser->top_idx + offset;
browser->top = (char **)browser->top + offset;
break;
case SEEK_END:
browser->top = browser->top + browser->nr_entries - 1 + offset;
browser->top = (char **)browser->entries + browser->nr_entries - 1 + offset;
break;
default:
return;
}
assert((char **)browser->top < (char **)browser->entries + browser->nr_entries);
assert((char **)browser->top >= (char **)browser->entries);
}
unsigned int ui_browser__argv_refresh(struct ui_browser *browser)
......@@ -630,7 +632,9 @@ unsigned int ui_browser__argv_refresh(struct ui_browser *browser)
browser->top = browser->entries;
pos = (char **)browser->top;
while (idx < browser->nr_entries) {
while (idx < browser->nr_entries &&
row < (unsigned)SLtt_Screen_Rows - 1) {
assert(pos < (char **)browser->entries + browser->nr_entries);
if (!browser->filter || !browser->filter(browser, *pos)) {
ui_browser__gotorc(browser, row, 0);
browser->write(browser, pos, row);
......
......@@ -3,6 +3,7 @@ perf-y += hists.o
perf-y += map.o
perf-y += scripts.o
perf-y += header.o
perf-y += res_sample.o
CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST
CFLAGS_hists.o += -DENABLE_SLFUTURE_CONST
......
......@@ -750,7 +750,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
continue;
case 'r':
{
script_browse(NULL);
script_browse(NULL, NULL);
continue;
}
case 'k':
......
......@@ -7,6 +7,7 @@
#include <string.h>
#include <linux/rbtree.h>
#include <sys/ttydefaults.h>
#include <linux/time64.h>
#include "../../util/callchain.h"
#include "../../util/evsel.h"
......@@ -30,6 +31,7 @@
#include "srcline.h"
#include "string2.h"
#include "units.h"
#include "time-utils.h"
#include "sane_ctype.h"
......@@ -1224,6 +1226,8 @@ void hist_browser__init_hpp(void)
hist_browser__hpp_color_overhead_guest_us;
perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
hist_browser__hpp_color_overhead_acc;
res_sample_init();
}
static int hist_browser__show_entry(struct hist_browser *browser,
......@@ -2338,9 +2342,12 @@ static int switch_data_file(void)
}
/*
 * One selectable action of the hists browser: the context the action
 * applies to plus the handler that carries it out.
 */
struct popup_action {
/*
 * If non-zero, do_run_script() restricts 'perf script' to a --time window
 * starting here.  NOTE(review): add_script_opt() stores he->time in this
 * field; if that is a 64-bit timestamp, 'unsigned long' truncates it on
 * 32-bit builds - confirm.
 */
unsigned long time;
/* thread whose comm do_run_script() passes as 'perf script -c', if set */
struct thread *thread;
/* ms.sym names the symbol passed as 'perf script -S' when no thread is set */
struct map_symbol ms;
/* NOTE(review): not referenced by the code visible here - presumably a socket filter */
int socket;
/* event the action belongs to; handed on to script_browse()/res_sample_browse() */
struct perf_evsel *evsel;
/* output flavour requested from res_sample_browse() */
enum rstype rstype;
/* handler invoked with the browser and this action */
int (*fn)(struct hist_browser *browser, struct popup_action *act);
};
......@@ -2527,45 +2534,136 @@ static int
do_run_script(struct hist_browser *browser __maybe_unused,
struct popup_action *act)
{
char script_opt[64];
memset(script_opt, 0, sizeof(script_opt));
char *script_opt;
int len;
int n = 0;
len = 100;
if (act->thread)
len += strlen(thread__comm_str(act->thread));
else if (act->ms.sym)
len += strlen(act->ms.sym->name);
script_opt = malloc(len);
if (!script_opt)
return -1;
script_opt[0] = 0;
if (act->thread) {
scnprintf(script_opt, sizeof(script_opt), " -c %s ",
n = scnprintf(script_opt, len, " -c %s ",
thread__comm_str(act->thread));
} else if (act->ms.sym) {
scnprintf(script_opt, sizeof(script_opt), " -S %s ",
n = scnprintf(script_opt, len, " -S %s ",
act->ms.sym->name);
}
script_browse(script_opt);
if (act->time) {
char start[32], end[32];
unsigned long starttime = act->time;
unsigned long endtime = act->time + symbol_conf.time_quantum;
if (starttime == endtime) { /* Display 1ms as fallback */
starttime -= 1*NSEC_PER_MSEC;
endtime += 1*NSEC_PER_MSEC;
}
timestamp__scnprintf_usec(starttime, start, sizeof start);
timestamp__scnprintf_usec(endtime, end, sizeof end);
n += snprintf(script_opt + n, len - n, " --time %s,%s", start, end);
}
script_browse(script_opt, act->evsel);
free(script_opt);
return 0;
}
static int
add_script_opt(struct hist_browser *browser __maybe_unused,
do_res_sample_script(struct hist_browser *browser __maybe_unused,
struct popup_action *act)
{
struct hist_entry *he;
he = hist_browser__selected_entry(browser);
res_sample_browse(he->res_samples, he->num_res, act->evsel, act->rstype);
return 0;
}
static int
add_script_opt_2(struct hist_browser *browser __maybe_unused,
struct popup_action *act, char **optstr,
struct thread *thread, struct symbol *sym)
struct thread *thread, struct symbol *sym,
struct perf_evsel *evsel, const char *tstr)
{
if (thread) {
if (asprintf(optstr, "Run scripts for samples of thread [%s]",
thread__comm_str(thread)) < 0)
if (asprintf(optstr, "Run scripts for samples of thread [%s]%s",
thread__comm_str(thread), tstr) < 0)
return 0;
} else if (sym) {
if (asprintf(optstr, "Run scripts for samples of symbol [%s]",
sym->name) < 0)
if (asprintf(optstr, "Run scripts for samples of symbol [%s]%s",
sym->name, tstr) < 0)
return 0;
} else {
if (asprintf(optstr, "Run scripts for all samples") < 0)
if (asprintf(optstr, "Run scripts for all samples%s", tstr) < 0)
return 0;
}
act->thread = thread;
act->ms.sym = sym;
act->evsel = evsel;
act->fn = do_run_script;
return 1;
}
/*
 * Add the "Run scripts for ..." menu entry for @thread / @sym (or for all
 * samples when both are NULL) and, when the sort order contains "time", a
 * second entry limited to the time slice of the selected hist entry.
 *
 * @act and @optstr point at the first free slot of the caller's action and
 * option arrays; up to two consecutive slots are filled.
 *
 * Returns the number of slots filled (0, 1 or 2).
 */
static int
add_script_opt(struct hist_browser *browser,
	       struct popup_action *act, char **optstr,
	       struct thread *thread, struct symbol *sym,
	       struct perf_evsel *evsel)
{
	struct hist_entry *he;
	char tstr[128];
	int n, j;

	n = add_script_opt_2(browser, act, optstr, thread, sym, evsel, "");

	he = hist_browser__selected_entry(browser);
	/*
	 * The time-limited variant needs a selected entry to take the time
	 * from; without one there is nothing to dereference below.
	 */
	if (!he || !sort_order || !strstr(sort_order, "time"))
		return n;

	/*
	 * Continue right behind what was actually filled above, so that the
	 * returned count never covers a slot that was left untouched.
	 */
	optstr += n;
	act += n;

	/* " in <start>-<end>", both ends printed with usec resolution */
	j = scnprintf(tstr, sizeof(tstr), " in ");
	j += timestamp__scnprintf_usec(he->time, tstr + j, sizeof(tstr) - j);
	j += scnprintf(tstr + j, sizeof(tstr) - j, "-");
	timestamp__scnprintf_usec(he->time + symbol_conf.time_quantum,
				  tstr + j, sizeof(tstr) - j);

	if (add_script_opt_2(browser, act, optstr, thread, sym, evsel, tstr)) {
		act->time = he->time;
		n++;
	}
	return n;
}
/*
 * Offer a "Show context for individual samples" menu entry when the
 * selected hist entry carries saved samples (@res_sample non-NULL).
 *
 * Fills *@optstr with the allocated menu text and @act with the handler,
 * event and display type.  Returns 1 if an entry was added, 0 otherwise.
 */
static int
add_res_sample_opt(struct hist_browser *browser __maybe_unused,
		   struct popup_action *act, char **optstr,
		   struct res_sample *res_sample,
		   struct perf_evsel *evsel,
		   enum rstype type)
{
	const char *how = "";

	if (res_sample == NULL)
		return 0;

	if (type == A_ASM)
		how = "with assembler";
	else if (type == A_SOURCE)
		how = "with source";

	if (asprintf(optstr, "Show context for individual samples %s", how) < 0)
		return 0;

	act->rstype = type;
	act->evsel = evsel;
	act->fn = do_res_sample_script;
	return 1;
}
static int
do_switch_data(struct hist_browser *browser __maybe_unused,
struct popup_action *act __maybe_unused)
......@@ -3031,7 +3129,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
nr_options += add_script_opt(browser,
&actions[nr_options],
&options[nr_options],
thread, NULL);
thread, NULL, evsel);
}
/*
* Note that browser->selection != NULL
......@@ -3046,11 +3144,24 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
nr_options += add_script_opt(browser,
&actions[nr_options],
&options[nr_options],
NULL, browser->selection->sym);
NULL, browser->selection->sym,
evsel);
}
}
nr_options += add_script_opt(browser, &actions[nr_options],
&options[nr_options], NULL, NULL);
&options[nr_options], NULL, NULL, evsel);
nr_options += add_res_sample_opt(browser, &actions[nr_options],
&options[nr_options],
hist_browser__selected_entry(browser)->res_samples,
evsel, A_NORMAL);
nr_options += add_res_sample_opt(browser, &actions[nr_options],
&options[nr_options],
hist_browser__selected_entry(browser)->res_samples,
evsel, A_ASM);
nr_options += add_res_sample_opt(browser, &actions[nr_options],
&options[nr_options],
hist_browser__selected_entry(browser)->res_samples,
evsel, A_SOURCE);
nr_options += add_switch_opt(browser, &actions[nr_options],
&options[nr_options]);
skip_scripting:
......
// SPDX-License-Identifier: GPL-2.0
/* Display a menu with individual samples to browse with perf script */
#include "util.h"
#include "hist.h"
#include "evsel.h"
#include "hists.h"
#include "sort.h"
#include "config.h"
#include "time-utils.h"
#include <linux/time64.h>
/*
 * Amount of time shown before and after the chosen sample by
 * res_sample_browse().  Defaults to 10 * NSEC_PER_MSEC; the
 * "samples.context" perf config variable overrides it.
 */
static u64 context_len = 10 * NSEC_PER_MSEC;
/*
 * perf_config() callback: store the value of "samples.context" in
 * context_len.  Every other variable is accepted and ignored.
 */
static int res_sample_config(const char *var, const char *value, void *data __maybe_unused)
{
	if (strcmp(var, "samples.context") != 0)
		return 0;

	return perf_config_u64(&context_len, var, value);
}
/*
 * Run res_sample_config() over the perf configuration so that a
 * "samples.context" setting, if present, replaces the default context_len.
 */
void res_sample_init(void)
{
perf_config(res_sample_config, NULL);
}
int res_sample_browse(struct res_sample *res_samples, int num_res,
struct perf_evsel *evsel, enum rstype rstype)
{
char **names;
int i, n;
int choice;
char *cmd;
char pbuf[256], tidbuf[32], cpubuf[32];
const char *perf = perf_exe(pbuf, sizeof pbuf);
char trange[128], tsample[64];
struct res_sample *r;
char extra_format[256];
names = calloc(num_res, sizeof(char *));
if (!names)
return -1;
for (i = 0; i < num_res; i++) {
char tbuf[64];
timestamp__scnprintf_nsec(res_samples[i].time, tbuf, sizeof tbuf);
if (asprintf(&names[i], "%s: CPU %d tid %d", tbuf,
res_samples[i].cpu, res_samples[i].tid) < 0) {
while (--i >= 0)
free(names[i]);
free(names);
return -1;
}
}
choice = ui__popup_menu(num_res, names);
for (i = 0; i < num_res; i++)
free(names[i]);
free(names);
if (choice < 0 || choice >= num_res)
return -1;
r = &res_samples[choice];
n = timestamp__scnprintf_nsec(r->time - context_len, trange, sizeof trange);
trange[n++] = ',';
timestamp__scnprintf_nsec(r->time + context_len, trange + n, sizeof trange - n);
timestamp__scnprintf_nsec(r->time, tsample, sizeof tsample);
attr_to_script(extra_format, &evsel->attr);
if (asprintf(&cmd, "%s script %s%s --time %s %s%s %s%s --ns %s %s %s %s %s | less +/%s",
perf,
input_name ? "-i " : "",
input_name ? input_name : "",
trange,
r->cpu >= 0 ? "--cpu " : "",
r->cpu >= 0 ? (sprintf(cpubuf, "%d", r->cpu), cpubuf) : "",
r->tid ? "--tid " : "",
r->tid ? (sprintf(tidbuf, "%d", r->tid), tidbuf) : "",
extra_format,
rstype == A_ASM ? "-F +insn --xed" :
rstype == A_SOURCE ? "-F +srcline,+srccode" : "",
symbol_conf.inline_name ? "--inline" : "",
"--show-lost-events ",
r->tid ? "--show-switch-events --show-task-events " : "",
tsample) < 0)
return -1;
run_script(cmd);
free(cmd);
return 0;
}
// SPDX-License-Identifier: GPL-2.0
#include <elf.h>
#include <inttypes.h>
#include <sys/ttydefaults.h>
#include <string.h>
#include "../../util/sort.h"
#include "../../util/util.h"
#include "../../util/hist.h"
#include "../../util/debug.h"
#include "../../util/symbol.h"
#include "../browser.h"
#include "../helpline.h"
#include "../libslang.h"
/* 2048 lines should be enough for a script output */
#define MAX_LINES 2048
/* 160 bytes for one output line */
#define AVERAGE_LINE_LEN 160
struct script_line {
struct list_head node;
char line[AVERAGE_LINE_LEN];
};
struct perf_script_browser {
struct ui_browser b;
struct list_head entries;
const char *script_name;
int nr_lines;
};
#include "config.h"
#define SCRIPT_NAMELEN 128
#define SCRIPT_MAX_NO 64
......@@ -40,149 +18,169 @@ struct perf_script_browser {
*/
#define SCRIPT_FULLPATH_LEN 256
/* State shared by the helpers that build the script menu in list_scripts(). */
struct script_config {
/* menu label of each entry */
const char **names;
/* allocated command line belonging to each entry */
char **paths;
/* next free slot in names[]/paths[], i.e. the number of entries so far */
int index;
/* perf executable used as prefix of the built-in commands */
const char *perf;
/* extra 'perf script' options filled in by attr_to_script() */
char extra_format[256];
};
/*
 * Translate the way an event was recorded into extra 'perf script' output
 * field options.
 *
 * @extra_format: output buffer; reset to "" and then filled with zero or
 *                more " -F +field" options.  No size is passed in, so the
 *                caller must provide room for all options at once (the
 *                visible callers use 256-byte buffers).
 * @attr:         event attributes whose read_format and sample_type bits
 *                are tested.
 */
void attr_to_script(char *extra_format, struct perf_event_attr *attr)
{
	/* Checked in this order, which is also the order of the output. */
	const struct {
		bool wanted;
		const char *opt;
	} fields[] = {
		{ attr->read_format & PERF_FORMAT_GROUP,	" -F +metric" },
		{ attr->sample_type & PERF_SAMPLE_BRANCH_STACK,	" -F +brstackinsn --xed" },
		{ attr->sample_type & PERF_SAMPLE_REGS_INTR,	" -F +iregs" },
		{ attr->sample_type & PERF_SAMPLE_REGS_USER,	" -F +uregs" },
		{ attr->sample_type & PERF_SAMPLE_PHYS_ADDR,	" -F +phys_addr" },
	};
	size_t i;

	extra_format[0] = 0;
	for (i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
		if (fields[i].wanted)
			strcat(extra_format, fields[i].opt);
	}
}
static int add_script_option(const char *name, const char *opt,
struct script_config *c)
{
c->names[c->index] = name;
if (asprintf(&c->paths[c->index],
"%s script %s -F +metric %s %s",
c->perf, opt, symbol_conf.inline_name ? " --inline" : "",
c->extra_format) < 0)
return -1;
c->index++;
return 0;
}
/*
 * perf_config() callback: turn every "scripts.<name> = <command>" variable
 * into a menu entry labelled <name> that runs <command> followed by the
 * extra format options.
 *
 * @data is the struct script_config being filled.
 *
 * Returns 0 when the variable was added or is not a scripts.* variable,
 * -1 when the menu is full or memory could not be allocated.
 */
static int scripts_config(const char *var, const char *value, void *data)
{
	struct script_config *c = data;
	char *name, *cmd;

	/*
	 * Variables of other sections are none of our business; they are
	 * skipped, not reported as errors.
	 * NOTE(review): assumes perf_config() treats a negative return as a
	 * bad config entry - confirm.
	 */
	if (!strstarts(var, "scripts."))
		return 0;
	if (c->index >= SCRIPT_MAX_NO)
		return -1;

	/* The label is what follows the whole prefix, '.' included. */
	name = strdup(var + strlen("scripts."));
	if (!name)
		return -1;
	if (asprintf(&cmd, "%s %s", value, c->extra_format) < 0) {
		free(name);
		return -1;
	}

	c->names[c->index] = name;
	c->paths[c->index] = cmd;
	c->index++;
	return 0;
}
/*
* When success, will copy the full path of the selected script
* into the buffer pointed by script_name, and return 0.
* Return -1 on failure.
*/
static int list_scripts(char *script_name)
static int list_scripts(char *script_name, bool *custom,
struct perf_evsel *evsel)
{
char *buf, *names[SCRIPT_MAX_NO], *paths[SCRIPT_MAX_NO];
int i, num, choice, ret = -1;
char *buf, *paths[SCRIPT_MAX_NO], *names[SCRIPT_MAX_NO];
int i, num, choice;
int ret = 0;
int max_std, custom_perf;
char pbuf[256];
const char *perf = perf_exe(pbuf, sizeof pbuf);
struct script_config scriptc = {
.names = (const char **)names,
.paths = paths,
.perf = perf
};
script_name[0] = 0;
/* Preset the script name to SCRIPT_NAMELEN */
buf = malloc(SCRIPT_MAX_NO * (SCRIPT_NAMELEN + SCRIPT_FULLPATH_LEN));
if (!buf)
return ret;
return -1;
for (i = 0; i < SCRIPT_MAX_NO; i++) {
names[i] = buf + i * (SCRIPT_NAMELEN + SCRIPT_FULLPATH_LEN);
if (evsel)
attr_to_script(scriptc.extra_format, &evsel->attr);
add_script_option("Show individual samples", "", &scriptc);
add_script_option("Show individual samples with assembler", "-F +insn --xed",
&scriptc);
add_script_option("Show individual samples with source", "-F +srcline,+srccode",
&scriptc);
perf_config(scripts_config, &scriptc);
custom_perf = scriptc.index;
add_script_option("Show samples with custom perf script arguments", "", &scriptc);
i = scriptc.index;
max_std = i;
for (; i < SCRIPT_MAX_NO; i++) {
names[i] = buf + (i - max_std) * (SCRIPT_NAMELEN + SCRIPT_FULLPATH_LEN);
paths[i] = names[i] + SCRIPT_NAMELEN;
}
num = find_scripts(names, paths);
if (num > 0) {
choice = ui__popup_menu(num, names);
if (choice < num && choice >= 0) {
strcpy(script_name, paths[choice]);
ret = 0;
}
num = find_scripts(names + max_std, paths + max_std, SCRIPT_MAX_NO - max_std,
SCRIPT_FULLPATH_LEN);
if (num < 0)
num = 0;
choice = ui__popup_menu(num + max_std, (char * const *)names);
if (choice < 0) {
ret = -1;
goto out;
}
if (choice == custom_perf) {
char script_args[50];
int key = ui_browser__input_window("perf script command",
"Enter perf script command line (without perf script prefix)",
script_args, "", 0);
if (key != K_ENTER)
return -1;
sprintf(script_name, "%s script %s", perf, script_args);
} else if (choice < num + max_std) {
strcpy(script_name, paths[choice]);
}
*custom = choice >= max_std;
out:
free(buf);
for (i = 0; i < max_std; i++)
free(paths[i]);
return ret;
}
static void script_browser__write(struct ui_browser *browser,
void *entry, int row)
void run_script(char *cmd)
{
struct script_line *sline = list_entry(entry, struct script_line, node);
bool current_entry = ui_browser__is_current_entry(browser, row);
ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
HE_COLORSET_NORMAL);
ui_browser__write_nstring(browser, sline->line, browser->width);
pr_debug("Running %s\n", cmd);
SLang_reset_tty();
if (system(cmd) < 0)
pr_warning("Cannot run %s\n", cmd);
/*
* SLang doesn't seem to reset the whole terminal, so be more
* forceful to get back to the original state.
*/
printf("\033[c\033[H\033[J");
fflush(stdout);
SLang_init_tty(0, 0, 0);
SLsmg_refresh();
}
static int script_browser__run(struct perf_script_browser *browser)
int script_browse(const char *script_opt, struct perf_evsel *evsel)
{
int key;
char *cmd, script_name[SCRIPT_FULLPATH_LEN];
bool custom = false;
if (ui_browser__show(&browser->b, browser->script_name,
"Press ESC to exit") < 0)
memset(script_name, 0, SCRIPT_FULLPATH_LEN);
if (list_scripts(script_name, &custom, evsel))
return -1;
while (1) {
key = ui_browser__run(&browser->b, 0);
/* We can add some special key handling here if needed */
break;
}
ui_browser__hide(&browser->b);
return key;
}
int script_browse(const char *script_opt)
{
char cmd[SCRIPT_FULLPATH_LEN*2], script_name[SCRIPT_FULLPATH_LEN];
char *line = NULL;
size_t len = 0;
ssize_t retlen;
int ret = -1, nr_entries = 0;
FILE *fp;
void *buf;
struct script_line *sline;
struct perf_script_browser script = {
.b = {
.refresh = ui_browser__list_head_refresh,
.seek = ui_browser__list_head_seek,
.write = script_browser__write,
},
.script_name = script_name,
};
INIT_LIST_HEAD(&script.entries);
/* Save each line of the output in one struct script_line object. */
buf = zalloc((sizeof(*sline)) * MAX_LINES);
if (!buf)
if (asprintf(&cmd, "%s%s %s %s%s 2>&1 | less",
custom ? "perf script -s " : "",
script_name,
script_opt ? script_opt : "",
input_name ? "-i " : "",
input_name ? input_name : "") < 0)
return -1;
sline = buf;
memset(script_name, 0, SCRIPT_FULLPATH_LEN);
if (list_scripts(script_name))
goto exit;
sprintf(cmd, "perf script -s %s ", script_name);
if (script_opt)
strcat(cmd, script_opt);
run_script(cmd);
free(cmd);
if (input_name) {
strcat(cmd, " -i ");
strcat(cmd, input_name);
}
strcat(cmd, " 2>&1");
fp = popen(cmd, "r");
if (!fp)
goto exit;
while ((retlen = getline(&line, &len, fp)) != -1) {
strncpy(sline->line, line, AVERAGE_LINE_LEN);
/* If one output line is very large, just cut it short */
if (retlen >= AVERAGE_LINE_LEN) {
sline->line[AVERAGE_LINE_LEN - 1] = '\0';
sline->line[AVERAGE_LINE_LEN - 2] = '\n';
}
list_add_tail(&sline->node, &script.entries);
if (script.b.width < retlen)
script.b.width = retlen;
if (nr_entries++ >= MAX_LINES - 1)
break;
sline++;
}
if (script.b.width > AVERAGE_LINE_LEN)
script.b.width = AVERAGE_LINE_LEN;
free(line);
pclose(fp);
script.nr_lines = nr_entries;
script.b.nr_entries = nr_entries;
script.b.entries = &script.entries;
ret = script_browser__run(&script);
exit:
free(buf);
return ret;
return 0;
}
#ifndef INSN_H
#define INSN_H 1
struct perf_sample;
struct machine;
struct thread;
/*
 * Architecture hook operating on @sample in the context of @thread and
 * @machine.
 * NOTE(review): presumably reads the instruction bytes at the sample IP
 * into the sample so they can be printed/disassembled — confirm against
 * the per-arch implementation.
 */
void arch_fetch_insn(struct perf_sample *sample,
struct thread *thread,
struct machine *machine);
#endif
......@@ -14,6 +14,7 @@
#include "data.h"
#include "util.h"
#include "debug.h"
#include "header.h"
static void close_dir(struct perf_data_file *files, int nr)
{
......@@ -34,12 +35,16 @@ int perf_data__create_dir(struct perf_data *data, int nr)
struct perf_data_file *files = NULL;
int i, ret = -1;
if (WARN_ON(!data->is_dir))
return -EINVAL;
files = zalloc(nr * sizeof(*files));
if (!files)
return -ENOMEM;
data->dir.files = files;
data->dir.nr = nr;
data->dir.version = PERF_DIR_VERSION;
data->dir.files = files;
data->dir.nr = nr;
for (i = 0; i < nr; i++) {
struct perf_data_file *file = &files[i];
......@@ -69,6 +74,13 @@ int perf_data__open_dir(struct perf_data *data)
DIR *dir;
int nr = 0;
if (WARN_ON(!data->is_dir))
return -EINVAL;
/* The version is provided by DIR_FORMAT feature. */
if (WARN_ON(data->dir.version != PERF_DIR_VERSION))
return -1;
dir = opendir(data->path);
if (!dir)
return -EINVAL;
......@@ -118,6 +130,26 @@ int perf_data__open_dir(struct perf_data *data)
return ret;
}
/*
 * Refresh the cached size of every file that belongs to a directory-format
 * perf.data by fstat()ing its descriptor.
 *
 * Returns 0 on success, -EINVAL (after a WARN_ON) when @data is not in
 * directory mode, and -1 as soon as one fstat() call fails.
 */
int perf_data__update_dir(struct perf_data *data)
{
	int idx = 0;

	if (WARN_ON(!data->is_dir))
		return -EINVAL;

	while (idx < data->dir.nr) {
		struct stat sb;

		if (fstat(data->dir.files[idx].fd, &sb) != 0)
			return -1;

		data->dir.files[idx].size = sb.st_size;
		idx++;
	}

	return 0;
}
static bool check_pipe(struct perf_data *data)
{
struct stat st;
......@@ -173,6 +205,16 @@ static int check_backup(struct perf_data *data)
return 0;
}
/*
 * Report whether data->path names an existing directory.
 * A path that cannot be stat()ed is treated as "not a directory".
 */
static bool is_dir(struct perf_data *data)
{
	struct stat sb;

	if (stat(data->path, &sb) != 0)
		return false;

	return S_ISDIR(sb.st_mode);
}
static int open_file_read(struct perf_data *data)
{
struct stat st;
......@@ -254,6 +296,30 @@ static int open_file_dup(struct perf_data *data)
return open_file(data);
}
/*
 * Open a directory-format perf.data.
 *
 * Only "<path>/header" is opened here, which is enough to read the data
 * version and layout.  In write mode the directory itself is created
 * first, and removed again if opening the header fails.
 *
 * Returns the result of open_file() on the header, or -1 when the header
 * path cannot be built or the directory cannot be created.  On those two
 * early failures data->file.path is left NULL, so nothing leaks and a
 * later zfree()/free() of it is harmless.
 */
static int open_dir(struct perf_data *data)
{
	int ret;

	/*
	 * So far we open only the header, so we can read the data version and
	 * layout.
	 */
	if (asprintf(&data->file.path, "%s/header", data->path) < 0) {
		/* asprintf() leaves the pointer undefined on failure. */
		data->file.path = NULL;
		return -1;
	}

	if (perf_data__is_write(data) &&
	    mkdir(data->path, S_IRWXU) < 0) {
		/* Nothing was opened yet: drop the header path we built. */
		zfree(&data->file.path);
		return -1;
	}

	ret = open_file(data);

	/* Cleanup whatever we managed to create so far. */
	if (ret && perf_data__is_write(data))
		rm_rf_perf_data(data->path);

	return ret;
}
int perf_data__open(struct perf_data *data)
{
if (check_pipe(data))
......@@ -265,11 +331,18 @@ int perf_data__open(struct perf_data *data)
if (check_backup(data))
return -1;
return open_file_dup(data);
if (perf_data__is_read(data))
data->is_dir = is_dir(data);
return perf_data__is_dir(data) ?
open_dir(data) : open_file_dup(data);
}
/*
 * Release everything perf_data__open() set up: the per-directory files
 * (directory mode only), then the main file's path string and descriptor.
 */
void perf_data__close(struct perf_data *data)
{
	if (perf_data__is_dir(data)) {
		perf_data__close_dir(data);
	}

	zfree(&data->file.path);
	close(data->file.fd);
}
......@@ -326,3 +399,20 @@ int perf_data__switch(struct perf_data *data,
free(new_filepath);
return ret;
}
/*
 * Total size of the perf data: the main file's size, plus the size of
 * every directory file when @data is in directory mode.
 */
unsigned long perf_data__size(struct perf_data *data)
{
	u64 total = data->file.size;
	int idx;

	if (data->is_dir) {
		for (idx = 0; idx < data->dir.nr; idx++)
			total += data->dir.files[idx].size;
	}

	return total;
}
......@@ -19,10 +19,12 @@ struct perf_data {
const char *path;
struct perf_data_file file;
bool is_pipe;
bool is_dir;
bool force;
enum perf_data_mode mode;
struct {
u64 version;
struct perf_data_file *files;
int nr;
} dir;
......@@ -43,14 +45,14 @@ static inline int perf_data__is_pipe(struct perf_data *data)
return data->is_pipe;
}
static inline int perf_data__fd(struct perf_data *data)
static inline bool perf_data__is_dir(struct perf_data *data)
{
return data->file.fd;
return data->is_dir;
}
static inline unsigned long perf_data__size(struct perf_data *data)
static inline int perf_data__fd(struct perf_data *data)
{
return data->file.size;
return data->file.fd;
}
int perf_data__open(struct perf_data *data);
......@@ -73,4 +75,6 @@ int perf_data__switch(struct perf_data *data,
int perf_data__create_dir(struct perf_data *data, int nr);
int perf_data__open_dir(struct perf_data *data);
void perf_data__close_dir(struct perf_data *data);
int perf_data__update_dir(struct perf_data *data);
unsigned long perf_data__size(struct perf_data *data);
#endif /* __PERF_DATA_H */
......@@ -861,6 +861,21 @@ static int write_clockid(struct feat_fd *ff,
sizeof(ff->ph->env.clockid_res_ns));
}
/*
 * Feature writer for DIR_FORMAT: emits the directory data version kept in
 * the owning session's perf_data.
 *
 * Returns do_write()'s result, or -1 (after a WARN_ON) when the session
 * data is not in directory mode.
 */
static int write_dir_format(struct feat_fd *ff,
			    struct perf_evlist *evlist __maybe_unused)
{
	/* ff->ph is the header embedded in the session. */
	struct perf_session *session = container_of(ff->ph, struct perf_session, header);
	struct perf_data *data = session->data;

	if (WARN_ON(!perf_data__is_dir(data)))
		return -1;

	return do_write(ff, &data->dir.version, sizeof(data->dir.version));
}
static int cpu_cache_level__sort(const void *a, const void *b)
{
struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
......@@ -1341,6 +1356,17 @@ static void print_clockid(struct feat_fd *ff, FILE *fp)
ff->ph->env.clockid_res_ns * 1000);
}
/*
 * Feature printer for DIR_FORMAT: writes the directory data version of
 * the owning session's perf_data to @fp as a '#' header line.
 */
static void print_dir_format(struct feat_fd *ff, FILE *fp)
{
	/* ff->ph is the header embedded in the session. */
	struct perf_session *session = container_of(ff->ph, struct perf_session, header);
	struct perf_data *data = session->data;

	fprintf(fp, "# directory data version : %"PRIu64"\n", data->dir.version);
}
static void free_event_desc(struct perf_evsel *events)
{
struct perf_evsel *evsel;
......@@ -2373,6 +2399,21 @@ static int process_clockid(struct feat_fd *ff,
return 0;
}
/*
 * Feature reader for DIR_FORMAT: loads the directory data version from
 * the feature section into the owning session's perf_data.
 *
 * Returns do_read_u64()'s result, or -1 (after a WARN_ON) when the session
 * data is not in directory mode.
 */
static int process_dir_format(struct feat_fd *ff,
			      void *_data __maybe_unused)
{
	/* ff->ph is the header embedded in the session. */
	struct perf_session *session = container_of(ff->ph, struct perf_session, header);
	struct perf_data *data = session->data;

	if (WARN_ON(!perf_data__is_dir(data)))
		return -1;

	return do_read_u64(ff, &data->dir.version);
}
struct feature_ops {
int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
void (*print)(struct feat_fd *ff, FILE *fp);
......@@ -2432,7 +2473,8 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPN(CACHE, cache, true),
FEAT_OPR(SAMPLE_TIME, sample_time, false),
FEAT_OPR(MEM_TOPOLOGY, mem_topology, true),
FEAT_OPR(CLOCKID, clockid, false)
FEAT_OPR(CLOCKID, clockid, false),
FEAT_OPN(DIR_FORMAT, dir_format, false)
};
struct header_print_data {
......
......@@ -39,6 +39,7 @@ enum {
HEADER_SAMPLE_TIME,
HEADER_MEM_TOPOLOGY,
HEADER_CLOCKID,
HEADER_DIR_FORMAT,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
......@@ -48,6 +49,10 @@ enum perf_header_version {
PERF_HEADER_VERSION_2,
};
/*
 * Version of the directory-format perf.data layout.  The value is stored
 * in perf_data.dir.version when a directory is created and compared
 * against it when a directory is opened.
 */
enum perf_dir_version {
PERF_DIR_VERSION = 1,
};
struct perf_file_section {
u64 offset;
u64 size;
......
......@@ -19,6 +19,7 @@
#include <math.h>
#include <inttypes.h>
#include <sys/param.h>
#include <linux/time64.h>
static bool hists__filter_entry_by_dso(struct hists *hists,
struct hist_entry *he);
......@@ -192,6 +193,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__new_col_len(hists, HISTC_MEM_LVL, 21 + 3);
hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12);
hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12);
hists__new_col_len(hists, HISTC_TIME, 12);
if (h->srcline) {
len = MAX(strlen(h->srcline), strlen(sort_srcline.se_header));
......@@ -246,6 +248,14 @@ static void he_stat__add_cpumode_period(struct he_stat *he_stat,
}
}
/*
 * Round @htime down to a multiple of symbol_conf.time_quantum.
 * A zero quantum disables the rounding and @htime is returned unchanged.
 */
static long hist_time(unsigned long htime)
{
	unsigned long quantum = symbol_conf.time_quantum;

	if (!quantum)
		return htime;

	/* Same as (htime / quantum) * quantum for unsigned operands. */
	return htime - htime % quantum;
}
static void he_stat__add_period(struct he_stat *he_stat, u64 period,
u64 weight)
{
......@@ -426,6 +436,13 @@ static int hist_entry__init(struct hist_entry *he,
goto err_rawdata;
}
if (symbol_conf.res_sample) {
he->res_samples = calloc(sizeof(struct res_sample),
symbol_conf.res_sample);
if (!he->res_samples)
goto err_srcline;
}
INIT_LIST_HEAD(&he->pairs.node);
thread__get(he->thread);
he->hroot_in = RB_ROOT_CACHED;
......@@ -436,6 +453,9 @@ static int hist_entry__init(struct hist_entry *he,
return 0;
err_srcline:
free(he->srcline);
err_rawdata:
free(he->raw_data);
......@@ -593,6 +613,32 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
return he;
}
/*
 * Return a uniformly distributed random number in [0, high).
 *
 * random() yields values in [0, 2^31), so draws below (2^31 % high) are
 * rejected: the remaining interval has a length that is a multiple of
 * @high, which makes "r % high" free of modulo bias.
 *
 * @high == 0 has no valid result; 0 is returned instead of dividing by
 * zero.  For @high above 2^31 no rejection is possible and the raw draw
 * is returned.
 */
static unsigned random_max(unsigned high)
{
	const unsigned range = 1u << 31;	/* number of distinct random() values */
	unsigned thresh;

	if (high == 0)
		return 0;

	thresh = high < range ? range % high : 0;

	for (;;) {
		unsigned r = random();

		if (r >= thresh)
			return r % high;
	}
}
/*
 * Remember @sample (time, cpu, tid) in @he's res_samples array.
 *
 * While fewer than symbol_conf.res_sample entries are stored the sample is
 * appended; once the array is full it overwrites a randomly chosen slot.
 */
static void hists__res_sample(struct hist_entry *he, struct perf_sample *sample)
{
	struct res_sample *slot;
	int idx;

	if (he->num_res >= symbol_conf.res_sample)
		idx = random_max(symbol_conf.res_sample);
	else
		idx = he->num_res++;

	slot = &he->res_samples[idx];
	slot->time = sample->time;
	slot->cpu  = sample->cpu;
	slot->tid  = sample->tid;
}
static struct hist_entry*
__hists__add_entry(struct hists *hists,
struct addr_location *al,
......@@ -635,10 +681,13 @@ __hists__add_entry(struct hists *hists,
.raw_data = sample->raw_data,
.raw_size = sample->raw_size,
.ops = ops,
.time = hist_time(sample->time),
}, *he = hists__findnew_entry(hists, &entry, al, sample_self);
if (!hists->has_callchains && he && he->callchain_size != 0)
hists->has_callchains = true;
if (he && symbol_conf.res_sample)
hists__res_sample(he, sample);
return he;
}
......@@ -1162,6 +1211,7 @@ void hist_entry__delete(struct hist_entry *he)
mem_info__zput(he->mem_info);
}
zfree(&he->res_samples);
zfree(&he->stat_acc);
free_srcline(he->srcline);
if (he->srcfile && he->srcfile[0])
......
......@@ -31,6 +31,7 @@ enum hist_filter {
enum hist_column {
HISTC_SYMBOL,
HISTC_TIME,
HISTC_DSO,
HISTC_THREAD,
HISTC_COMM,
......@@ -432,9 +433,18 @@ struct hist_browser_timer {
};
struct annotation_options;
struct res_sample;
/*
 * Display mode selector passed to res_sample_browse().
 * NOTE(review): presumably A_NORMAL = plain samples, A_ASM = samples with
 * assembler, A_SOURCE = samples with source — confirm against the
 * res_sample_browse() implementation.
 */
enum rstype {
A_NORMAL,
A_ASM,
A_SOURCE
};
#ifdef HAVE_SLANG_SUPPORT
#include "../ui/keysyms.h"
void attr_to_script(char *buf, struct perf_event_attr *attr);
int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel,
struct hist_browser_timer *hbt,
struct annotation_options *annotation_opts);
......@@ -449,7 +459,13 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
struct perf_env *env,
bool warn_lost_event,
struct annotation_options *annotation_options);
int script_browse(const char *script_opt);
int script_browse(const char *script_opt, struct perf_evsel *evsel);
void run_script(char *cmd);
int res_sample_browse(struct res_sample *res_samples, int num_res,
struct perf_evsel *evsel, enum rstype rstype);
void res_sample_init(void);
#else
static inline
int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused,
......@@ -478,11 +494,22 @@ static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused,
return 0;
}
static inline int script_browse(const char *script_opt __maybe_unused)
static inline int script_browse(const char *script_opt __maybe_unused,
struct perf_evsel *evsel __maybe_unused)
{
return 0;
}
/*
 * Stub used when HAVE_SLANG_SUPPORT is not defined: ignores all arguments
 * and returns 0.
 */
static inline int res_sample_browse(struct res_sample *res_samples __maybe_unused,
int num_res __maybe_unused,
struct perf_evsel *evsel __maybe_unused,
enum rstype rstype __maybe_unused)
{
return 0;
}
static inline void res_sample_init(void) {}
#define K_LEFT -1000
#define K_RIGHT -2000
#define K_SWITCH_INPUT_DATA -3000
......
......@@ -160,8 +160,10 @@ static struct map *kernel_get_module_map(const char *module)
if (module && strchr(module, '/'))
return dso__new_map(module);
if (!module)
module = "kernel";
if (!module) {
pos = machine__kernel_map(host_machine);
return map__get(pos);
}
for (pos = maps__first(maps); pos; pos = map__next(pos)) {
/* short_name is "[module]" */
......
......@@ -152,6 +152,10 @@ struct perf_session *perf_session__new(struct perf_data *data,
}
perf_evlist__init_trace_event_sample_raw(session->evlist);
/* Open the directory data. */
if (data->is_dir && perf_data__open_dir(data))
goto out_delete;
}
} else {
session->machines.host.env = &perf_env;
......@@ -1843,10 +1847,17 @@ fetch_mmaped_event(struct perf_session *session,
#define NUM_MMAPS 128
#endif
struct reader;
typedef s64 (*reader_cb_t)(struct perf_session *session,
union perf_event *event,
u64 file_offset);
struct reader {
int fd;
u64 data_size;
u64 data_offset;
int fd;
u64 data_size;
u64 data_offset;
reader_cb_t process;
};
static int
......@@ -1917,7 +1928,7 @@ reader__process_events(struct reader *rd, struct perf_session *session,
size = event->header.size;
if (size < sizeof(struct perf_event_header) ||
(skip = perf_session__process_event(session, event, file_pos)) < 0) {
(skip = rd->process(session, event, file_pos)) < 0) {
pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
file_offset + head, event->header.size,
event->header.type);
......@@ -1943,12 +1954,20 @@ reader__process_events(struct reader *rd, struct perf_session *session,
return err;
}
/*
 * Reader callback that hands each event straight to
 * perf_session__process_event() and passes its result back unchanged.
 */
static s64 process_simple(struct perf_session *session,
			  union perf_event *event,
			  u64 file_offset)
{
	s64 skip = perf_session__process_event(session, event, file_offset);

	return skip;
}
static int __perf_session__process_events(struct perf_session *session)
{
struct reader rd = {
.fd = perf_data__fd(session->data),
.data_size = session->header.data_size,
.data_offset = session->header.data_offset,
.process = process_simple,
};
struct ordered_events *oe = &session->ordered_events;
struct perf_tool *tool = session->tool;
......
......@@ -3,6 +3,7 @@
#include <inttypes.h>
#include <regex.h>
#include <linux/mman.h>
#include <linux/time64.h>
#include "sort.h"
#include "hist.h"
#include "comm.h"
......@@ -15,6 +16,7 @@
#include <traceevent/event-parse.h>
#include "mem-events.h"
#include "annotate.h"
#include "time-utils.h"
#include <linux/kernel.h>
regex_t parent_regex;
......@@ -654,6 +656,42 @@ struct sort_entry sort_socket = {
.se_width_idx = HISTC_SOCKET,
};
/* --sort time */
/*
 * Compare two hist entries by their time field.
 * The result is positive when @right's time is larger than @left's.
 */
static int64_t
sort__time_cmp(struct hist_entry *left, struct hist_entry *right)
{
	long lhs = left->time;
	long rhs = right->time;

	return rhs - lhs;
}
/*
 * Format @he's time into @bf for the "Time" sort column.
 *
 * The time is printed as seconds with nanosecond precision when
 * symbol_conf.nanosecs is set, microsecond precision otherwise, then
 * left-aligned and truncated to @width characters.  Both precisions use
 * the shared timestamp helpers, so the split into seconds and fraction is
 * done in 64-bit arithmetic and the two modes share the same layout.
 *
 * Returns the value of repsep_snprintf().
 */
static int hist_entry__time_snprintf(struct hist_entry *he, char *bf,
				     size_t size, unsigned int width)
{
	char he_time[32];

	if (symbol_conf.nanosecs)
		timestamp__scnprintf_nsec(he->time, he_time,
					  sizeof(he_time));
	else
		timestamp__scnprintf_usec(he->time, he_time,
					  sizeof(he_time));

	return repsep_snprintf(bf, size, "%-.*s", width, he_time);
}
/*
 * Sort entry for the "Time" column: ordered by sort__time_cmp(), printed
 * by hist_entry__time_snprintf(), width tracked in the HISTC_TIME column.
 */
struct sort_entry sort_time = {
.se_header = "Time",
.se_cmp = sort__time_cmp,
.se_snprintf = hist_entry__time_snprintf,
.se_width_idx = HISTC_TIME,
};
/* --sort trace */
static char *get_trace_output(struct hist_entry *he)
......@@ -1634,6 +1672,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size),
DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id),
DIM(SORT_SYM_IPC_NULL, "ipc_null", sort_sym_ipc_null),
DIM(SORT_TIME, "time", sort_time),
};
#undef DIM
......
......@@ -47,6 +47,12 @@ extern struct sort_entry sort_srcline;
extern enum sort_type sort__first_dimension;
extern const char default_mem_sort_order[];
/*
 * One sample remembered for a hist entry: the time, cpu and tid copied
 * from the perf_sample that was recorded.
 */
struct res_sample {
u64 time; /* copied from perf_sample.time */
int cpu; /* copied from perf_sample.cpu */
int tid; /* copied from perf_sample.tid */
};
struct he_stat {
u64 period;
u64 period_sys;
......@@ -135,10 +141,13 @@ struct hist_entry {
char *srcfile;
struct symbol *parent;
struct branch_info *branch_info;
long time;
struct hists *hists;
struct mem_info *mem_info;
void *raw_data;
u32 raw_size;
int num_res;
struct res_sample *res_samples;
void *trace_output;
struct perf_hpp_list *hpp_list;
struct hist_entry *parent_he;
......@@ -231,6 +240,7 @@ enum sort_type {
SORT_DSO_SIZE,
SORT_CGROUP_ID,
SORT_SYM_IPC_NULL,
SORT_TIME,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
......
......@@ -6,6 +6,7 @@
#include <string.h>
#include <linux/kernel.h>
#include <linux/mman.h>
#include <linux/time64.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/param.h>
......@@ -39,15 +40,18 @@ int vmlinux_path__nr_entries;
char **vmlinux_path;
struct symbol_conf symbol_conf = {
.nanosecs = false,
.use_modules = true,
.try_vmlinux_path = true,
.demangle = true,
.demangle_kernel = false,
.cumulate_callchain = true,
.time_quantum = 100 * NSEC_PER_MSEC, /* 100ms */
.show_hist_headers = true,
.symfs = "",
.event_group = true,
.inline_name = true,
.res_sample = 0,
};
static enum dso_binary_type binary_type_symtab[] = {
......
......@@ -8,6 +8,7 @@ struct strlist;
struct intlist;
struct symbol_conf {
bool nanosecs;
unsigned short priv_size;
bool try_vmlinux_path,
init_annotation,
......@@ -55,6 +56,7 @@ struct symbol_conf {
*sym_list_str,
*col_width_list_str,
*bt_stop_list_str;
unsigned long time_quantum;
struct strlist *dso_list,
*comm_list,
*sym_list,
......@@ -66,6 +68,7 @@ struct symbol_conf {
struct intlist *pid_list,
*tid_list;
const char *symfs;
int res_sample;
};
extern struct symbol_conf symbol_conf;
......
......@@ -453,6 +453,14 @@ int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz)
return scnprintf(buf, sz, "%"PRIu64".%06"PRIu64, sec, usec);
}
/*
 * Print @timestamp (nanoseconds) into @buf as "<sec>.<nsec>" with the
 * fractional part zero-padded to nine digits.
 *
 * Returns the value of scnprintf().
 */
int timestamp__scnprintf_nsec(u64 timestamp, char *buf, size_t sz)
{
	u64 sec;
	u64 nsec;

	sec  = timestamp / NSEC_PER_SEC;
	nsec = timestamp % NSEC_PER_SEC;

	return scnprintf(buf, sz, "%" PRIu64 ".%09" PRIu64, sec, nsec);
}
int fetch_current_timestamp(char *buf, size_t sz)
{
struct timeval tv;
......
......@@ -30,6 +30,7 @@ int perf_time__parse_for_ranges(const char *str, struct perf_session *session,
int *range_size, int *range_num);
int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz);
int timestamp__scnprintf_nsec(u64 timestamp, char *buf, size_t sz);
int fetch_current_timestamp(char *buf, size_t sz);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment