Commit d223575e authored by Linus Torvalds

Merge tag 'perf-tools-for-v5.19-2022-05-23' of...

Merge tag 'perf-tools-for-v5.19-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tool updates from Arnaldo Carvalho de Melo:
 "Intel PT:

   - Allow hardware tracing on KVM test programs. In this case, the VM
     is not running an OS, but only the functions loaded into it by the
     hypervisor test program, and conveniently, loaded at the same
     virtual addresses.

   - Improve documentation:
      - Add link to perf wiki's page

   - Cleanups:
      - Delete now unused perf-with-kcore.sh script
      - Remove unused machines__find_host()

  ARM SPE (Statistical Profile Extensions):

   - Add man page entry.

  Vendor Events:

   - Update various Intel event topics

   - Update various microarch events

   - Fix various cstate metrics

   - Fix Alderlake metric groups

   - Add sapphirerapids events

   - Add JSON files for ARM Cortex A34, A35, A55, A510, A65, A73, A75,
     A77, A78, A710, X1, X2 and Neoverse E1

   - Update Cortex A57/A72

  perf stat:

   - Introduce stats for the user and system rusage times

  perf c2c:

   - Prep work to support ARM systems

  perf annotate:

   - Add --percent-limit option

  perf lock:

   - Add -t/--thread option for report

   - Do not discard broken lock stats

  perf bench:

   - Add breakpoint benchmarks

  perf test:

   - Limit to only run executable scripts in tests

   - Add basic perf record tests

   - Add stat record+report test

   - Add basic stat and topdown group test

   - Skip several tests when the user hasn't permission to perform them

   - Fix test case 81 ("perf record tests") on s390x

  perf version:

   - debuginfod support improvements

  perf scripting python:

   - Expose symbol offset and source information

  perf build:

   - Error for BPF skeletons without LIBBPF

   - Use Python devtools for version autodetection rather than runtime

  Miscellaneous:

   - Add riscv64 support to 'perf jitdump'

   - Various fixes/tidy ups related to cpu_map

   - Fixes for handling Intel hybrid systems"

* tag 'perf-tools-for-v5.19-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (122 commits)
  perf intel-pt: Add guest_code support
  perf kvm report: Add guest_code support
  perf script: Add guest_code support
  perf tools: Add guest_code support
  perf tools: Factor out thread__set_guest_comm()
  perf tools: Add machine to machines back pointer
  perf vendors events arm64: Update Cortex A57/A72
  perf vendors events arm64: Arm Neoverse E1
  perf vendors events arm64: Arm Cortex-X2
  perf vendors events arm64: Arm Cortex-X1
  perf vendors events arm64: Arm Cortex-A710
  perf vendors events arm64: Arm Cortex-A78
  perf vendors events arm64: Arm Cortex-A77
  perf vendors events arm64: Arm Cortex-A75
  perf vendors events arm64: Arm Cortex-A73
  perf vendors events arm64: Arm Cortex-A65
  perf vendors events arm64: Arm Cortex-A510
  perf vendors events arm64: Arm Cortex-A55
  perf vendors events arm64: Arm Cortex-A35
  perf vendors events arm64: Arm Cortex-A34
  ...
parents e908305f 5d2b6bc3
...@@ -59,6 +59,10 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist) ...@@ -59,6 +59,10 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{ {
struct perf_evsel *evsel; struct perf_evsel *evsel;
/* Recomputing all_cpus, so start with a blank slate. */
perf_cpu_map__put(evlist->all_cpus);
evlist->all_cpus = NULL;
perf_evlist__for_each_evsel(evlist, evsel) perf_evlist__for_each_evsel(evlist, evsel)
__perf_evlist__propagate_maps(evlist, evsel); __perf_evlist__propagate_maps(evlist, evsel);
} }
...@@ -474,6 +478,9 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, ...@@ -474,6 +478,9 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
*/ */
refcount_set(&map->refcnt, 2); refcount_set(&map->refcnt, 2);
if (ops->idx)
ops->idx(evlist, evsel, mp, idx);
if (ops->mmap(map, mp, *output, evlist_cpu) < 0) if (ops->mmap(map, mp, *output, evlist_cpu) < 0)
return -1; return -1;
...@@ -516,9 +523,6 @@ mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, ...@@ -516,9 +523,6 @@ mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
int output = -1; int output = -1;
int output_overwrite = -1; int output_overwrite = -1;
if (ops->idx)
ops->idx(evlist, mp, thread, false);
if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread, if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread,
&output, &output_overwrite)) &output, &output_overwrite))
goto out_unmap; goto out_unmap;
...@@ -543,9 +547,6 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, ...@@ -543,9 +547,6 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
int output = -1; int output = -1;
int output_overwrite = -1; int output_overwrite = -1;
if (ops->idx)
ops->idx(evlist, mp, cpu, true);
for (thread = 0; thread < nr_threads; thread++) { for (thread = 0; thread < nr_threads; thread++) {
if (mmap_per_evsel(evlist, ops, cpu, mp, cpu, if (mmap_per_evsel(evlist, ops, cpu, mp, cpu,
thread, &output, &output_overwrite)) thread, &output, &output_overwrite))
......
...@@ -328,6 +328,17 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, ...@@ -328,6 +328,17 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
return 0; return 0;
} }
/*
 * Issue the ioctl request 'ioc' with argument 'arg' on the file descriptor
 * that FD() yields for ('cpu_map_idx', 'thread').
 *
 * Returns -1 without calling ioctl() when FD() yields NULL or the descriptor
 * it points at is negative; otherwise returns whatever ioctl() returns.
 */
static int perf_evsel__ioctl(struct perf_evsel *evsel, int ioc, void *arg,
			     int cpu_map_idx, int thread)
{
	int *slot = FD(evsel, cpu_map_idx, thread);

	/* No descriptor slot for this (cpu index, thread) pair. */
	if (!slot)
		return -1;

	return *slot < 0 ? -1 : ioctl(*slot, ioc, arg);
}
static int perf_evsel__run_ioctl(struct perf_evsel *evsel, static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
int ioc, void *arg, int ioc, void *arg,
int cpu_map_idx) int cpu_map_idx)
...@@ -335,13 +346,7 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, ...@@ -335,13 +346,7 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
int thread; int thread;
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
int err; int err = perf_evsel__ioctl(evsel, ioc, arg, cpu_map_idx, thread);
int *fd = FD(evsel, cpu_map_idx, thread);
if (fd == NULL || *fd < 0)
return -1;
err = ioctl(*fd, ioc, arg);
if (err) if (err)
return err; return err;
...@@ -355,6 +360,21 @@ int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx) ...@@ -355,6 +360,21 @@ int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx)
return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu_map_idx); return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu_map_idx);
} }
/*
 * Send PERF_EVENT_IOC_ENABLE for 'thread' at every index of evsel->cpus.
 *
 * Stops at the first non-zero result from perf_evsel__ioctl() and returns it;
 * returns 0 when every call succeeded.
 */
int perf_evsel__enable_thread(struct perf_evsel *evsel, int thread)
{
	/* Only the index is used; 'cpu' exists to satisfy the iterator macro. */
	struct perf_cpu cpu __maybe_unused;
	int cpu_map_idx;

	perf_cpu_map__for_each_cpu(cpu, cpu_map_idx, evsel->cpus) {
		int ret = perf_evsel__ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL,
					    cpu_map_idx, thread);

		if (ret != 0)
			return ret;
	}

	return 0;
}
int perf_evsel__enable(struct perf_evsel *evsel) int perf_evsel__enable(struct perf_evsel *evsel)
{ {
int i; int i;
......
...@@ -38,7 +38,8 @@ struct perf_evlist { ...@@ -38,7 +38,8 @@ struct perf_evlist {
}; };
typedef void typedef void
(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int, bool); (*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_evsel*,
struct perf_mmap_param*, int);
typedef struct perf_mmap* typedef struct perf_mmap*
(*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int); (*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int);
typedef int typedef int
......
...@@ -9,4 +9,6 @@ extern unsigned int page_size; ...@@ -9,4 +9,6 @@ extern unsigned int page_size;
ssize_t readn(int fd, void *buf, size_t n); ssize_t readn(int fd, void *buf, size_t n);
ssize_t writen(int fd, const void *buf, size_t n); ssize_t writen(int fd, const void *buf, size_t n);
ssize_t preadn(int fd, void *buf, size_t n, off_t offs);
#endif /* __LIBPERF_INTERNAL_CPUMAP_H */ #endif /* __LIBPERF_INTERNAL_CPUMAP_H */
...@@ -31,4 +31,7 @@ LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_c ...@@ -31,4 +31,7 @@ LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_c
(idx) < perf_cpu_map__nr(cpus); \ (idx) < perf_cpu_map__nr(cpus); \
(idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx)) (idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx))
/*
 * Iterate (idx) from 0 upwards for as long as it is below
 * perf_cpu_map__nr(cpus). perf_cpu_map__nr(cpus) is re-evaluated on every
 * iteration.
 */
#define perf_cpu_map__for_each_idx(idx, cpus) \
for ((idx) = 0; (idx) < perf_cpu_map__nr(cpus); (idx)++)
#endif /* __LIBPERF_CPUMAP_H */ #endif /* __LIBPERF_CPUMAP_H */
...@@ -36,6 +36,7 @@ LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int ...@@ -36,6 +36,7 @@ LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int
struct perf_counts_values *count); struct perf_counts_values *count);
LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel); LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx); LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
LIBPERF_API int perf_evsel__enable_thread(struct perf_evsel *evsel, int thread);
LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel); LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel);
LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx); LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel); LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
......
...@@ -38,6 +38,26 @@ ssize_t readn(int fd, void *buf, size_t n) ...@@ -38,6 +38,26 @@ ssize_t readn(int fd, void *buf, size_t n)
return ion(true, fd, buf, n); return ion(true, fd, buf, n);
} }
/*
 * Read exactly 'n' bytes from 'fd', starting at file offset 'offs', into
 * 'buf'. pread() is retried when interrupted (EINTR) and called again after
 * a short read until all 'n' bytes have arrived.
 *
 * Returns 'n' on success. If pread() fails for any other reason, or reports
 * end-of-file (returns 0) before 'n' bytes were read, that pread() return
 * value (negative or 0) is returned and the bytes already read are not
 * reported.
 */
ssize_t preadn(int fd, void *buf, size_t n, off_t offs)
{
	/* Byte cursor: arithmetic on 'void *' is a GNU extension, not ISO C. */
	char *dst = buf;
	size_t left = n;

	while (left) {
		ssize_t ret = pread(fd, dst, left, offs);

		if (ret < 0 && errno == EINTR)
			continue;
		if (ret <= 0)
			return ret;

		left -= ret;
		dst  += ret;
		offs += ret;
	}

	return n;
}
/* /*
* Write exactly 'n' bytes or return an error. * Write exactly 'n' bytes or return an error.
*/ */
......
...@@ -19,7 +19,6 @@ perf.data ...@@ -19,7 +19,6 @@ perf.data
perf.data.old perf.data.old
output.svg output.svg
perf-archive perf-archive
perf-with-kcore
perf-iostat perf-iostat
tags tags
TAGS TAGS
......
...@@ -147,6 +147,11 @@ include::itrace.txt[] ...@@ -147,6 +147,11 @@ include::itrace.txt[]
The period/hits keywords set the base the percentage is computed The period/hits keywords set the base the percentage is computed
on - the samples period or the number of samples (hits). on - the samples period or the number of samples (hits).
--percent-limit::
Do not show functions which have an overhead under that percent on
stdio or stdio2 (Default: 0). Note that this is about selection of
functions to display, not about lines within the function.
SEE ALSO SEE ALSO
-------- --------
linkperf:perf-record[1], linkperf:perf-report[1] linkperf:perf-record[1], linkperf:perf-report[1]
perf-arm-spe(1)
================
NAME
----
perf-arm-spe - Support for Arm Statistical Profiling Extension within Perf tools
SYNOPSIS
--------
[verse]
'perf record' -e arm_spe//
DESCRIPTION
-----------
The SPE (Statistical Profiling Extension) feature provides accurate attribution of latencies and
events down to individual instructions. Rather than being interrupt-driven, it picks an
instruction to sample and then captures data for it during execution. Data includes execution time
in cycles. For loads and stores it also includes data address, cache miss events, and data origin.
The sampling has 5 stages:
1. Choose an operation
2. Collect data about the operation
3. Optionally discard the record based on a filter
4. Write the record to memory
5. Interrupt when the buffer is full
Choose an operation
~~~~~~~~~~~~~~~~~~~
This is chosen from a sample population; for SPE this is an IMPLEMENTATION DEFINED choice of all
architectural instructions or all micro-ops. Sampling happens at a programmable interval. The
architecture provides a mechanism for the SPE driver to infer the minimum interval at which it should
sample. This minimum interval is used by the driver if no interval is specified. A pseudo-random
perturbation is also added to the sampling interval by default.
Collect data about the operation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Program counter, PMU events, timings and data addresses related to the operation are recorded.
Sampling ensures there is only one sampled operation in flight.
Optionally discard the record based on a filter
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Based on programmable criteria, choose whether to keep the record or discard it. If the record is
discarded then the flow stops here for this sample.
Write the record to memory
~~~~~~~~~~~~~~~~~~~~~~~~~~
The record is appended to a memory buffer.
Interrupt when the buffer is full
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When the buffer fills, an interrupt is sent and the driver signals Perf to collect the records.
Perf saves the raw data in the perf.data file.
Opening the file
----------------
Up until this point no decoding of the SPE data was done by either the kernel or Perf. Only when the
recorded file is opened with 'perf report' or 'perf script' does the decoding happen. When decoding
the data, Perf generates "synthetic samples" as if these were generated at the time of the
recording. These samples are the same as if normal sampling was done by Perf without using SPE,
although they may have more attributes associated with them. For example a normal sample may have
just the instruction pointer, but an SPE sample can have data addresses and latency attributes.
Why Sampling?
-------------
- Sampling, rather than tracing, cuts down the profiling problem to something more manageable for
hardware. Only one sampled operation is in flight at a time.
- Allows precise attribution data, including: Full PC of instruction, data virtual and physical
addresses.
- Allows correlation between an instruction and events, such as TLB and cache miss. (Data source
indicates which particular cache was hit, but the meaning is implementation defined because
different implementations can have different cache configurations.)
However, SPE does not provide any call-graph information, and relies on statistical methods.
Collisions
----------
When an operation is sampled while a previous sampled operation has not finished, a collision
occurs. The new sample is dropped. Collisions affect the integrity of the data, so the sample rate
should be set to avoid collisions.
The 'sample_collision' PMU event can be used to determine the number of lost samples. However, this
count is based on collisions _before_ filtering occurs, so it cannot be used as an exact number of
samples dropped that would have made it through the filter, but it can serve as a rough guide.
The effect of microarchitectural sampling
-----------------------------------------
If an implementation samples micro-operations instead of instructions, the results of sampling must
be weighted accordingly.
For example, if a given instruction A is always converted into two micro-operations, A0 and A1, it
becomes twice as likely to appear in the sample population.
The coarse effect of conversions, and, if applicable, sampling of speculative operations, can be
estimated from the 'sample_pop' and 'inst_retired' PMU events.
Kernel Requirements
-------------------
The ARM_SPE_PMU config must be set to build as either a module or statically.
Depending on CPU model, the kernel may need to be booted with page table isolation disabled
(kpti=off). If KPTI needs to be disabled, this will fail with a console message "profiling buffer
inaccessible. Try passing 'kpti=off' on the kernel command line".
Capturing SPE with perf command-line tools
------------------------------------------
You can record a session with SPE samples:
perf record -e arm_spe// -- ./mybench
The sample period is set from the -c option, and because the minimum interval is used by default
it's recommended to set this to a higher value. The value is written to PMSIRR.INTERVAL.
Config parameters
~~~~~~~~~~~~~~~~~
These are placed between the // in the event and comma separated. For example '-e
arm_spe/load_filter=1,min_latency=10/'
branch_filter=1 - collect branches only (PMSFCR.B)
event_filter=<mask> - filter on specific events (PMSEVFR) - see bitfield description below
jitter=1 - use jitter to avoid resonance when sampling (PMSIRR.RND)
load_filter=1 - collect loads only (PMSFCR.LD)
min_latency=<n> - collect only samples with this latency or higher* (PMSLATFR)
pa_enable=1 - collect physical address (as well as VA) of loads/stores (PMSCR.PA) - requires privilege
pct_enable=1 - collect physical timestamp instead of virtual timestamp (PMSCR.PCT) - requires privilege
store_filter=1 - collect stores only (PMSFCR.ST)
ts_enable=1 - enable timestamping with value of generic timer (PMSCR.TS)
+++*+++ Latency is the total latency from the point at which sampling started on that instruction, rather
than only the execution latency.
Only some events can be filtered on; these include:
bit 1 - instruction retired (i.e. omit speculative instructions)
bit 3 - L1D refill
bit 5 - TLB refill
bit 7 - mispredict
bit 11 - misaligned access
So to sample just retired instructions:
perf record -e arm_spe/event_filter=2/ -- ./mybench
or just mispredicted branches:
perf record -e arm_spe/event_filter=0x80/ -- ./mybench
Viewing the data
~~~~~~~~~~~~~~~~~
By default perf report and perf script will assign samples to separate groups depending on the
attributes/events of the SPE record. Because instructions can have multiple events associated with
them, the samples in these groups are not necessarily unique. For example perf report shows these
groups:
Available samples
0 arm_spe//
0 dummy:u
21 l1d-miss
897 l1d-access
5 llc-miss
7 llc-access
2 tlb-miss
1K tlb-access
36 branch-miss
0 remote-access
900 memory
The arm_spe// and dummy:u events are implementation details and are expected to be empty.
To get a full list of unique samples that are not sorted into groups, set the itrace option to
generate 'instruction' samples. The period option is also taken into account, so set it to 1
instruction unless you want to further downsample the already sampled SPE data:
perf report --itrace=i1i
Memory access details are also stored on the samples and this can be viewed with:
perf report --mem-mode
Common errors
~~~~~~~~~~~~~
- "Cannot find PMU `arm_spe'. Missing kernel support?"
Module not built or loaded, KPTI not disabled (see above), or running on a VM
- "Arm SPE CONTEXT packets not found in the traces."
Root privilege is required to collect context packets. But these only increase the accuracy of
assigning PIDs to kernel samples. For userspace sampling this can be ignored.
- Excessively large perf.data file size
Increase sampling interval (see above)
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script[1], linkperf:perf-report[1],
linkperf:perf-inject[1]
...@@ -189,9 +189,10 @@ For each cacheline in the 1) list we display following data: ...@@ -189,9 +189,10 @@ For each cacheline in the 1) list we display following data:
Total stores Total stores
- sum of all store accesses - sum of all store accesses
Store Reference - L1Hit, L1Miss Store Reference - L1Hit, L1Miss, N/A
L1Hit - store accesses that hit L1 L1Hit - store accesses that hit L1
L1Miss - store accesses that missed L1 L1Miss - store accesses that missed L1
N/A - store accesses whose memory level is not available
Core Load Hit - FB, L1, L2 Core Load Hit - FB, L1, L2
- count of load hits in FB (Fill Buffer), L1 and L2 cache - count of load hits in FB (Fill Buffer), L1 and L2 cache
...@@ -210,8 +211,9 @@ For each offset in the 2) list we display following data: ...@@ -210,8 +211,9 @@ For each offset in the 2) list we display following data:
HITM - Rmt, Lcl HITM - Rmt, Lcl
- % of Remote/Local HITM accesses for given offset within cacheline - % of Remote/Local HITM accesses for given offset within cacheline
Store Refs - L1 Hit, L1 Miss Store Refs - L1 Hit, L1 Miss, N/A
- % of store accesses that hit/missed L1 for given offset within cacheline - % of store accesses that hit L1, missed L1 and N/A (not available) memory
level for given offset within cacheline
Data address - Offset Data address - Offset
- offset address - offset address
......
This diff is collapsed.
...@@ -94,6 +94,9 @@ OPTIONS ...@@ -94,6 +94,9 @@ OPTIONS
kernel module information. Users copy it out from guest os. kernel module information. Users copy it out from guest os.
--guestvmlinux=<path>:: --guestvmlinux=<path>::
Guest os kernel vmlinux. Guest os kernel vmlinux.
--guest-code::
Indicate that guest code can be found in the hypervisor process,
which is a common case for KVM test programs.
-v:: -v::
--verbose:: --verbose::
Be more verbose (show counter open errors, etc). Be more verbose (show counter open errors, etc).
......
...@@ -64,6 +64,27 @@ REPORT OPTIONS ...@@ -64,6 +64,27 @@ REPORT OPTIONS
--combine-locks:: --combine-locks::
Merge lock instances in the same class (based on name). Merge lock instances in the same class (based on name).
-t::
--threads::
The -t option shows per-thread lock stats, as below:
$ perf lock report -t -F acquired,contended,avg_wait
Name acquired contended avg wait (ns)
perf 240569 9 5784
swapper 106610 19 543
:15789 17370 2 14538
ContainerMgr 8981 6 874
sleep 5275 1 11281
ContainerThread 4416 4 944
RootPressureThr 3215 5 1215
rcu_preempt 2954 0 0
ContainerMgr 2560 0 0
unnamed 1873 0 0
EventManager_De 1845 1 636
futex-default-S 1609 0 0
INFO OPTIONS INFO OPTIONS
------------ ------------
......
...@@ -499,6 +499,10 @@ include::itrace.txt[] ...@@ -499,6 +499,10 @@ include::itrace.txt[]
The known limitations include exception handling such as The known limitations include exception handling such as
setjmp/longjmp will have calls/returns not match. setjmp/longjmp will have calls/returns not match.
--guest-code::
Indicate that guest code can be found in the hypervisor process,
which is a common case for KVM test programs.
SEE ALSO SEE ALSO
-------- --------
linkperf:perf-record[1], linkperf:perf-script-perl[1], linkperf:perf-record[1], linkperf:perf-script-perl[1],
......
...@@ -454,6 +454,16 @@ Multiple events are created from a single event specification when: ...@@ -454,6 +454,16 @@ Multiple events are created from a single event specification when:
2. Aliases, which are listed immediately after the Kernel PMU events 2. Aliases, which are listed immediately after the Kernel PMU events
by perf list, are used. by perf list, are used.
--hybrid-merge::
Merge the hybrid event counts from all PMUs.
For hybrid events, by default, the stat aggregates and reports the event
counts per PMU. But sometimes, it's also useful to aggregate event counts
from all PMUs. This option enables that behavior and reports the counts
without PMUs.
For non-hybrid events, it should have no effect.
--smi-cost:: --smi-cost::
Measure SMI cost if msr/aperf/ and msr/smi/ events are supported. Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
......
...@@ -77,7 +77,7 @@ linkperf:perf-stat[1], linkperf:perf-top[1], ...@@ -77,7 +77,7 @@ linkperf:perf-stat[1], linkperf:perf-top[1],
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-record[1], linkperf:perf-report[1],
linkperf:perf-list[1] linkperf:perf-list[1]
linkperf:perf-annotate[1],linkperf:perf-archive[1], linkperf:perf-annotate[1],linkperf:perf-archive[1],linkperf:perf-arm-spe[1],
linkperf:perf-bench[1], linkperf:perf-buildid-cache[1], linkperf:perf-bench[1], linkperf:perf-buildid-cache[1],
linkperf:perf-buildid-list[1], linkperf:perf-c2c[1], linkperf:perf-buildid-list[1], linkperf:perf-c2c[1],
linkperf:perf-config[1], linkperf:perf-data[1], linkperf:perf-diff[1], linkperf:perf-config[1], linkperf:perf-data[1], linkperf:perf-diff[1],
......
...@@ -239,18 +239,33 @@ ifdef PARSER_DEBUG ...@@ -239,18 +239,33 @@ ifdef PARSER_DEBUG
endif endif
# Try different combinations to accommodate systems that only have # Try different combinations to accommodate systems that only have
# python[2][-config] in weird combinations but always preferring # python[2][3]-config in weird combinations in the following order of
# python2 and python2-config as per pep-0394. If python2 or python # priority from lowest to highest:
# aren't found, then python3 is used. # * python3-config
PYTHON_AUTO := python # * python-config
PYTHON_AUTO := $(if $(call get-executable,python3),python3,$(PYTHON_AUTO)) # * python2-config as per pep-0394.
PYTHON_AUTO := $(if $(call get-executable,python),python,$(PYTHON_AUTO)) # * $(PYTHON)-config (If PYTHON is user supplied but PYTHON_CONFIG isn't)
PYTHON_AUTO := $(if $(call get-executable,python2),python2,$(PYTHON_AUTO)) #
override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON_AUTO)) PYTHON_AUTO := python-config
PYTHON_AUTO_CONFIG := \ PYTHON_AUTO := $(if $(call get-executable,python3-config),python3-config,$(PYTHON_AUTO))
$(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config) PYTHON_AUTO := $(if $(call get-executable,python-config),python-config,$(PYTHON_AUTO))
override PYTHON_CONFIG := \ PYTHON_AUTO := $(if $(call get-executable,python2-config),python2-config,$(PYTHON_AUTO))
$(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON_AUTO_CONFIG))
# If PYTHON is defined but PYTHON_CONFIG isn't, then take $(PYTHON)-config as if it was the user
# supplied value for PYTHON_CONFIG. Because it's "user supplied", error out if it doesn't exist.
ifdef PYTHON
ifndef PYTHON_CONFIG
PYTHON_CONFIG_AUTO := $(call get-executable,$(PYTHON)-config)
PYTHON_CONFIG := $(if $(PYTHON_CONFIG_AUTO),$(PYTHON_CONFIG_AUTO),\
$(call $(error $(PYTHON)-config not found)))
endif
endif
# Select either auto detected python and python-config or use user supplied values if they are
# defined. get-executable-or-default fails with an error if the first argument is supplied but
# doesn't exist.
override PYTHON_CONFIG := $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON_AUTO))
override PYTHON := $(call get-executable-or-default,PYTHON,$(subst -config,,$(PYTHON_AUTO)))
grep-libs = $(filter -l%,$(1)) grep-libs = $(filter -l%,$(1))
strip-libs = $(filter-out -l%,$(1)) strip-libs = $(filter-out -l%,$(1))
...@@ -656,6 +671,9 @@ ifdef BUILD_BPF_SKEL ...@@ -656,6 +671,9 @@ ifdef BUILD_BPF_SKEL
ifeq ($(feature-clang-bpf-co-re), 0) ifeq ($(feature-clang-bpf-co-re), 0)
dummy := $(error Error: clang too old/not installed. Please install recent clang to build with BUILD_BPF_SKEL) dummy := $(error Error: clang too old/not installed. Please install recent clang to build with BUILD_BPF_SKEL)
endif endif
ifeq ($(filter -DHAVE_LIBBPF_SUPPORT, $(CFLAGS)),)
dummy := $(error Error: BPF skeleton support requires libbpf)
endif
$(call detected,CONFIG_PERF_BPF_SKEL) $(call detected,CONFIG_PERF_BPF_SKEL)
CFLAGS += -DHAVE_BPF_SKEL CFLAGS += -DHAVE_BPF_SKEL
endif endif
......
...@@ -286,7 +286,6 @@ PYRF_OBJS = ...@@ -286,7 +286,6 @@ PYRF_OBJS =
SCRIPT_SH = SCRIPT_SH =
SCRIPT_SH += perf-archive.sh SCRIPT_SH += perf-archive.sh
SCRIPT_SH += perf-with-kcore.sh
SCRIPT_SH += perf-iostat.sh SCRIPT_SH += perf-iostat.sh
grep-libs = $(filter -l%,$(1)) grep-libs = $(filter -l%,$(1))
...@@ -973,8 +972,6 @@ ifndef NO_LIBBPF ...@@ -973,8 +972,6 @@ ifndef NO_LIBBPF
endif endif
$(call QUIET_INSTALL, perf-archive) \ $(call QUIET_INSTALL, perf-archive) \
$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
$(call QUIET_INSTALL, perf-with-kcore) \
$(INSTALL) $(OUTPUT)perf-with-kcore -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
$(call QUIET_INSTALL, perf-iostat) \ $(call QUIET_INSTALL, perf-iostat) \
$(INSTALL) $(OUTPUT)perf-iostat -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' $(INSTALL) $(OUTPUT)perf-iostat -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
ifndef NO_LIBAUDIT ifndef NO_LIBAUDIT
...@@ -1088,7 +1085,7 @@ bpf-skel-clean: ...@@ -1088,7 +1085,7 @@ bpf-skel-clean:
$(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS)
clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean
$(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(OUTPUT)perf-iostat $(LANG_BINDINGS) $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS)
$(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)$(RM) $(OUTPUT).config-detected $(Q)$(RM) $(OUTPUT).config-detected
$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so
......
...@@ -319,6 +319,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, ...@@ -319,6 +319,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
} }
evsel->core.attr.freq = 0; evsel->core.attr.freq = 0;
evsel->core.attr.sample_period = 1; evsel->core.attr.sample_period = 1;
evsel->needs_auxtrace_mmap = true;
cs_etm_evsel = evsel; cs_etm_evsel = evsel;
opts->full_auxtrace = true; opts->full_auxtrace = true;
} }
......
...@@ -160,6 +160,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, ...@@ -160,6 +160,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
} }
evsel->core.attr.freq = 0; evsel->core.attr.freq = 0;
evsel->core.attr.sample_period = arm_spe_pmu->default_config->sample_period; evsel->core.attr.sample_period = arm_spe_pmu->default_config->sample_period;
evsel->needs_auxtrace_mmap = true;
arm_spe_evsel = evsel; arm_spe_evsel = evsel;
opts->full_auxtrace = true; opts->full_auxtrace = true;
} }
......
...@@ -2,3 +2,4 @@ ifndef NO_DWARF ...@@ -2,3 +2,4 @@ ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1 PERF_HAVE_DWARF_REGS := 1
endif endif
PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
PERF_HAVE_JITDUMP := 1
...@@ -98,6 +98,7 @@ struct auxtrace_record *auxtrace_record__init(struct evlist *evlist, ...@@ -98,6 +98,7 @@ struct auxtrace_record *auxtrace_record__init(struct evlist *evlist,
evlist__for_each_entry(evlist, pos) { evlist__for_each_entry(evlist, pos) {
if (pos->core.attr.config == PERF_EVENT_CPUM_SF_DIAG) { if (pos->core.attr.config == PERF_EVENT_CPUM_SF_DIAG) {
diagnose = 1; diagnose = 1;
pos->needs_auxtrace_mmap = true;
break; break;
} }
} }
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include "util/pmu.h" #include "util/pmu.h"
#include "util/evlist.h" #include "util/evlist.h"
#include "util/parse-events.h" #include "util/parse-events.h"
#include "topdown.h"
#define TOPDOWN_L1_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound}" #define TOPDOWN_L1_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound}"
#define TOPDOWN_L2_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound,topdown-heavy-ops,topdown-br-mispredict,topdown-fetch-lat,topdown-mem-bound}" #define TOPDOWN_L2_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound,topdown-heavy-ops,topdown-br-mispredict,topdown-fetch-lat,topdown-mem-bound}"
...@@ -25,18 +26,18 @@ struct evsel *arch_evlist__leader(struct list_head *list) ...@@ -25,18 +26,18 @@ struct evsel *arch_evlist__leader(struct list_head *list)
first = list_first_entry(list, struct evsel, core.node); first = list_first_entry(list, struct evsel, core.node);
if (!pmu_have_event("cpu", "slots")) if (!topdown_sys_has_perf_metrics())
return first; return first;
/* If there is a slots event and a topdown event then the slots event comes first. */ /* If there is a slots event and a topdown event then the slots event comes first. */
__evlist__for_each_entry(list, evsel) { __evlist__for_each_entry(list, evsel) {
if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") && evsel->name) { if (evsel->pmu_name && !strncmp(evsel->pmu_name, "cpu", 3) && evsel->name) {
if (strcasestr(evsel->name, "slots")) { if (strcasestr(evsel->name, "slots")) {
slots = evsel; slots = evsel;
if (slots == first) if (slots == first)
return first; return first;
} }
if (!strncasecmp(evsel->name, "topdown", 7)) if (strcasestr(evsel->name, "topdown"))
has_topdown = true; has_topdown = true;
if (slots && has_topdown) if (slots && has_topdown)
return slots; return slots;
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <stdlib.h> #include <stdlib.h>
#include "util/evsel.h" #include "util/evsel.h"
#include "util/env.h" #include "util/env.h"
#include "util/pmu.h"
#include "linux/string.h" #include "linux/string.h"
void arch_evsel__set_sample_weight(struct evsel *evsel) void arch_evsel__set_sample_weight(struct evsel *evsel)
...@@ -29,3 +30,33 @@ void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr) ...@@ -29,3 +30,33 @@ void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr)
free(env.cpuid); free(env.cpuid);
} }
/*
 * Tell whether the PMU behind @evsel supports the perf metrics feature.
 *
 * The PMU name is taken from evsel->pmu_name, falling back to "cpu" when
 * the evsel carries no PMU name.
 */
static bool evsel__sys_has_perf_metrics(const struct evsel *evsel)
{
	const char *pmu_name = "cpu";

	if (evsel->pmu_name)
		pmu_name = evsel->pmu_name;

	/*
	 * The PERF_TYPE_RAW type is the core PMU type, e.g., "cpu" PMU
	 * on a non-hybrid machine, "cpu_core" PMU on a hybrid machine.
	 * Anything else cannot be a perf metrics capable PMU.
	 */
	if (evsel->core.attr.type != PERF_TYPE_RAW)
		return false;

	/*
	 * The slots event is only available for the core PMU which
	 * supports the perf metrics feature, so its presence is used
	 * as the detection criterion.
	 */
	return pmu_have_event(pmu_name, "slots");
}
/*
 * Return true when @evsel is a "slots" event or has "topdown" anywhere in
 * its name (both matched case-insensitively) on a PMU that supports the
 * perf metrics feature. Unnamed events never qualify.
 */
bool arch_evsel__must_be_in_group(const struct evsel *evsel)
{
	const char *name;

	if (!evsel__sys_has_perf_metrics(evsel))
		return false;

	name = evsel->name;
	if (!name)
		return false;

	if (!strcasecmp(name, "slots"))
		return true;

	return strcasestr(name, "topdown") != NULL;
}
...@@ -129,6 +129,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, ...@@ -129,6 +129,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
} }
evsel->core.attr.freq = 0; evsel->core.attr.freq = 0;
evsel->core.attr.sample_period = 1; evsel->core.attr.sample_period = 1;
evsel->needs_auxtrace_mmap = true;
intel_bts_evsel = evsel; intel_bts_evsel = evsel;
opts->full_auxtrace = true; opts->full_auxtrace = true;
} }
......
...@@ -649,6 +649,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, ...@@ -649,6 +649,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
evsel->core.attr.freq = 0; evsel->core.attr.freq = 0;
evsel->core.attr.sample_period = 1; evsel->core.attr.sample_period = 1;
evsel->no_aux_samples = true; evsel->no_aux_samples = true;
evsel->needs_auxtrace_mmap = true;
intel_pt_evsel = evsel; intel_pt_evsel = evsel;
opts->full_auxtrace = true; opts->full_auxtrace = true;
} }
......
...@@ -3,6 +3,31 @@ ...@@ -3,6 +3,31 @@
#include "api/fs/fs.h" #include "api/fs/fs.h"
#include "util/pmu.h" #include "util/pmu.h"
#include "util/topdown.h" #include "util/topdown.h"
#include "topdown.h"
/*
 * Check whether there is a PMU which supports the perf metrics.
 *
 * The answer is computed on the first call and kept in function-local
 * statics; later calls return the remembered value.
 */
bool topdown_sys_has_perf_metrics(void)
{
	static bool has_perf_metrics;
	static bool cached;

	if (!cached) {
		/*
		 * The perf metrics feature is a core PMU feature.
		 * The PERF_TYPE_RAW type is the type of a core PMU.
		 * The slots event is only available when the core PMU
		 * supports the perf metrics feature.
		 */
		struct perf_pmu *pmu = perf_pmu__find_by_type(PERF_TYPE_RAW);

		has_perf_metrics = pmu && pmu_have_event(pmu->name, "slots");
		cached = true;
	}

	return has_perf_metrics;
}
/* /*
* Check whether we can use a group for top down. * Check whether we can use a group for top down.
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _TOPDOWN_H
#define _TOPDOWN_H 1
/*
 * Returns true when a PMU exposing the "slots" event is present.
 * NOTE(review): this header uses `bool` without including anything itself;
 * it appears to rely on the includer having made `bool` available - confirm.
 */
bool topdown_sys_has_perf_metrics(void);
#endif /* _TOPDOWN_H */
...@@ -14,6 +14,7 @@ perf-y += kallsyms-parse.o ...@@ -14,6 +14,7 @@ perf-y += kallsyms-parse.o
perf-y += find-bit-bench.o perf-y += find-bit-bench.o
perf-y += inject-buildid.o perf-y += inject-buildid.o
perf-y += evlist-open-close.o perf-y += evlist-open-close.o
perf-y += breakpoint.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
......
...@@ -49,6 +49,8 @@ int bench_synthesize(int argc, const char **argv); ...@@ -49,6 +49,8 @@ int bench_synthesize(int argc, const char **argv);
int bench_kallsyms_parse(int argc, const char **argv); int bench_kallsyms_parse(int argc, const char **argv);
int bench_inject_build_id(int argc, const char **argv); int bench_inject_build_id(int argc, const char **argv);
int bench_evlist_open_close(int argc, const char **argv); int bench_evlist_open_close(int argc, const char **argv);
int bench_breakpoint_thread(int argc, const char **argv);
int bench_breakpoint_enable(int argc, const char **argv);
#define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0 #define BENCH_FORMAT_DEFAULT 0
......
// SPDX-License-Identifier: GPL-2.0
#include <subcmd/parse-options.h>
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <linux/time64.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <pthread.h>
#include <stddef.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include "bench.h"
#include "futex.h"
/*
 * Tunables of the "thread" benchmark, filled in from the command line:
 *   nbreakpoints - number of inheritable breakpoints created up front
 *   nparallel    - number of worker threads running batches concurrently
 *   nthreads     - number of threads created and joined per batch
 *
 * Only used inside this file, hence internal linkage.
 */
static struct {
	unsigned int nbreakpoints;
	unsigned int nparallel;
	unsigned int nthreads;
} thread_params = {
	.nbreakpoints = 1,
	.nparallel = 1,
	.nthreads = 1,
};
/*
 * Command-line options of "perf bench breakpoint thread"; each one stores
 * an unsigned integer directly into the matching thread_params field.
 */
static const struct option thread_options[] = {
OPT_UINTEGER('b', "breakpoints", &thread_params.nbreakpoints,
"Specify amount of breakpoints"),
OPT_UINTEGER('p', "parallelism", &thread_params.nparallel, "Specify amount of parallelism"),
OPT_UINTEGER('t', "threads", &thread_params.nthreads, "Specify amount of threads"),
OPT_END()
};
/* NULL-terminated usage strings passed to the option parser for the "thread" benchmark. */
static const char * const thread_usage[] = {
"perf bench breakpoint thread <options>",
NULL
};
struct breakpoint {
int fd;
char watched;
};
/*
 * Open an inheritable, user-space-only read/write hardware breakpoint
 * covering the single byte at @addr.
 *
 * Returns whatever the perf_event_open syscall returns: callers treat -1
 * as failure and anything else as the event file descriptor.
 */
static int breakpoint_setup(void *addr)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_BREAKPOINT,
		.size		= sizeof(attr),
		.inherit	= 1,
		.exclude_kernel	= 1,
		.exclude_hv	= 1,
		.bp_type	= HW_BREAKPOINT_RW,
		.bp_addr	= (unsigned long)addr,
		.bp_len		= HW_BREAKPOINT_LEN_1,
	};

	/* Arguments after &attr are pid, cpu, group_fd and flags. */
	return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
}
/*
 * Thread body that sleeps until *arg becomes non-zero.
 *
 * @arg points to an unsigned int flag. While the flag reads as zero the
 * thread calls futex_wait() on it expecting the value 0, then re-checks.
 */
static void *passive_thread(void *arg)
{
	unsigned int *done = arg;

	for (;;) {
		if (__atomic_load_n(done, __ATOMIC_RELAXED))
			break;
		futex_wait(done, 0, NULL, 0);
	}

	return NULL;
}
/*
 * Thread body that busy-waits until *arg becomes non-zero.
 *
 * @arg points to an unsigned int flag which is polled with relaxed atomic
 * loads; the flag itself is never written here. Always returns NULL.
 */
static void *active_thread(void *arg)
{
	unsigned int *done = arg;

	for (;;) {
		if (__atomic_load_n(done, __ATOMIC_RELAXED))
			return NULL;
	}
}
/*
 * Worker of the "thread" benchmark.
 *
 * @arg points to an int countdown shared by all workers. Each successful
 * decrement of that counter (while it was still positive) accounts for one
 * batch: thread_params.nthreads passive threads are created, released via
 * the local `done` flag plus futex_wake(), and joined again.
 *
 * Terminates the process on allocation or thread-creation failure.
 * Always returns NULL.
 */
static void *breakpoint_thread(void *arg)
{
	unsigned int i, done;
	int *repeat = (int *)arg;
	pthread_t *threads;
	int err;

	threads = calloc(thread_params.nthreads, sizeof(threads[0]));
	if (!threads)
		exit((perror("calloc"), EXIT_FAILURE));

	while (__atomic_fetch_sub(repeat, 1, __ATOMIC_RELAXED) > 0) {
		done = 0;
		for (i = 0; i < thread_params.nthreads; i++) {
			err = pthread_create(&threads[i], NULL, passive_thread, &done);
			if (err) {
				/*
				 * pthread_create() reports failure through its
				 * return value and leaves errno alone, so hand
				 * the code to perror() explicitly.
				 */
				errno = err;
				perror("pthread_create");
				exit(EXIT_FAILURE);
			}
		}
		/* Release every passive thread, then reap the whole batch. */
		__atomic_store_n(&done, 1, __ATOMIC_RELAXED);
		futex_wake(&done, thread_params.nthreads, 0);
		for (i = 0; i < thread_params.nthreads; i++)
			pthread_join(threads[i], NULL);
	}

	free(threads);
	return NULL;
}
/*
 * The benchmark creates nbreakpoints inheritable breakpoints,
 * then starts nparallel threads which create and join bench_repeat batches
 * of nthreads threads.
 *
 * @argc/@argv: remaining command line, parsed against thread_options.
 *
 * Prints the elapsed time in the format selected by bench_format and
 * returns 0. Any setup failure, leftover argument or unknown output format
 * terminates the process with EXIT_FAILURE.
 */
int bench_breakpoint_thread(int argc, const char **argv)
{
	unsigned int i;
	int repeat = bench_repeat;
	int err;
	double result_usec;
	struct breakpoint *breakpoints;
	pthread_t *parallel;
	struct timeval start, stop, diff;

	if (parse_options(argc, argv, thread_options, thread_usage, 0)) {
		usage_with_options(thread_usage, thread_options);
		exit(EXIT_FAILURE);
	}

	breakpoints = calloc(thread_params.nbreakpoints, sizeof(breakpoints[0]));
	parallel = calloc(thread_params.nparallel, sizeof(parallel[0]));
	if (!breakpoints || !parallel)
		exit((perror("calloc"), EXIT_FAILURE));

	for (i = 0; i < thread_params.nbreakpoints; i++) {
		breakpoints[i].fd = breakpoint_setup(&breakpoints[i].watched);
		if (breakpoints[i].fd == -1)
			exit((perror("perf_event_open"), EXIT_FAILURE));
	}

	gettimeofday(&start, NULL);
	for (i = 0; i < thread_params.nparallel; i++) {
		err = pthread_create(&parallel[i], NULL, breakpoint_thread, &repeat);
		if (err) {
			/* pthread_create() returns the error instead of setting errno. */
			errno = err;
			perror("pthread_create");
			exit(EXIT_FAILURE);
		}
	}
	for (i = 0; i < thread_params.nparallel; i++)
		pthread_join(parallel[i], NULL);
	gettimeofday(&stop, NULL);
	timersub(&stop, &start, &diff);

	for (i = 0; i < thread_params.nbreakpoints; i++)
		close(breakpoints[i].fd);
	free(parallel);
	free(breakpoints);

	switch (bench_format) {
	case BENCH_FORMAT_DEFAULT:
		printf("# Created/joined %d threads with %d breakpoints and %d parallelism\n",
			bench_repeat, thread_params.nbreakpoints, thread_params.nparallel);
		/* %lu expects unsigned long, so cast accordingly. */
		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
			(unsigned long)diff.tv_sec,
			(unsigned long)(diff.tv_usec / USEC_PER_MSEC));
		/*
		 * Accumulate in double: a 32-bit unsigned microsecond count
		 * wraps after roughly 4294 seconds.
		 */
		result_usec = (double)diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
		printf(" %14lf usecs/op\n",
			result_usec / bench_repeat / thread_params.nthreads);
		printf(" %14lf usecs/op/cpu\n",
			result_usec / bench_repeat /
			thread_params.nthreads * thread_params.nparallel);
		break;
	case BENCH_FORMAT_SIMPLE:
		printf("%lu.%03lu\n", (unsigned long)diff.tv_sec,
			(unsigned long)(diff.tv_usec / USEC_PER_MSEC));
		break;
	default:
		fprintf(stderr, "Unknown format: %d\n", bench_format);
		exit(EXIT_FAILURE);
	}

	return 0;
}
/*
 * Tunables of the "enable" benchmark, filled in from the command line:
 *   npassive - number of threads that block until the run is over
 *   nactive  - number of threads that spin until the run is over
 *
 * Only used inside this file, hence internal linkage.
 */
static struct {
	unsigned int npassive;
	unsigned int nactive;
} enable_params = {
	.nactive = 0,
	.npassive = 0,
};
/*
 * Command-line options of "perf bench breakpoint enable"; each one stores
 * an unsigned integer directly into the matching enable_params field.
 */
static const struct option enable_options[] = {
OPT_UINTEGER('p', "passive", &enable_params.npassive, "Specify amount of passive threads"),
OPT_UINTEGER('a', "active", &enable_params.nactive, "Specify amount of active threads"),
OPT_END()
};
/* NULL-terminated usage strings passed to the option parser for the "enable" benchmark. */
static const char * const enable_usage[] = {
"perf bench breakpoint enable <options>",
NULL
};
/*
 * The benchmark creates an inheritable breakpoint,
 * then starts npassive threads that block and nactive threads that actively
 * spin and then disables and enables the breakpoint bench_repeat times.
 *
 * @argc/@argv: remaining command line, parsed against enable_options.
 *
 * Prints the elapsed time in the format selected by bench_format and
 * returns 0. Any setup failure, leftover argument or unknown output format
 * terminates the process with EXIT_FAILURE.
 */
int bench_breakpoint_enable(int argc, const char **argv)
{
	unsigned int i, nthreads, done = 0;
	double result_usec;
	char watched;
	int fd;
	int err;
	pthread_t *threads;
	struct timeval start, stop, diff;

	if (parse_options(argc, argv, enable_options, enable_usage, 0)) {
		usage_with_options(enable_usage, enable_options);
		exit(EXIT_FAILURE);
	}

	fd = breakpoint_setup(&watched);
	if (fd == -1)
		exit((perror("perf_event_open"), EXIT_FAILURE));

	nthreads = enable_params.npassive + enable_params.nactive;
	threads = calloc(nthreads, sizeof(threads[0]));
	/*
	 * Both counts default to 0 and calloc(0, ...) is allowed to return
	 * NULL, so only a NULL for a non-empty request is a real failure.
	 */
	if (!threads && nthreads)
		exit((perror("calloc"), EXIT_FAILURE));

	/* The first npassive threads block, the remaining ones spin. */
	for (i = 0; i < nthreads; i++) {
		err = pthread_create(&threads[i], NULL,
			i < enable_params.npassive ? passive_thread : active_thread, &done);
		if (err) {
			/* pthread_create() returns the error instead of setting errno. */
			errno = err;
			perror("pthread_create");
			exit(EXIT_FAILURE);
		}
	}
	usleep(10000); // let the threads block

	gettimeofday(&start, NULL);
	for (i = 0; i < bench_repeat; i++) {
		if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0))
			exit((perror("ioctl(PERF_EVENT_IOC_DISABLE)"), EXIT_FAILURE));
		if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0))
			exit((perror("ioctl(PERF_EVENT_IOC_ENABLE)"), EXIT_FAILURE));
	}
	gettimeofday(&stop, NULL);
	timersub(&stop, &start, &diff);

	/* Release the helper threads: set the flag, wake the blocked ones. */
	__atomic_store_n(&done, 1, __ATOMIC_RELAXED);
	futex_wake(&done, enable_params.npassive, 0);
	for (i = 0; i < nthreads; i++)
		pthread_join(threads[i], NULL);
	free(threads);
	close(fd);

	switch (bench_format) {
	case BENCH_FORMAT_DEFAULT:
		printf("# Enabled/disabled breakpoint %d time with %d passive and %d active threads\n",
			bench_repeat, enable_params.npassive, enable_params.nactive);
		/* %lu expects unsigned long, so cast accordingly. */
		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
			(unsigned long)diff.tv_sec,
			(unsigned long)(diff.tv_usec / USEC_PER_MSEC));
		/*
		 * Accumulate in double: a 32-bit unsigned microsecond count
		 * wraps after roughly 4294 seconds.
		 */
		result_usec = (double)diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
		printf(" %14lf usecs/op\n", result_usec / bench_repeat);
		break;
	case BENCH_FORMAT_SIMPLE:
		printf("%lu.%03lu\n", (unsigned long)diff.tv_sec,
			(unsigned long)(diff.tv_usec / USEC_PER_MSEC));
		break;
	default:
		fprintf(stderr, "Unknown format: %d\n", bench_format);
		exit(EXIT_FAILURE);
	}

	return 0;
}
...@@ -54,6 +54,7 @@ struct perf_annotate { ...@@ -54,6 +54,7 @@ struct perf_annotate {
bool skip_missing; bool skip_missing;
bool has_br_stack; bool has_br_stack;
bool group_set; bool group_set;
float min_percent;
const char *sym_hist_filter; const char *sym_hist_filter;
const char *cpu_list; const char *cpu_list;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
...@@ -324,6 +325,17 @@ static void hists__find_annotations(struct hists *hists, ...@@ -324,6 +325,17 @@ static void hists__find_annotations(struct hists *hists,
(strcmp(he->ms.sym->name, ann->sym_hist_filter) != 0)) (strcmp(he->ms.sym->name, ann->sym_hist_filter) != 0))
goto find_next; goto find_next;
if (ann->min_percent) {
float percent = 0;
u64 total = hists__total_period(hists);
if (total)
percent = 100.0 * he->stat.period / total;
if (percent < ann->min_percent)
goto find_next;
}
notes = symbol__annotation(he->ms.sym); notes = symbol__annotation(he->ms.sym);
if (notes->src == NULL) { if (notes->src == NULL) {
find_next: find_next:
...@@ -457,6 +469,16 @@ static int __cmd_annotate(struct perf_annotate *ann) ...@@ -457,6 +469,16 @@ static int __cmd_annotate(struct perf_annotate *ann)
return ret; return ret;
} }
/*
 * Option callback for --percent-limit.
 *
 * Converts @str with strtof() and stores the result in the min_percent
 * field of the struct perf_annotate that opt->value points to. The
 * conversion is not validated; always returns 0.
 */
static int parse_percent_limit(const struct option *opt, const char *str,
			       int unset __maybe_unused)
{
	struct perf_annotate *ann = opt->value;

	ann->min_percent = strtof(str, NULL);

	return 0;
}
static const char * const annotate_usage[] = { static const char * const annotate_usage[] = {
"perf annotate [<options>]", "perf annotate [<options>]",
NULL NULL
...@@ -557,6 +579,8 @@ int cmd_annotate(int argc, const char **argv) ...@@ -557,6 +579,8 @@ int cmd_annotate(int argc, const char **argv)
OPT_CALLBACK(0, "percent-type", &annotate.opts, "local-period", OPT_CALLBACK(0, "percent-type", &annotate.opts, "local-period",
"Set percent type local/global-period/hits", "Set percent type local/global-period/hits",
annotate_parse_percent_type), annotate_parse_percent_type),
OPT_CALLBACK(0, "percent-limit", &annotate, "percent",
"Don't show entries under that percent", parse_percent_limit),
OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
"Instruction Tracing options\n" ITRACE_HELP, "Instruction Tracing options\n" ITRACE_HELP,
itrace_parse_synth_opts), itrace_parse_synth_opts),
......
...@@ -92,6 +92,13 @@ static struct bench internals_benchmarks[] = { ...@@ -92,6 +92,13 @@ static struct bench internals_benchmarks[] = {
{ NULL, NULL, NULL } { NULL, NULL, NULL }
}; };
/*
 * Benchmarks of the "breakpoint" collection: name, summary, entry point.
 * The "all" row has no entry point of its own and the all-NULL row ends
 * the table.
 * NOTE(review): "all" is presumably expanded by the dispatcher - confirm.
 */
static struct bench breakpoint_benchmarks[] = {
{ "thread", "Benchmark thread start/finish with breakpoints", bench_breakpoint_thread},
{ "enable", "Benchmark breakpoint enable/disable", bench_breakpoint_enable},
{ "all", "Run all breakpoint benchmarks", NULL},
{ NULL, NULL, NULL },
};
struct collection { struct collection {
const char *name; const char *name;
const char *summary; const char *summary;
...@@ -110,6 +117,7 @@ static struct collection collections[] = { ...@@ -110,6 +117,7 @@ static struct collection collections[] = {
{"epoll", "Epoll stressing benchmarks", epoll_benchmarks }, {"epoll", "Epoll stressing benchmarks", epoll_benchmarks },
#endif #endif
{ "internals", "Perf-internals benchmarks", internals_benchmarks }, { "internals", "Perf-internals benchmarks", internals_benchmarks },
{ "breakpoint", "Breakpoint benchmarks", breakpoint_benchmarks },
{ "all", "All benchmarks", NULL }, { "all", "All benchmarks", NULL },
{ NULL, NULL, NULL } { NULL, NULL, NULL }
}; };
......
...@@ -653,6 +653,7 @@ STAT_FN(lcl_hitm) ...@@ -653,6 +653,7 @@ STAT_FN(lcl_hitm)
STAT_FN(store) STAT_FN(store)
STAT_FN(st_l1hit) STAT_FN(st_l1hit)
STAT_FN(st_l1miss) STAT_FN(st_l1miss)
STAT_FN(st_na)
STAT_FN(ld_fbhit) STAT_FN(ld_fbhit)
STAT_FN(ld_l1hit) STAT_FN(ld_l1hit)
STAT_FN(ld_l2hit) STAT_FN(ld_l2hit)
...@@ -677,7 +678,8 @@ static uint64_t total_records(struct c2c_stats *stats) ...@@ -677,7 +678,8 @@ static uint64_t total_records(struct c2c_stats *stats)
total = ldcnt + total = ldcnt +
stats->st_l1hit + stats->st_l1hit +
stats->st_l1miss; stats->st_l1miss +
stats->st_na;
return total; return total;
} }
...@@ -899,6 +901,7 @@ PERCENT_FN(rmt_hitm) ...@@ -899,6 +901,7 @@ PERCENT_FN(rmt_hitm)
PERCENT_FN(lcl_hitm) PERCENT_FN(lcl_hitm)
PERCENT_FN(st_l1hit) PERCENT_FN(st_l1hit)
PERCENT_FN(st_l1miss) PERCENT_FN(st_l1miss)
PERCENT_FN(st_na)
static int static int
percent_rmt_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, percent_rmt_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
...@@ -1024,6 +1027,37 @@ percent_stores_l1miss_cmp(struct perf_hpp_fmt *fmt __maybe_unused, ...@@ -1024,6 +1027,37 @@ percent_stores_l1miss_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
return per_left - per_right; return per_left - per_right;
} }
/*
 * Format the st_na percentage of @he into hpp->buf, padded to the column
 * width reported by c2c_width(). Returns the scnprintf() result.
 */
static int
percent_stores_na_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he)
{
int width = c2c_width(fmt, hpp, he->hists);
double per = PERCENT(he, st_na);
/* Scratch buffer handed to PERC_STR(). */
char buf[10];
return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
}
/* Color callback for the st_na percentage column; defers to percent_color() with percent_st_na. */
static int
percent_stores_na_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he)
{
return percent_color(fmt, hpp, he, percent_st_na);
}
/*
 * Order two hist entries by their st_na percentage.
 *
 * NOTE(review): the double difference is converted to int64_t on return,
 * so entries whose percentages differ by less than 1 compare as equal.
 */
static int64_t
percent_stores_na_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
		      struct hist_entry *left, struct hist_entry *right)
{
	double per_left = PERCENT(left, st_na);
	double per_right = PERCENT(right, st_na);

	return per_left - per_right;
}
STAT_FN(lcl_dram) STAT_FN(lcl_dram)
STAT_FN(rmt_dram) STAT_FN(rmt_dram)
...@@ -1351,7 +1385,7 @@ static struct c2c_dimension dim_tot_stores = { ...@@ -1351,7 +1385,7 @@ static struct c2c_dimension dim_tot_stores = {
}; };
static struct c2c_dimension dim_stores_l1hit = { static struct c2c_dimension dim_stores_l1hit = {
.header = HEADER_SPAN("---- Stores ----", "L1Hit", 1), .header = HEADER_SPAN("--------- Stores --------", "L1Hit", 2),
.name = "stores_l1hit", .name = "stores_l1hit",
.cmp = st_l1hit_cmp, .cmp = st_l1hit_cmp,
.entry = st_l1hit_entry, .entry = st_l1hit_entry,
...@@ -1366,8 +1400,16 @@ static struct c2c_dimension dim_stores_l1miss = { ...@@ -1366,8 +1400,16 @@ static struct c2c_dimension dim_stores_l1miss = {
.width = 7, .width = 7,
}; };
/* Output column "stores_na": labelled "N/A", compared and printed with st_na_cmp / st_na_entry. */
static struct c2c_dimension dim_stores_na = {
.header = HEADER_SPAN_LOW("N/A"),
.name = "stores_na",
.cmp = st_na_cmp,
.entry = st_na_entry,
.width = 7,
};
static struct c2c_dimension dim_cl_stores_l1hit = { static struct c2c_dimension dim_cl_stores_l1hit = {
.header = HEADER_SPAN("-- Store Refs --", "L1 Hit", 1), .header = HEADER_SPAN("------- Store Refs ------", "L1 Hit", 2),
.name = "cl_stores_l1hit", .name = "cl_stores_l1hit",
.cmp = st_l1hit_cmp, .cmp = st_l1hit_cmp,
.entry = st_l1hit_entry, .entry = st_l1hit_entry,
...@@ -1382,6 +1424,14 @@ static struct c2c_dimension dim_cl_stores_l1miss = { ...@@ -1382,6 +1424,14 @@ static struct c2c_dimension dim_cl_stores_l1miss = {
.width = 7, .width = 7,
}; };
/* Output column "cl_stores_na": labelled "N/A", compared and printed with st_na_cmp / st_na_entry. */
static struct c2c_dimension dim_cl_stores_na = {
.header = HEADER_SPAN_LOW("N/A"),
.name = "cl_stores_na",
.cmp = st_na_cmp,
.entry = st_na_entry,
.width = 7,
};
static struct c2c_dimension dim_ld_fbhit = { static struct c2c_dimension dim_ld_fbhit = {
.header = HEADER_SPAN("----- Core Load Hit -----", "FB", 2), .header = HEADER_SPAN("----- Core Load Hit -----", "FB", 2),
.name = "ld_fbhit", .name = "ld_fbhit",
...@@ -1471,7 +1521,7 @@ static struct c2c_dimension dim_percent_lcl_hitm = { ...@@ -1471,7 +1521,7 @@ static struct c2c_dimension dim_percent_lcl_hitm = {
}; };
static struct c2c_dimension dim_percent_stores_l1hit = { static struct c2c_dimension dim_percent_stores_l1hit = {
.header = HEADER_SPAN("-- Store Refs --", "L1 Hit", 1), .header = HEADER_SPAN("------- Store Refs ------", "L1 Hit", 2),
.name = "percent_stores_l1hit", .name = "percent_stores_l1hit",
.cmp = percent_stores_l1hit_cmp, .cmp = percent_stores_l1hit_cmp,
.entry = percent_stores_l1hit_entry, .entry = percent_stores_l1hit_entry,
...@@ -1488,6 +1538,15 @@ static struct c2c_dimension dim_percent_stores_l1miss = { ...@@ -1488,6 +1538,15 @@ static struct c2c_dimension dim_percent_stores_l1miss = {
.width = 7, .width = 7,
}; };
/*
 * Output column "percent_stores_na": labelled "N/A", wired to the
 * percent_stores_na_{cmp,entry,color} callbacks.
 */
static struct c2c_dimension dim_percent_stores_na = {
.header = HEADER_SPAN_LOW("N/A"),
.name = "percent_stores_na",
.cmp = percent_stores_na_cmp,
.entry = percent_stores_na_entry,
.color = percent_stores_na_color,
.width = 7,
};
static struct c2c_dimension dim_dram_lcl = { static struct c2c_dimension dim_dram_lcl = {
.header = HEADER_SPAN("--- Load Dram ----", "Lcl", 1), .header = HEADER_SPAN("--- Load Dram ----", "Lcl", 1),
.name = "dram_lcl", .name = "dram_lcl",
...@@ -1618,8 +1677,10 @@ static struct c2c_dimension *dimensions[] = { ...@@ -1618,8 +1677,10 @@ static struct c2c_dimension *dimensions[] = {
&dim_tot_stores, &dim_tot_stores,
&dim_stores_l1hit, &dim_stores_l1hit,
&dim_stores_l1miss, &dim_stores_l1miss,
&dim_stores_na,
&dim_cl_stores_l1hit, &dim_cl_stores_l1hit,
&dim_cl_stores_l1miss, &dim_cl_stores_l1miss,
&dim_cl_stores_na,
&dim_ld_fbhit, &dim_ld_fbhit,
&dim_ld_l1hit, &dim_ld_l1hit,
&dim_ld_l2hit, &dim_ld_l2hit,
...@@ -1632,6 +1693,7 @@ static struct c2c_dimension *dimensions[] = { ...@@ -1632,6 +1693,7 @@ static struct c2c_dimension *dimensions[] = {
&dim_percent_lcl_hitm, &dim_percent_lcl_hitm,
&dim_percent_stores_l1hit, &dim_percent_stores_l1hit,
&dim_percent_stores_l1miss, &dim_percent_stores_l1miss,
&dim_percent_stores_na,
&dim_dram_lcl, &dim_dram_lcl,
&dim_dram_rmt, &dim_dram_rmt,
&dim_pid, &dim_pid,
...@@ -2149,6 +2211,7 @@ static void print_c2c__display_stats(FILE *out) ...@@ -2149,6 +2211,7 @@ static void print_c2c__display_stats(FILE *out)
fprintf(out, " Store - no mapping : %10d\n", stats->st_noadrs); fprintf(out, " Store - no mapping : %10d\n", stats->st_noadrs);
fprintf(out, " Store L1D Hit : %10d\n", stats->st_l1hit); fprintf(out, " Store L1D Hit : %10d\n", stats->st_l1hit);
fprintf(out, " Store L1D Miss : %10d\n", stats->st_l1miss); fprintf(out, " Store L1D Miss : %10d\n", stats->st_l1miss);
fprintf(out, " Store No available memory level : %10d\n", stats->st_na);
fprintf(out, " No Page Map Rejects : %10d\n", stats->nomap); fprintf(out, " No Page Map Rejects : %10d\n", stats->nomap);
fprintf(out, " Unable to parse data source : %10d\n", stats->noparse); fprintf(out, " Unable to parse data source : %10d\n", stats->noparse);
} }
...@@ -2171,6 +2234,7 @@ static void print_shared_cacheline_info(FILE *out) ...@@ -2171,6 +2234,7 @@ static void print_shared_cacheline_info(FILE *out)
fprintf(out, " Blocked Access on shared lines : %10d\n", stats->blk_data + stats->blk_addr); fprintf(out, " Blocked Access on shared lines : %10d\n", stats->blk_data + stats->blk_addr);
fprintf(out, " Store HITs on shared lines : %10d\n", stats->store); fprintf(out, " Store HITs on shared lines : %10d\n", stats->store);
fprintf(out, " Store L1D hits on shared lines : %10d\n", stats->st_l1hit); fprintf(out, " Store L1D hits on shared lines : %10d\n", stats->st_l1hit);
fprintf(out, " Store No available memory level : %10d\n", stats->st_na);
fprintf(out, " Total Merged records : %10d\n", hitm_cnt + stats->store); fprintf(out, " Total Merged records : %10d\n", hitm_cnt + stats->store);
} }
...@@ -2193,10 +2257,10 @@ static void print_cacheline(struct c2c_hists *c2c_hists, ...@@ -2193,10 +2257,10 @@ static void print_cacheline(struct c2c_hists *c2c_hists,
fprintf(out, "\n"); fprintf(out, "\n");
} }
fprintf(out, " -------------------------------------------------------------\n"); fprintf(out, " ----------------------------------------------------------------------\n");
__hist_entry__snprintf(he_cl, &hpp, hpp_list); __hist_entry__snprintf(he_cl, &hpp, hpp_list);
fprintf(out, "%s\n", bf); fprintf(out, "%s\n", bf);
fprintf(out, " -------------------------------------------------------------\n"); fprintf(out, " ----------------------------------------------------------------------\n");
hists__fprintf(&c2c_hists->hists, false, 0, 0, 0, out, false); hists__fprintf(&c2c_hists->hists, false, 0, 0, 0, out, false);
} }
...@@ -2213,6 +2277,7 @@ static void print_pareto(FILE *out) ...@@ -2213,6 +2277,7 @@ static void print_pareto(FILE *out)
"cl_lcl_hitm," "cl_lcl_hitm,"
"cl_stores_l1hit," "cl_stores_l1hit,"
"cl_stores_l1miss," "cl_stores_l1miss,"
"cl_stores_na,"
"dcacheline"; "dcacheline";
perf_hpp_list__init(&hpp_list); perf_hpp_list__init(&hpp_list);
...@@ -2664,6 +2729,7 @@ static int build_cl_output(char *cl_sort, bool no_source) ...@@ -2664,6 +2729,7 @@ static int build_cl_output(char *cl_sort, bool no_source)
"percent_lcl_hitm," "percent_lcl_hitm,"
"percent_stores_l1hit," "percent_stores_l1hit,"
"percent_stores_l1miss," "percent_stores_l1miss,"
"percent_stores_na,"
"offset,offset_node,dcacheline_count,", "offset,offset_node,dcacheline_count,",
add_pid ? "pid," : "", add_pid ? "pid," : "",
add_tid ? "tid," : "", add_tid ? "tid," : "",
...@@ -2850,7 +2916,7 @@ static int perf_c2c__report(int argc, const char **argv) ...@@ -2850,7 +2916,7 @@ static int perf_c2c__report(int argc, const char **argv)
"tot_recs," "tot_recs,"
"tot_loads," "tot_loads,"
"tot_stores," "tot_stores,"
"stores_l1hit,stores_l1miss," "stores_l1hit,stores_l1miss,stores_na,"
"ld_fbhit,ld_l1hit,ld_l2hit," "ld_fbhit,ld_l1hit,ld_l2hit,"
"ld_lclhit,lcl_hitm," "ld_lclhit,lcl_hitm,"
"ld_rmthit,rmt_hitm," "ld_rmthit,rmt_hitm,"
......
...@@ -27,6 +27,8 @@ ...@@ -27,6 +27,8 @@
#include "util/namespaces.h" #include "util/namespaces.h"
#include "util/util.h" #include "util/util.h"
#include <internal/lib.h>
#include <linux/err.h> #include <linux/err.h>
#include <subcmd/parse-options.h> #include <subcmd/parse-options.h>
#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */ #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
...@@ -48,6 +50,7 @@ struct perf_inject { ...@@ -48,6 +50,7 @@ struct perf_inject {
bool in_place_update; bool in_place_update;
bool in_place_update_dry_run; bool in_place_update_dry_run;
bool is_pipe; bool is_pipe;
bool copy_kcore_dir;
const char *input_name; const char *input_name;
struct perf_data output; struct perf_data output;
u64 bytes_written; u64 bytes_written;
...@@ -55,6 +58,7 @@ struct perf_inject { ...@@ -55,6 +58,7 @@ struct perf_inject {
struct list_head samples; struct list_head samples;
struct itrace_synth_opts itrace_synth_opts; struct itrace_synth_opts itrace_synth_opts;
char event_copy[PERF_SAMPLE_MAX_SIZE]; char event_copy[PERF_SAMPLE_MAX_SIZE];
struct perf_file_section secs[HEADER_FEAT_BITS];
}; };
struct event_entry { struct event_entry {
...@@ -763,6 +767,133 @@ static int parse_vm_time_correlation(const struct option *opt, const char *str, ...@@ -763,6 +767,133 @@ static int parse_vm_time_correlation(const struct option *opt, const char *str,
return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM; return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM;
} }
/*
 * Per-section callback: copy @section into the secs[] slot of the
 * struct perf_inject passed as @data, indexed by @feat. Always returns 0.
 */
static int save_section_info_cb(struct perf_file_section *section,
				struct perf_header *ph __maybe_unused,
				int feat, int fd __maybe_unused, void *data)
{
	struct perf_inject *inject = data;
	struct perf_file_section *slot = &inject->secs[feat];

	*slot = *section;

	return 0;
}
/*
 * Record the feature sections of the input file in inject->secs[] by
 * running save_section_info_cb() over the session header.
 * Returns the result of perf_header__process_sections().
 */
static int save_section_info(struct perf_inject *inject)
{
	int fd = perf_data__fd(inject->session->data);

	return perf_header__process_sections(&inject->session->header, fd,
					     inject, save_section_info_cb);
}
/*
 * Decide whether header feature @feat is kept from the original file.
 * Returns true for the features listed in the first group below and false
 * for everything else, including unknown values.
 */
static bool keep_feat(int feat)
{
	bool keep = false;

	switch (feat) {
	/* Keep original information that describes the machine or software */
	case HEADER_TRACING_DATA:
	case HEADER_HOSTNAME:
	case HEADER_OSRELEASE:
	case HEADER_VERSION:
	case HEADER_ARCH:
	case HEADER_NRCPUS:
	case HEADER_CPUDESC:
	case HEADER_CPUID:
	case HEADER_TOTAL_MEM:
	case HEADER_CPU_TOPOLOGY:
	case HEADER_NUMA_TOPOLOGY:
	case HEADER_PMU_MAPPINGS:
	case HEADER_CACHE:
	case HEADER_MEM_TOPOLOGY:
	case HEADER_CLOCKID:
	case HEADER_BPF_PROG_INFO:
	case HEADER_BPF_BTF:
	case HEADER_CPU_PMU_CAPS:
	case HEADER_CLOCK_DATA:
	case HEADER_HYBRID_TOPOLOGY:
	case HEADER_HYBRID_CPU_PMU_CAPS:
		keep = true;
		break;
	/* Information that can be updated */
	case HEADER_BUILD_ID:
	case HEADER_CMDLINE:
	case HEADER_EVENT_DESC:
	case HEADER_BRANCH_STACK:
	case HEADER_GROUP_DESC:
	case HEADER_AUXTRACE:
	case HEADER_STAT:
	case HEADER_SAMPLE_TIME:
	case HEADER_DIR_FORMAT:
	case HEADER_COMPRESSED:
	default:
		break;
	}

	return keep;
}
/*
 * Read exactly @sz bytes at offset @offs of @fd into @buf using preadn().
 * Returns 0 on success, -errno when preadn() reports an error and -EINVAL
 * when fewer than @sz bytes were read.
 */
static int read_file(int fd, u64 offs, void *buf, size_t sz)
{
	ssize_t nread = preadn(fd, buf, sz, offs);

	if (nread < 0)
		return -errno;

	return (size_t)nread == sz ? 0 : -EINVAL;
}
static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw)
{
int fd = perf_data__fd(inject->session->data);
u64 offs = inject->secs[feat].offset;
size_t sz = inject->secs[feat].size;
void *buf = malloc(sz);
int ret;
if (!buf)
return -ENOMEM;
ret = read_file(fd, offs, buf, sz);
if (ret)
goto out_free;
ret = fw->write(fw, buf, sz);
out_free:
free(buf);
return ret;
}
/*
 * Feature copier handed to the header writer; feat_copy_cb() recovers this
 * wrapper from the embedded @fc via container_of() to reach @inject.
 */
struct inject_fc {
struct feat_copier fc;
struct perf_inject *inject;
};
/*
 * feat_copier callback for feature @feat.
 *
 * Returns 0 without writing when no section offset was saved for @feat or
 * keep_feat() rejects it, a negative error from feat_copy() on failure,
 * and 1 once the section has been copied to @fw.
 */
static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw)
{
	struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc);
	struct perf_inject *inject = inj_fc->inject;
	int ret;

	if (inject->secs[feat].offset == 0)
		return 0;

	if (!keep_feat(feat))
		return 0;

	ret = feat_copy(inject, feat, fw);

	/* Feature section copied */
	return ret < 0 ? ret : 1;
}
/*
 * Copy the kcore_dir* entries of the input directory into the output
 * directory by running "cp -r -n" through system().
 *
 * Returns the negative asprintf() result if the command line cannot be
 * built, otherwise the value returned by system().
 *
 * NOTE(review): input_name and output.path are pasted into the shell
 * command unquoted, so paths with spaces or shell metacharacters will not
 * behave as intended.
 */
static int copy_kcore_dir(struct perf_inject *inject)
{
	char *cmd;
	int ret;

	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1",
		       inject->input_name, inject->output.path);
	if (ret < 0)
		return ret;

	pr_debug("%s\n", cmd);
	ret = system(cmd);

	/* asprintf() allocated the command string; release it once it has run. */
	free(cmd);

	return ret;
}
static int output_fd(struct perf_inject *inject) static int output_fd(struct perf_inject *inject)
{ {
return inject->in_place_update ? -1 : perf_data__fd(&inject->output); return inject->in_place_update ? -1 : perf_data__fd(&inject->output);
...@@ -848,6 +979,11 @@ static int __cmd_inject(struct perf_inject *inject) ...@@ -848,6 +979,11 @@ static int __cmd_inject(struct perf_inject *inject)
return ret; return ret;
if (!inject->is_pipe && !inject->in_place_update) { if (!inject->is_pipe && !inject->in_place_update) {
struct inject_fc inj_fc = {
.fc.copy = feat_copy_cb,
.inject = inject,
};
if (inject->build_ids) if (inject->build_ids)
perf_header__set_feat(&session->header, perf_header__set_feat(&session->header,
HEADER_BUILD_ID); HEADER_BUILD_ID);
...@@ -872,7 +1008,13 @@ static int __cmd_inject(struct perf_inject *inject) ...@@ -872,7 +1008,13 @@ static int __cmd_inject(struct perf_inject *inject)
} }
session->header.data_offset = output_data_offset; session->header.data_offset = output_data_offset;
session->header.data_size = inject->bytes_written; session->header.data_size = inject->bytes_written;
perf_session__write_header(session, session->evlist, fd, true); perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc);
if (inject->copy_kcore_dir) {
ret = copy_kcore_dir(inject);
if (ret)
return ret;
}
} }
return ret; return ret;
...@@ -1009,9 +1151,16 @@ int cmd_inject(int argc, const char **argv) ...@@ -1009,9 +1151,16 @@ int cmd_inject(int argc, const char **argv)
} }
if (!inject.in_place_update_dry_run) if (!inject.in_place_update_dry_run)
data.in_place_update = true; data.in_place_update = true;
} else if (perf_data__open(&inject.output)) { } else {
perror("failed to create output file"); if (strcmp(inject.output.path, "-") && !inject.strip &&
return -1; has_kcore_dir(inject.input_name)) {
inject.output.is_dir = true;
inject.copy_kcore_dir = true;
}
if (perf_data__open(&inject.output)) {
perror("failed to create output file");
return -1;
}
} }
data.path = inject.input_name; data.path = inject.input_name;
...@@ -1037,6 +1186,11 @@ int cmd_inject(int argc, const char **argv) ...@@ -1037,6 +1186,11 @@ int cmd_inject(int argc, const char **argv)
if (zstd_init(&(inject.session->zstd_data), 0) < 0) if (zstd_init(&(inject.session->zstd_data), 0) < 0)
pr_warning("Decompression initialization failed.\n"); pr_warning("Decompression initialization failed.\n");
/* Save original section info before feature bits change */
ret = save_section_info(&inject);
if (ret)
goto out_delete;
if (!data.is_pipe && inject.output.is_pipe) { if (!data.is_pipe && inject.output.is_pipe) {
ret = perf_header__write_pipe(perf_data__fd(&inject.output)); ret = perf_header__write_pipe(perf_data__fd(&inject.output));
if (ret < 0) { if (ret < 0) {
......
...@@ -1603,6 +1603,8 @@ int cmd_kvm(int argc, const char **argv) ...@@ -1603,6 +1603,8 @@ int cmd_kvm(int argc, const char **argv)
"file", "file saving guest os /proc/kallsyms"), "file", "file saving guest os /proc/kallsyms"),
OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules, OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules,
"file", "file saving guest os /proc/modules"), "file", "file saving guest os /proc/modules"),
OPT_BOOLEAN(0, "guest-code", &symbol_conf.guest_code,
"Guest code can be found in hypervisor process"),
OPT_INCR('v', "verbose", &verbose, OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"), "be more verbose (show counter open errors, etc)"),
OPT_END() OPT_END()
......
...@@ -65,7 +65,7 @@ struct lock_stat { ...@@ -65,7 +65,7 @@ struct lock_stat {
u64 wait_time_min; u64 wait_time_min;
u64 wait_time_max; u64 wait_time_max;
int discard; /* flag of blacklist */ int broken; /* flag of blacklist */
int combined; int combined;
}; };
...@@ -118,6 +118,7 @@ struct thread_stat { ...@@ -118,6 +118,7 @@ struct thread_stat {
static struct rb_root thread_stats; static struct rb_root thread_stats;
static bool combine_locks; static bool combine_locks;
static bool show_thread_stats;
static struct thread_stat *thread_stat_find(u32 tid) static struct thread_stat *thread_stat_find(u32 tid)
{ {
...@@ -384,9 +385,6 @@ static void combine_lock_stats(struct lock_stat *st) ...@@ -384,9 +385,6 @@ static void combine_lock_stats(struct lock_stat *st)
ret = !!st->name - !!p->name; ret = !!st->name - !!p->name;
if (ret == 0) { if (ret == 0) {
if (st->discard)
goto out;
p->nr_acquired += st->nr_acquired; p->nr_acquired += st->nr_acquired;
p->nr_contended += st->nr_contended; p->nr_contended += st->nr_contended;
p->wait_time_total += st->wait_time_total; p->wait_time_total += st->wait_time_total;
...@@ -399,10 +397,7 @@ static void combine_lock_stats(struct lock_stat *st) ...@@ -399,10 +397,7 @@ static void combine_lock_stats(struct lock_stat *st)
if (p->wait_time_max < st->wait_time_max) if (p->wait_time_max < st->wait_time_max)
p->wait_time_max = st->wait_time_max; p->wait_time_max = st->wait_time_max;
/* now it got a new !discard record */ p->broken |= st->broken;
p->discard = 0;
out:
st->combined = 1; st->combined = 1;
return; return;
} }
...@@ -415,15 +410,6 @@ static void combine_lock_stats(struct lock_stat *st) ...@@ -415,15 +410,6 @@ static void combine_lock_stats(struct lock_stat *st)
rb_link_node(&st->rb, parent, rb); rb_link_node(&st->rb, parent, rb);
rb_insert_color(&st->rb, &sorted); rb_insert_color(&st->rb, &sorted);
if (st->discard) {
st->nr_acquired = 0;
st->nr_contended = 0;
st->wait_time_total = 0;
st->avg_wait_time = 0;
st->wait_time_min = ULLONG_MAX;
st->wait_time_max = 0;
}
} }
static void insert_to_result(struct lock_stat *st, static void insert_to_result(struct lock_stat *st,
...@@ -557,11 +543,13 @@ static int report_lock_acquire_event(struct evsel *evsel, ...@@ -557,11 +543,13 @@ static int report_lock_acquire_event(struct evsel *evsel,
u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
int flag = evsel__intval(evsel, sample, "flags"); int flag = evsel__intval(evsel, sample, "flags");
/* abuse ls->addr for tid */
if (show_thread_stats)
addr = sample->tid;
ls = lock_stat_findnew(addr, name); ls = lock_stat_findnew(addr, name);
if (!ls) if (!ls)
return -ENOMEM; return -ENOMEM;
if (ls->discard)
return 0;
ts = thread_stat_findnew(sample->tid); ts = thread_stat_findnew(sample->tid);
if (!ts) if (!ts)
...@@ -599,9 +587,11 @@ static int report_lock_acquire_event(struct evsel *evsel, ...@@ -599,9 +587,11 @@ static int report_lock_acquire_event(struct evsel *evsel,
case SEQ_STATE_ACQUIRING: case SEQ_STATE_ACQUIRING:
case SEQ_STATE_CONTENDED: case SEQ_STATE_CONTENDED:
broken: broken:
/* broken lock sequence, discard it */ /* broken lock sequence */
ls->discard = 1; if (!ls->broken) {
bad_hist[BROKEN_ACQUIRE]++; ls->broken = 1;
bad_hist[BROKEN_ACQUIRE]++;
}
list_del_init(&seq->list); list_del_init(&seq->list);
free(seq); free(seq);
goto end; goto end;
...@@ -626,11 +616,12 @@ static int report_lock_acquired_event(struct evsel *evsel, ...@@ -626,11 +616,12 @@ static int report_lock_acquired_event(struct evsel *evsel,
const char *name = evsel__strval(evsel, sample, "name"); const char *name = evsel__strval(evsel, sample, "name");
u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
if (show_thread_stats)
addr = sample->tid;
ls = lock_stat_findnew(addr, name); ls = lock_stat_findnew(addr, name);
if (!ls) if (!ls)
return -ENOMEM; return -ENOMEM;
if (ls->discard)
return 0;
ts = thread_stat_findnew(sample->tid); ts = thread_stat_findnew(sample->tid);
if (!ts) if (!ts)
...@@ -657,9 +648,11 @@ static int report_lock_acquired_event(struct evsel *evsel, ...@@ -657,9 +648,11 @@ static int report_lock_acquired_event(struct evsel *evsel,
case SEQ_STATE_RELEASED: case SEQ_STATE_RELEASED:
case SEQ_STATE_ACQUIRED: case SEQ_STATE_ACQUIRED:
case SEQ_STATE_READ_ACQUIRED: case SEQ_STATE_READ_ACQUIRED:
/* broken lock sequence, discard it */ /* broken lock sequence */
ls->discard = 1; if (!ls->broken) {
bad_hist[BROKEN_ACQUIRED]++; ls->broken = 1;
bad_hist[BROKEN_ACQUIRED]++;
}
list_del_init(&seq->list); list_del_init(&seq->list);
free(seq); free(seq);
goto end; goto end;
...@@ -685,11 +678,12 @@ static int report_lock_contended_event(struct evsel *evsel, ...@@ -685,11 +678,12 @@ static int report_lock_contended_event(struct evsel *evsel,
const char *name = evsel__strval(evsel, sample, "name"); const char *name = evsel__strval(evsel, sample, "name");
u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
if (show_thread_stats)
addr = sample->tid;
ls = lock_stat_findnew(addr, name); ls = lock_stat_findnew(addr, name);
if (!ls) if (!ls)
return -ENOMEM; return -ENOMEM;
if (ls->discard)
return 0;
ts = thread_stat_findnew(sample->tid); ts = thread_stat_findnew(sample->tid);
if (!ts) if (!ts)
...@@ -709,9 +703,11 @@ static int report_lock_contended_event(struct evsel *evsel, ...@@ -709,9 +703,11 @@ static int report_lock_contended_event(struct evsel *evsel,
case SEQ_STATE_ACQUIRED: case SEQ_STATE_ACQUIRED:
case SEQ_STATE_READ_ACQUIRED: case SEQ_STATE_READ_ACQUIRED:
case SEQ_STATE_CONTENDED: case SEQ_STATE_CONTENDED:
/* broken lock sequence, discard it */ /* broken lock sequence */
ls->discard = 1; if (!ls->broken) {
bad_hist[BROKEN_CONTENDED]++; ls->broken = 1;
bad_hist[BROKEN_CONTENDED]++;
}
list_del_init(&seq->list); list_del_init(&seq->list);
free(seq); free(seq);
goto end; goto end;
...@@ -737,11 +733,12 @@ static int report_lock_release_event(struct evsel *evsel, ...@@ -737,11 +733,12 @@ static int report_lock_release_event(struct evsel *evsel,
const char *name = evsel__strval(evsel, sample, "name"); const char *name = evsel__strval(evsel, sample, "name");
u64 addr = evsel__intval(evsel, sample, "lockdep_addr"); u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
if (show_thread_stats)
addr = sample->tid;
ls = lock_stat_findnew(addr, name); ls = lock_stat_findnew(addr, name);
if (!ls) if (!ls)
return -ENOMEM; return -ENOMEM;
if (ls->discard)
return 0;
ts = thread_stat_findnew(sample->tid); ts = thread_stat_findnew(sample->tid);
if (!ts) if (!ts)
...@@ -767,9 +764,11 @@ static int report_lock_release_event(struct evsel *evsel, ...@@ -767,9 +764,11 @@ static int report_lock_release_event(struct evsel *evsel,
case SEQ_STATE_ACQUIRING: case SEQ_STATE_ACQUIRING:
case SEQ_STATE_CONTENDED: case SEQ_STATE_CONTENDED:
case SEQ_STATE_RELEASED: case SEQ_STATE_RELEASED:
/* broken lock sequence, discard it */ /* broken lock sequence */
ls->discard = 1; if (!ls->broken) {
bad_hist[BROKEN_RELEASE]++; ls->broken = 1;
bad_hist[BROKEN_RELEASE]++;
}
goto free_seq; goto free_seq;
default: default:
BUG_ON("Unknown state of lock sequence found!\n"); BUG_ON("Unknown state of lock sequence found!\n");
...@@ -854,15 +853,26 @@ static void print_result(void) ...@@ -854,15 +853,26 @@ static void print_result(void)
bad = total = 0; bad = total = 0;
while ((st = pop_from_result())) { while ((st = pop_from_result())) {
total++; total++;
if (st->discard) { if (st->broken)
bad++; bad++;
if (!st->nr_acquired)
continue; continue;
}
bzero(cut_name, 20); bzero(cut_name, 20);
if (strlen(st->name) < 20) { if (strlen(st->name) < 20) {
/* output raw name */ /* output raw name */
pr_info("%20s ", st->name); const char *name = st->name;
if (show_thread_stats) {
struct thread *t;
/* st->addr contains tid of thread */
t = perf_session__findnew(session, st->addr);
name = thread__comm_str(t);
}
pr_info("%20s ", name);
} else { } else {
strncpy(cut_name, st->name, 16); strncpy(cut_name, st->name, 16);
cut_name[16] = '.'; cut_name[16] = '.';
...@@ -1139,6 +1149,8 @@ int cmd_lock(int argc, const char **argv) ...@@ -1139,6 +1149,8 @@ int cmd_lock(int argc, const char **argv)
/* TODO: type */ /* TODO: type */
OPT_BOOLEAN('c', "combine-locks", &combine_locks, OPT_BOOLEAN('c', "combine-locks", &combine_locks,
"combine locks in the same class"), "combine locks in the same class"),
OPT_BOOLEAN('t', "threads", &show_thread_stats,
"show per-thread lock stats"),
OPT_PARENT(lock_options) OPT_PARENT(lock_options)
}; };
......
...@@ -1011,7 +1011,7 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru ...@@ -1011,7 +1011,7 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru
for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) { for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
if (cpu_map__is_dummy(cpus) || if (cpu_map__is_dummy(cpus) ||
test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) { test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
if (thread_data->maps) { if (thread_data->maps) {
thread_data->maps[tm] = &mmap[m]; thread_data->maps[tm] = &mmap[m];
pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n", pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
...@@ -3331,13 +3331,14 @@ struct option *record_options = __record_options; ...@@ -3331,13 +3331,14 @@ struct option *record_options = __record_options;
static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
{ {
int c; struct perf_cpu cpu;
int idx;
if (cpu_map__is_dummy(cpus)) if (cpu_map__is_dummy(cpus))
return; return;
for (c = 0; c < cpus->nr; c++) perf_cpu_map__for_each_cpu(cpu, idx, cpus)
set_bit(cpus->map[c].cpu, mask->bits); set_bit(cpu.cpu, mask->bits);
} }
static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
...@@ -3404,8 +3405,8 @@ static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map ...@@ -3404,8 +3405,8 @@ static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map
pr_debug("nr_threads: %d\n", rec->nr_threads); pr_debug("nr_threads: %d\n", rec->nr_threads);
for (t = 0; t < rec->nr_threads; t++) { for (t = 0; t < rec->nr_threads; t++) {
set_bit(cpus->map[t].cpu, rec->thread_masks[t].maps.bits); set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
set_bit(cpus->map[t].cpu, rec->thread_masks[t].affinity.bits); set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
if (verbose) { if (verbose) {
pr_debug("thread_masks[%d]: ", t); pr_debug("thread_masks[%d]: ", t);
mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
......
...@@ -1742,16 +1742,44 @@ static int perf_sample__fprintf_pt_spacing(int len, FILE *fp) ...@@ -1742,16 +1742,44 @@ static int perf_sample__fprintf_pt_spacing(int len, FILE *fp)
return perf_sample__fprintf_spacing(len, 34, fp); return perf_sample__fprintf_spacing(len, 34, fp);
} }
/*
 * Return true if the bytes of @val, taken in memory order, are printable
 * ASCII characters followed only by NUL padding. A value consisting
 * entirely of NUL bytes also qualifies.
 */
static bool ptw_is_prt(u64 val)
{
	const char *bytes = (const char *)&val;
	bool in_padding = false;
	u32 pos;

	for (pos = 0; pos < sizeof(val); pos++) {
		char c = bytes[pos];

		if (in_padding) {
			/* Once the first NUL is seen, only NULs may follow */
			if (c)
				return false;
			continue;
		}

		if (!c) {
			in_padding = true;
			continue;
		}

		if (!isprint(c) || !isascii(c))
			return false;
	}

	return true;
}
static int perf_sample__fprintf_synth_ptwrite(struct perf_sample *sample, FILE *fp) static int perf_sample__fprintf_synth_ptwrite(struct perf_sample *sample, FILE *fp)
{ {
struct perf_synth_intel_ptwrite *data = perf_sample__synth_ptr(sample); struct perf_synth_intel_ptwrite *data = perf_sample__synth_ptr(sample);
char str[sizeof(u64) + 1] = "";
int len; int len;
u64 val;
if (perf_sample__bad_synth_size(sample, *data)) if (perf_sample__bad_synth_size(sample, *data))
return 0; return 0;
len = fprintf(fp, " IP: %u payload: %#" PRIx64 " ", val = le64_to_cpu(data->payload);
data->ip, le64_to_cpu(data->payload)); if (ptw_is_prt(val)) {
memcpy(str, &val, sizeof(val));
str[sizeof(val)] = 0;
}
len = fprintf(fp, " IP: %u payload: %#" PRIx64 " %s ",
data->ip, val, str);
return len + perf_sample__fprintf_pt_spacing(len, fp); return len + perf_sample__fprintf_pt_spacing(len, fp);
} }
...@@ -3884,6 +3912,8 @@ int cmd_script(int argc, const char **argv) ...@@ -3884,6 +3912,8 @@ int cmd_script(int argc, const char **argv)
"file", "file saving guest os /proc/kallsyms"), "file", "file saving guest os /proc/kallsyms"),
OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules, OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules,
"file", "file saving guest os /proc/modules"), "file", "file saving guest os /proc/modules"),
OPT_BOOLEAN(0, "guest-code", &symbol_conf.guest_code,
"Guest code can be found in hypervisor process"),
OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr, OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr,
"Enable LBR callgraph stitching approach"), "Enable LBR callgraph stitching approach"),
OPTS_EVSWITCH(&script.evswitch), OPTS_EVSWITCH(&script.evswitch),
...@@ -3909,7 +3939,8 @@ int cmd_script(int argc, const char **argv) ...@@ -3909,7 +3939,8 @@ int cmd_script(int argc, const char **argv)
if (symbol_conf.guestmount || if (symbol_conf.guestmount ||
symbol_conf.default_guest_vmlinux_name || symbol_conf.default_guest_vmlinux_name ||
symbol_conf.default_guest_kallsyms || symbol_conf.default_guest_kallsyms ||
symbol_conf.default_guest_modules) { symbol_conf.default_guest_modules ||
symbol_conf.guest_code) {
/* /*
* Enable guest sample processing. * Enable guest sample processing.
*/ */
......
...@@ -216,6 +216,7 @@ static struct perf_stat_config stat_config = { ...@@ -216,6 +216,7 @@ static struct perf_stat_config stat_config = {
.run_count = 1, .run_count = 1,
.metric_only_len = METRIC_ONLY_LEN, .metric_only_len = METRIC_ONLY_LEN,
.walltime_nsecs_stats = &walltime_nsecs_stats, .walltime_nsecs_stats = &walltime_nsecs_stats,
.ru_stats = &ru_stats,
.big_num = true, .big_num = true,
.ctl_fd = -1, .ctl_fd = -1,
.ctl_fd_ack = -1, .ctl_fd_ack = -1,
...@@ -271,11 +272,8 @@ static void evlist__check_cpu_maps(struct evlist *evlist) ...@@ -271,11 +272,8 @@ static void evlist__check_cpu_maps(struct evlist *evlist)
pr_warning(" %s: %s\n", evsel->name, buf); pr_warning(" %s: %s\n", evsel->name, buf);
} }
for_each_group_evsel(pos, leader) { for_each_group_evsel(pos, leader)
evsel__set_leader(pos, pos); evsel__remove_from_group(pos, leader);
pos->core.nr_members = 0;
}
evsel->core.leader->nr_members = 0;
} }
} }
...@@ -341,15 +339,35 @@ static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 t ...@@ -341,15 +339,35 @@ static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 t
static int read_single_counter(struct evsel *counter, int cpu_map_idx, static int read_single_counter(struct evsel *counter, int cpu_map_idx,
int thread, struct timespec *rs) int thread, struct timespec *rs)
{ {
if (counter->tool_event == PERF_TOOL_DURATION_TIME) { switch(counter->tool_event) {
u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL; case PERF_TOOL_DURATION_TIME: {
struct perf_counts_values *count = u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL;
perf_counts(counter->counts, cpu_map_idx, thread); struct perf_counts_values *count =
count->ena = count->run = val; perf_counts(counter->counts, cpu_map_idx, thread);
count->val = val; count->ena = count->run = val;
return 0; count->val = val;
return 0;
}
case PERF_TOOL_USER_TIME:
case PERF_TOOL_SYSTEM_TIME: {
u64 val;
struct perf_counts_values *count =
perf_counts(counter->counts, cpu_map_idx, thread);
if (counter->tool_event == PERF_TOOL_USER_TIME)
val = ru_stats.ru_utime_usec_stat.mean;
else
val = ru_stats.ru_stime_usec_stat.mean;
count->ena = count->run = val;
count->val = val;
return 0;
}
default:
case PERF_TOOL_NONE:
return evsel__read_counter(counter, cpu_map_idx, thread);
case PERF_TOOL_MAX:
/* This should never be reached */
return 0;
} }
return evsel__read_counter(counter, cpu_map_idx, thread);
} }
/* /*
...@@ -1010,8 +1028,10 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) ...@@ -1010,8 +1028,10 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
evlist__reset_prev_raw_counts(evsel_list); evlist__reset_prev_raw_counts(evsel_list);
runtime_stat_reset(&stat_config); runtime_stat_reset(&stat_config);
perf_stat__reset_shadow_per_stat(&rt_stat); perf_stat__reset_shadow_per_stat(&rt_stat);
} else } else {
update_stats(&walltime_nsecs_stats, t1 - t0); update_stats(&walltime_nsecs_stats, t1 - t0);
update_rusage_stats(&ru_stats, &stat_config.ru_data);
}
/* /*
* Closing a group leader splits the group, and as we only disable * Closing a group leader splits the group, and as we only disable
...@@ -1235,6 +1255,8 @@ static struct option stat_options[] = { ...@@ -1235,6 +1255,8 @@ static struct option stat_options[] = {
OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
"disable CPU count aggregation", AGGR_NONE), "disable CPU count aggregation", AGGR_NONE),
OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"), OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge,
"Merge identical named hybrid events"),
OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator", OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
"print counts with custom separator"), "print counts with custom separator"),
OPT_CALLBACK('G', "cgroup", &evsel_list, "name", OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
...@@ -1842,11 +1864,23 @@ static int add_default_attributes(void) ...@@ -1842,11 +1864,23 @@ static int add_default_attributes(void)
unsigned int max_level = 1; unsigned int max_level = 1;
char *str = NULL; char *str = NULL;
bool warn = false; bool warn = false;
const char *pmu_name = "cpu";
if (!force_metric_only) if (!force_metric_only)
stat_config.metric_only = true; stat_config.metric_only = true;
if (pmu_have_event("cpu", topdown_metric_L2_attrs[5])) { if (perf_pmu__has_hybrid()) {
if (!evsel_list->hybrid_pmu_name) {
pr_warning("WARNING: default to use cpu_core topdown events\n");
evsel_list->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu("core");
}
pmu_name = evsel_list->hybrid_pmu_name;
if (!pmu_name)
return -1;
}
if (pmu_have_event(pmu_name, topdown_metric_L2_attrs[5])) {
metric_attrs = topdown_metric_L2_attrs; metric_attrs = topdown_metric_L2_attrs;
max_level = 2; max_level = 2;
} }
...@@ -1857,10 +1891,11 @@ static int add_default_attributes(void) ...@@ -1857,10 +1891,11 @@ static int add_default_attributes(void)
} else if (!stat_config.topdown_level) } else if (!stat_config.topdown_level)
stat_config.topdown_level = max_level; stat_config.topdown_level = max_level;
if (topdown_filter_events(metric_attrs, &str, 1) < 0) { if (topdown_filter_events(metric_attrs, &str, 1, pmu_name) < 0) {
pr_err("Out of memory\n"); pr_err("Out of memory\n");
return -1; return -1;
} }
if (metric_attrs[0] && str) { if (metric_attrs[0] && str) {
if (!stat_config.interval && !stat_config.metric_only) { if (!stat_config.interval && !stat_config.metric_only) {
fprintf(stat_config.output, fprintf(stat_config.output,
...@@ -1884,10 +1919,12 @@ static int add_default_attributes(void) ...@@ -1884,10 +1919,12 @@ static int add_default_attributes(void)
} }
if (topdown_filter_events(topdown_attrs, &str, if (topdown_filter_events(topdown_attrs, &str,
arch_topdown_check_group(&warn)) < 0) { arch_topdown_check_group(&warn),
pmu_name) < 0) {
pr_err("Out of memory\n"); pr_err("Out of memory\n");
return -1; return -1;
} }
if (topdown_attrs[0] && str) { if (topdown_attrs[0] && str) {
struct parse_events_error errinfo; struct parse_events_error errinfo;
if (warn) if (warn)
......
...@@ -65,6 +65,7 @@ static void library_status(void) ...@@ -65,6 +65,7 @@ static void library_status(void)
#endif #endif
STATUS(HAVE_SYSCALL_TABLE_SUPPORT, syscall_table); STATUS(HAVE_SYSCALL_TABLE_SUPPORT, syscall_table);
STATUS(HAVE_LIBBFD_SUPPORT, libbfd); STATUS(HAVE_LIBBFD_SUPPORT, libbfd);
STATUS(HAVE_DEBUGINFOD_SUPPORT, debuginfod);
STATUS(HAVE_LIBELF_SUPPORT, libelf); STATUS(HAVE_LIBELF_SUPPORT, libelf);
STATUS(HAVE_LIBNUMA_SUPPORT, libnuma); STATUS(HAVE_LIBNUMA_SUPPORT, libnuma);
STATUS(HAVE_LIBNUMA_SUPPORT, numa_num_possible_cpus); STATUS(HAVE_LIBNUMA_SUPPORT, numa_num_possible_cpus);
......
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0-only
# perf-with-kcore: use perf with a copy of kcore
# Copyright (c) 2014, Intel Corporation.
#
set -e
# Print the command synopsis on stderr and exit with status 1.
usage()
{
	printf '%s\n' \
		"Usage: perf-with-kcore <perf sub-command> <perf.data directory> [<sub-command options> [ -- <workload>]]" \
		" <perf sub-command> can be record, script, report or inject" \
		" or: perf-with-kcore fix_buildid_cache_permissions" >&2
	exit 1
}
# Locate the perf binary and store its path in $PERF.
#
# A non-empty $PERF set by the caller is used as-is without further checks.
# Otherwise perf is looked up with `which`; the script exits with status 1
# if nothing is found or the result is not executable. On success the
# chosen path and `perf version` output are printed.
find_perf()
{
	if [ -n "$PERF" ] ; then
		return
	fi

	PERF=$(which perf || true)
	if [ -z "$PERF" ] || [ ! -x "$PERF" ] ; then
		echo "Failed to find perf" >&2
		exit 1
	fi

	echo "Using $PERF"
	"$PERF" version
}
# Copy kcore (plus kallsyms and modules) into $PERF_DATA_DIR and create a
# "kcore_dir" symlink there pointing at the copied directory.
#
# Reads $PERF, $PERF_OPTIONS and $PERF_DATA_DIR; sets SUDO, PERF_PID, KCORE,
# KCORE_DIR and KCORE_DIR_BASENAME. Exits with status 1 if perf
# buildid-cache does not report where it stored kcore.
copy_kcore()
{
echo "Copying kcore"
# Privileged commands below are run through sudo unless already root
if [ $EUID -eq 0 ] ; then
SUDO=""
else
SUDO="sudo"
fi
rm -f perf.data.junk
# Run a throw-away perf record in the background while kcore is cached.
# NOTE(review): presumably this keeps a perf session active during the
# copy - confirm the exact reason.
("$PERF" record -o perf.data.junk "${PERF_OPTIONS[@]}" -- sleep 60) >/dev/null 2>/dev/null &
PERF_PID=$!
# Need to make sure that perf has started
sleep 1
# Capture only stderr of buildid-cache; its stdout is discarded
KCORE=$(($SUDO "$PERF" buildid-cache -v -f -k /proc/kcore >/dev/null) 2>&1)
case "$KCORE" in
"kcore added to build-id cache directory "*)
# Strip the fixed message prefix, leaving the cache directory path
KCORE_DIR=${KCORE#"kcore added to build-id cache directory "}
;;
*)
# Unexpected output: stop the background perf, clean up and fail
kill $PERF_PID
wait >/dev/null 2>/dev/null || true
rm perf.data.junk
echo "$KCORE"
echo "Failed to find kcore" >&2
exit 1
;;
esac
# Stop the background perf and remove its output file
kill $PERF_PID
wait >/dev/null 2>/dev/null || true
rm perf.data.junk
# Copy the cached directory into the data dir, then remove it from the cache
$SUDO cp -a "$KCORE_DIR" "$(pwd)/$PERF_DATA_DIR"
$SUDO rm -f "$KCORE_DIR/kcore"
$SUDO rm -f "$KCORE_DIR/kallsyms"
$SUDO rm -f "$KCORE_DIR/modules"
$SUDO rmdir "$KCORE_DIR"
# From here on KCORE_DIR refers to the copy inside the data dir
KCORE_DIR_BASENAME=$(basename "$KCORE_DIR")
KCORE_DIR="$(pwd)/$PERF_DATA_DIR/$KCORE_DIR_BASENAME"
# Give the copied directory and files to the current user and group
$SUDO chown $UID "$KCORE_DIR"
$SUDO chown $UID "$KCORE_DIR/kcore"
$SUDO chown $UID "$KCORE_DIR/kallsyms"
$SUDO chown $UID "$KCORE_DIR/modules"
$SUDO chgrp $GROUPS "$KCORE_DIR"
$SUDO chgrp $GROUPS "$KCORE_DIR/kcore"
$SUDO chgrp $GROUPS "$KCORE_DIR/kallsyms"
$SUDO chgrp $GROUPS "$KCORE_DIR/modules"
# Relative symlink so the data directory stays relocatable
ln -s "$KCORE_DIR_BASENAME" "$PERF_DATA_DIR/kcore_dir"
}
# Give ownership of everything under the sudo-invoking user's ~/.debug back
# to that user (and to $SUDO_GID when it is set).
#
# Must be run as root through sudo; exits with status 1 otherwise. Each
# entry that is changed is listed via find's -ls.
fix_buildid_cache_permissions()
{
	local debug_dir

	if [ "$EUID" -ne 0 ] ; then
		echo "This script must be run as root via sudo " >&2
		exit 1
	fi
	if [ -z "$SUDO_USER" ] ; then
		echo "This script must be run via sudo" >&2
		exit 1
	fi

	# Let bash expand ~user to find the invoking user's home directory
	USER_HOME=$(bash <<< "echo ~$SUDO_USER")
	debug_dir="$USER_HOME/.debug"

	echo "Fixing buildid cache permissions"

	# Owner: directories, singly-linked regular files, then symlinks (-h)
	find "$debug_dir" -xdev -type d ! -user "$SUDO_USER" -ls -exec chown "$SUDO_USER" {} \;
	find "$debug_dir" -xdev -type f -links 1 ! -user "$SUDO_USER" -ls -exec chown "$SUDO_USER" {} \;
	find "$debug_dir" -xdev -type l ! -user "$SUDO_USER" -ls -exec chown -h "$SUDO_USER" {} \;

	# Group: same three passes, only when sudo exported the caller's gid
	if [ -n "$SUDO_GID" ] ; then
		find "$debug_dir" -xdev -type d ! -group "$SUDO_GID" -ls -exec chgrp "$SUDO_GID" {} \;
		find "$debug_dir" -xdev -type f -links 1 ! -group "$SUDO_GID" -ls -exec chgrp "$SUDO_GID" {} \;
		find "$debug_dir" -xdev -type l ! -group "$SUDO_GID" -ls -exec chgrp -h "$SUDO_GID" {} \;
	fi

	echo "Done"
}
# Warn on stderr if anything under ~/.debug is not owned by $USER or does
# not belong to group $GROUPS. Nothing is checked when running as root.
#
# Each find stops at the first mismatch (-print -quit) and its output is
# appended to the global PERMISSIONS_OK; a non-empty result triggers the
# warning.
check_buildid_cache_permissions()
{
	local cache="$HOME/.debug"

	if [ "$EUID" -eq 0 ] ; then
		return
	fi

	# Owner checks: directories, singly-linked regular files, symlinks
	PERMISSIONS_OK+=$(find "$cache" -xdev -type d ! -user "$USER" -print -quit)
	PERMISSIONS_OK+=$(find "$cache" -xdev -type f -links 1 ! -user "$USER" -print -quit)
	PERMISSIONS_OK+=$(find "$cache" -xdev -type l ! -user "$USER" -print -quit)

	# Group checks, same three file types
	PERMISSIONS_OK+=$(find "$cache" -xdev -type d ! -group "$GROUPS" -print -quit)
	PERMISSIONS_OK+=$(find "$cache" -xdev -type f -links 1 ! -group "$GROUPS" -print -quit)
	PERMISSIONS_OK+=$(find "$cache" -xdev -type l ! -group "$GROUPS" -print -quit)

	[ -z "$PERMISSIONS_OK" ] || echo "*** WARNING *** buildid cache permissions may need fixing" >&2
}
# Record a workload into "$PERF_DATA_DIR/perf.data" and then copy kcore
# into the same directory.
#
# Arguments: the workload command line ("$@"). perf record options are
# taken from the global PERF_OPTIONS array.
# For non-root users, warnings are printed on stderr for system settings
# that limit what can be recorded. Exits with status 1 if the data
# directory already exists or if it is still empty after recording.
record()
{
echo "Recording"
if [ $EUID -ne 0 ] ; then
if [ "$(cat /proc/sys/kernel/kptr_restrict)" -ne 0 ] ; then
echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2
fi
# Look for -a / --all-cpus as a separate word in the options
if echo "${PERF_OPTIONS[@]}" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then
echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2
fi
# Extra checks when the options mention intel_pt, intel_bts or -I
if echo "${PERF_OPTIONS[@]}" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then
if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then
echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2
fi
# The debugfs warning is skipped when --per-thread or -t is given
if echo "${PERF_OPTIONS[@]}" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then
true
elif echo "${PERF_OPTIONS[@]}" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then
true
elif [ ! -r /sys/kernel/debug -o ! -x /sys/kernel/debug ] ; then
echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2
fi
fi
fi
if [ -z "$1" ] ; then
echo "Workload is required for recording" >&2
usage
fi
# Refuse to overwrite an existing data directory
if [ -e "$PERF_DATA_DIR" ] ; then
echo "'$PERF_DATA_DIR' exists" >&2
exit 1
fi
find_perf
mkdir "$PERF_DATA_DIR"
echo "$PERF record -o $PERF_DATA_DIR/perf.data ${PERF_OPTIONS[@]} -- $@"
# "|| true" keeps "set -e" from aborting the script when perf record fails
"$PERF" record -o "$PERF_DATA_DIR/perf.data" "${PERF_OPTIONS[@]}" -- "$@" || true
# rmdir only succeeds on an empty directory, i.e. when nothing was recorded
if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then
exit 1
fi
copy_kcore
echo "Done"
}
# Run "perf $PERF_SUB_COMMAND" on the perf.data file inside $PERF_DATA_DIR,
# pointing --kallsyms at the kallsyms file under its kcore_dir. Any extra
# arguments are passed through to perf. The command line is echoed first.
subcommand()
{
	local data="$PERF_DATA_DIR/perf.data"
	local kallsyms="$PERF_DATA_DIR/kcore_dir/kallsyms"

	find_perf
	check_buildid_cache_permissions

	echo "$PERF $PERF_SUB_COMMAND -i $data --kallsyms=$kallsyms $@"
	"$PERF" $PERF_SUB_COMMAND -i "$data" "--kallsyms=$kallsyms" "$@"
}
# Entry point: dispatch on the first command-line argument.

# The permissions fixer is a stand-alone mode with no further arguments
if [ "$1" = "fix_buildid_cache_permissions" ] ; then
	fix_buildid_cache_permissions
	exit 0
fi

# Every other mode needs a sub-command and a data directory
PERF_SUB_COMMAND=$1
PERF_DATA_DIR=$2
shift || true
shift || true

if [ -z "$PERF_SUB_COMMAND" ] || [ -z "$PERF_DATA_DIR" ] ; then
	usage
fi

case "$PERF_SUB_COMMAND" in
record)
	# Everything before "--" is a perf record option
	until [ "$1" = "--" ] ; do
		PERF_OPTIONS+=("$1")
		shift || break
	done
	if [ "$1" != "--" ] ; then
		echo "Options and workload are required for recording" >&2
		usage
	fi
	shift
	record "$@"
	;;
script|report|inject)
	subcommand "$@"
	;;
*)
	usage
	;;
esac
[
{
"ArchStdEvent": "BR_MIS_PRED"
},
{
"ArchStdEvent": "BR_PRED"
},
{
"ArchStdEvent": "BR_INDIRECT_SPEC"
}
]
[
{
"ArchStdEvent": "CPU_CYCLES"
},
{
"ArchStdEvent": "BUS_ACCESS"
},
{
"ArchStdEvent": "BUS_CYCLES"
},
{
"ArchStdEvent": "BUS_ACCESS_RD"
},
{
"ArchStdEvent": "BUS_ACCESS_WR"
}
]
[
{
"ArchStdEvent": "L1I_CACHE_REFILL"
},
{
"ArchStdEvent": "L1I_TLB_REFILL"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL"
},
{
"ArchStdEvent": "L1D_CACHE"
},
{
"ArchStdEvent": "L1D_TLB_REFILL"
},
{
"ArchStdEvent": "L1I_CACHE"
},
{
"ArchStdEvent": "L1D_CACHE_WB"
},
{
"ArchStdEvent": "L2D_CACHE"
},
{
"ArchStdEvent": "L2D_CACHE_REFILL"
},
{
"ArchStdEvent": "L2D_CACHE_WB"
}
]
[
{
"ArchStdEvent": "EXC_TAKEN"
},
{
"ArchStdEvent": "MEMORY_ERROR"
},
{
"ArchStdEvent": "EXC_IRQ"
},
{
"ArchStdEvent": "EXC_FIQ"
}
]
[
{
"ArchStdEvent": "SW_INCR"
},
{
"ArchStdEvent": "LD_RETIRED"
},
{
"ArchStdEvent": "ST_RETIRED"
},
{
"ArchStdEvent": "INST_RETIRED"
},
{
"ArchStdEvent": "EXC_RETURN"
},
{
"ArchStdEvent": "CID_WRITE_RETIRED"
},
{
"ArchStdEvent": "PC_WRITE_RETIRED"
},
{
"ArchStdEvent": "BR_IMMED_RETIRED"
},
{
"ArchStdEvent": "BR_RETURN_RETIRED"
}
]
[
{
"ArchStdEvent": "UNALIGNED_LDST_RETIRED"
},
{
"ArchStdEvent": "MEM_ACCESS"
}
]
[
{
"ArchStdEvent": "BR_MIS_PRED"
},
{
"ArchStdEvent": "BR_PRED"
},
{
"ArchStdEvent": "BR_INDIRECT_SPEC"
}
]
[
{
"ArchStdEvent": "CPU_CYCLES"
},
{
"ArchStdEvent": "BUS_ACCESS"
},
{
"ArchStdEvent": "BUS_CYCLES"
},
{
"ArchStdEvent": "BUS_ACCESS_RD"
},
{
"ArchStdEvent": "BUS_ACCESS_WR"
}
]
[
{
"ArchStdEvent": "L1I_CACHE_REFILL"
},
{
"ArchStdEvent": "L1I_TLB_REFILL"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL"
},
{
"ArchStdEvent": "L1D_CACHE"
},
{
"ArchStdEvent": "L1D_TLB_REFILL"
},
{
"ArchStdEvent": "L1I_CACHE"
},
{
"ArchStdEvent": "L1D_CACHE_WB"
},
{
"ArchStdEvent": "L2D_CACHE"
},
{
"ArchStdEvent": "L2D_CACHE_REFILL"
},
{
"ArchStdEvent": "L2D_CACHE_WB"
}
]
[
{
"ArchStdEvent": "EXC_TAKEN"
},
{
"ArchStdEvent": "MEMORY_ERROR"
},
{
"ArchStdEvent": "EXC_IRQ"
},
{
"ArchStdEvent": "EXC_FIQ"
}
]
[
{
"ArchStdEvent": "SW_INCR"
},
{
"ArchStdEvent": "LD_RETIRED"
},
{
"ArchStdEvent": "ST_RETIRED"
},
{
"ArchStdEvent": "INST_RETIRED"
},
{
"ArchStdEvent": "EXC_RETURN"
},
{
"ArchStdEvent": "CID_WRITE_RETIRED"
},
{
"ArchStdEvent": "PC_WRITE_RETIRED"
},
{
"ArchStdEvent": "BR_IMMED_RETIRED"
},
{
"ArchStdEvent": "BR_RETURN_RETIRED"
},
{
"ArchStdEvent": "INST_SPEC"
},
{
"ArchStdEvent": "DP_SPEC"
},
{
"ArchStdEvent": "ASE_SPEC"
},
{
"ArchStdEvent": "VFP_SPEC"
},
{
"ArchStdEvent": "CRYPTO_SPEC"
}
]
[
{
"ArchStdEvent": "UNALIGNED_LDST_RETIRED"
},
{
"ArchStdEvent": "MEM_ACCESS"
}
]
[
{
"ArchStdEvent": "BR_MIS_PRED"
},
{
"ArchStdEvent": "BR_PRED"
},
{
"ArchStdEvent": "BR_IMMED_SPEC"
},
{
"ArchStdEvent": "BR_RETURN_SPEC"
},
{
"ArchStdEvent": "BR_INDIRECT_SPEC"
},
{
"PublicDescription": "Predicted conditional branch executed. This event counts when any branch that the conditional predictor can predict is retired. This event still counts when branch prediction is disabled due to the Memory Management Unit (MMU) being off",
"EventCode": "0xC9",
"EventName": "BR_COND_PRED",
"BriefDescription": "Predicted conditional branch executed. This event counts when any branch that the conditional predictor can predict is retired. This event still counts when branch prediction is disabled due to the Memory Management Unit (MMU) being off"
},
{
"PublicDescription": "Indirect branch mispredicted. This event counts when any indirect branch that the Branch Target Address Cache (BTAC) can predict is retired and has mispredicted either the condition or the address. This event still counts when branch prediction is disabled due to the MMU being off",
"EventCode": "0xCA",
"EventName": "BR_INDIRECT_MIS_PRED",
"BriefDescription": "Indirect branch mispredicted. This event counts when any indirect branch that the Branch Target Address Cache (BTAC) can predict is retired and has mispredicted either the condition or the address. This event still counts when branch prediction is disabled due to the MMU being off"
},
{
"PublicDescription": "Indirect branch mispredicted due to address miscompare. This event counts when any indirect branch that the BTAC can predict is retired, was taken, correctly predicted the condition, and has mispredicted the address. This event still counts when branch prediction is disabled due to the MMU being off",
"EventCode": "0xCB",
"EventName": "BR_INDIRECT_ADDR_MIS_PRED",
"BriefDescription": "Indirect branch mispredicted due to address miscompare. This event counts when any indirect branch that the BTAC can predict is retired, was taken, correctly predicted the condition, and has mispredicted the address. This event still counts when branch prediction is disabled due to the MMU being off"
},
{
"PublicDescription": "Conditional branch mispredicted. This event counts when any branch that the conditional predictor can predict is retired and has mispredicted the condition. This event still counts when branch prediction is disabled due to the MMU being off. Conditional indirect branches that correctly predict the condition but mispredict the address do not count",
"EventCode": "0xCC",
"EventName": "BR_COND_MIS_PRED",
"BriefDescription": "Conditional branch mispredicted. This event counts when any branch that the conditional predictor can predict is retired and has mispredicted the condition. This event still counts when branch prediction is disabled due to the MMU being off. Conditional indirect branches that correctly predict the condition but mispredict the address do not count"
},
{
"PublicDescription": "Indirect branch with predicted address executed. This event counts when any indirect branch that the BTAC can predict is retired, was taken, and correctly predicted the condition. This event still counts when branch prediction is disabled due to the MMU being off",
"EventCode": "0xCD",
"EventName": "BR_INDIRECT_ADDR_PRED",
"BriefDescription": "Indirect branch with predicted address executed. This event counts when any indirect branch that the BTAC can predict is retired, was taken, and correctly predicted the condition. This event still counts when branch prediction is disabled due to the MMU being off"
},
{
"PublicDescription": "Procedure return with predicted address executed. This event counts when any procedure return that the call-return stack can predict is retired, was taken, and correctly predicted the condition. This event still counts when branch prediction is disabled due to the MMU being off",
"EventCode": "0xCE",
"EventName": "BR_RETURN_ADDR_PRED",
"BriefDescription": "Procedure return with predicted address executed. This event counts when any procedure return that the call-return stack can predict is retired, was taken, and correctly predicted the condition. This event still counts when branch prediction is disabled due to the MMU being off"
},
{
"PublicDescription": "Procedure return mispredicted due to address miscompare. This event counts when any procedure return that the call-return stack can predict is retired, was taken, correctly predicted the condition, and has mispredicted the address. This event still counts when branch prediction is disabled due to the MMU being off",
"EventCode": "0xCF",
"EventName": "BR_RETURN_ADDR_MIS_PRED",
"BriefDescription": "Procedure return mispredicted due to address miscompare. This event counts when any procedure return that the call-return stack can predict is retired, was taken, correctly predicted the condition, and has mispredicted the address. This event still counts when branch prediction is disabled due to the MMU being off"
}
]
[
{
"ArchStdEvent": "CPU_CYCLES"
},
{
"ArchStdEvent": "BUS_ACCESS"
},
{
"ArchStdEvent": "BUS_CYCLES"
},
{
"ArchStdEvent": "BUS_ACCESS_RD"
},
{
"ArchStdEvent": "BUS_ACCESS_WR"
}
]
[
{
"ArchStdEvent": "L1I_CACHE_REFILL"
},
{
"ArchStdEvent": "L1I_TLB_REFILL"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL"
},
{
"ArchStdEvent": "L1D_CACHE"
},
{
"ArchStdEvent": "L1D_TLB_REFILL"
},
{
"ArchStdEvent": "L1I_CACHE"
},
{
"ArchStdEvent": "L1D_CACHE_WB"
},
{
"ArchStdEvent": "L2D_CACHE"
},
{
"ArchStdEvent": "L2D_CACHE_REFILL"
},
{
"ArchStdEvent": "L2D_CACHE_WB"
},
{
"ArchStdEvent": "L2D_CACHE_ALLOCATE"
},
{
"ArchStdEvent": "L1D_TLB"
},
{
"ArchStdEvent": "L1I_TLB"
},
{
"ArchStdEvent": "L3D_CACHE"
},
{
"ArchStdEvent": "L2D_TLB_REFILL"
},
{
"ArchStdEvent": "L2D_TLB"
},
{
"ArchStdEvent": "DTLB_WALK"
},
{
"ArchStdEvent": "ITLB_WALK"
},
{
"ArchStdEvent": "LL_CACHE_RD"
},
{
"ArchStdEvent": "LL_CACHE_MISS_RD"
},
{
"ArchStdEvent": "L1D_CACHE_LMISS_RD"
},
{
"ArchStdEvent": "L1D_CACHE_RD"
},
{
"ArchStdEvent": "L1D_CACHE_WR"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL_RD"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL_WR"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL_INNER"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL_OUTER"
},
{
"ArchStdEvent": "L2D_CACHE_RD"
},
{
"ArchStdEvent": "L2D_CACHE_WR"
},
{
"ArchStdEvent": "L2D_CACHE_REFILL_RD"
},
{
"ArchStdEvent": "L2D_CACHE_REFILL_WR"
},
{
"ArchStdEvent": "L3D_CACHE_RD"
},
{
"ArchStdEvent": "L3D_CACHE_REFILL_RD"
},
{
"PublicDescription": "L2 cache refill due to prefetch. If the complex is configured with a per-complex L2 cache, this event does not count. If the complex is configured without a per-complex L2 cache, this event counts the cluster cache event, as defined by L3D_CACHE_REFILL_PREFETCH. If neither a per-complex cache nor a cluster cache is configured, this event is not implemented",
"EventCode": "0xC1",
"EventName": "L2D_CACHE_REFILL_PREFETCH",
"BriefDescription": "L2 cache refill due to prefetch. If the complex is configured with a per-complex L2 cache, this event does not count. If the complex is configured without a per-complex L2 cache, this event counts the cluster cache event, as defined by L3D_CACHE_REFILL_PREFETCH. If neither a per-complex cache nor a cluster cache is configured, this event is not implemented"
},
{
"PublicDescription": "L1 data cache refill due to prefetch. This event counts any linefills from the prefetcher that cause an allocation into the L1 data cache",
"EventCode": "0xC2",
"EventName": "L1D_CACHE_REFILL_PREFETCH",
"BriefDescription": "L1 data cache refill due to prefetch. This event counts any linefills from the prefetcher that cause an allocation into the L1 data cache"
},
{
"PublicDescription": "L2 cache write streaming mode. This event counts for each cycle where the core is in write streaming mode and is not allocating writes into the L2 cache",
"EventCode": "0xC3",
"EventName": "L2D_WS_MODE",
"BriefDescription": "L2 cache write streaming mode. This event counts for each cycle where the core is in write streaming mode and is not allocating writes into the L2 cache"
},
{
"PublicDescription": "L1 data cache entering write streaming mode. This event counts for each entry into write streaming mode",
"EventCode": "0xC4",
"EventName": "L1D_WS_MODE_ENTRY",
"BriefDescription": "L1 data cache entering write streaming mode. This event counts for each entry into write streaming mode"
},
{
"PublicDescription": "L1 data cache write streaming mode. This event counts for each cycle where the core is in write streaming mode and is not allocating writes into the L1 data cache",
"EventCode": "0xC5",
"EventName": "L1D_WS_MODE",
"BriefDescription": "L1 data cache write streaming mode. This event counts for each cycle where the core is in write streaming mode and is not allocating writes into the L1 data cache"
},
{
"PublicDescription": "L3 cache write streaming mode. This event counts for each cycle where the core is in write streaming mode and is not allocating writes into the L3 cache",
"EventCode": "0xC7",
"EventName": "L3D_WS_MODE",
"BriefDescription": "L3 cache write streaming mode. This event counts for each cycle where the core is in write streaming mode and is not allocating writes into the L3 cache"
},
{
"PublicDescription": "Last level cache write streaming mode. This event counts for each cycle where the core is in write streaming mode and is not allocating writes into the system cache",
"EventCode": "0xC8",
"EventName": "LL_WS_MODE",
"BriefDescription": "Last level cache write streaming mode. This event counts for each cycle where the core is in write streaming mode and is not allocating writes into the system cache"
},
{
"PublicDescription": "L2 TLB walk cache access. This event does not count if the MMU is disabled",
"EventCode": "0xD0",
"EventName": "L2D_WALK_TLB",
"BriefDescription": "L2 TLB walk cache access. This event does not count if the MMU is disabled"
},
{
"PublicDescription": "L2 TLB walk cache refill. This event does not count if the MMU is disabled",
"EventCode": "0xD1",
"EventName": "L2D_WALK_TLB_REFILL",
"BriefDescription": "L2 TLB walk cache refill. This event does not count if the MMU is disabled"
},
{
"PublicDescription": "L2 TLB IPA cache access. This event counts on each access to the IPA cache. If a single translation table walk needs to make multiple accesses to the IPA cache, each access is counted. If stage 2 translation is disabled, this event does not count",
"EventCode": "0xD4",
"EventName": "L2D_S2_TLB",
"BriefDescription": "L2 TLB IPA cache access. This event counts on each access to the IPA cache. If a single translation table walk needs to make multiple accesses to the IPA cache, each access is counted. If stage 2 translation is disabled, this event does not count"
},
{
"PublicDescription": "L2 TLB IPA cache refill. This event counts on each refill of the IPA cache. If a single translation table walk needs to make multiple accesses to the IPA cache, each access that causes a refill is counted. If stage 2 translation is disabled, this event does not count",
"EventCode": "0xD5",
"EventName": "L2D_S2_TLB_REFILL",
"BriefDescription": "L2 TLB IPA cache refill. This event counts on each refill of the IPA cache. If a single translation table walk needs to make multiple accesses to the IPA cache, each access that causes a refill is counted. If stage 2 translation is disabled, this event does not count"
},
{
"PublicDescription": "L2 cache stash dropped. This event counts on each stash request that is received from the interconnect or the Accelerator Coherency Port (ACP), that targets L2 cache and is dropped due to lack of buffer space to hold the request",
"EventCode": "0xD6",
"EventName": "L2D_CACHE_STASH_DROPPED",
"BriefDescription": "L2 cache stash dropped. This event counts on each stash request that is received from the interconnect or the Accelerator Coherency Port (ACP), that targets L2 cache and is dropped due to lack of buffer space to hold the request"
},
{
"ArchStdEvent": "L1I_CACHE_LMISS"
},
{
"ArchStdEvent": "L2D_CACHE_LMISS_RD"
},
{
"ArchStdEvent": "L3D_CACHE_LMISS_RD"
}
]
[
{
"ArchStdEvent": "EXC_TAKEN"
},
{
"ArchStdEvent": "MEMORY_ERROR"
},
{
"ArchStdEvent": "EXC_IRQ"
},
{
"ArchStdEvent": "EXC_FIQ"
}
]
[
{
"ArchStdEvent": "LD_RETIRED"
},
{
"ArchStdEvent": "ST_RETIRED"
},
{
"ArchStdEvent": "INST_RETIRED"
},
{
"ArchStdEvent": "EXC_RETURN"
},
{
"ArchStdEvent": "CID_WRITE_RETIRED"
},
{
"ArchStdEvent": "PC_WRITE_RETIRED"
},
{
"ArchStdEvent": "BR_IMMED_RETIRED"
},
{
"ArchStdEvent": "BR_RETURN_RETIRED"
},
{
"ArchStdEvent": "INST_SPEC"
},
{
"ArchStdEvent": "TTBR_WRITE_RETIRED"
},
{
"ArchStdEvent": "BR_RETIRED"
},
{
"ArchStdEvent": "BR_MIS_PRED_RETIRED"
},
{
"ArchStdEvent": "OP_RETIRED"
},
{
"ArchStdEvent": "OP_SPEC"
},
{
"ArchStdEvent": "LD_SPEC"
},
{
"ArchStdEvent": "ST_SPEC"
},
{
"ArchStdEvent": "LDST_SPEC"
},
{
"ArchStdEvent": "DP_SPEC"
},
{
"ArchStdEvent": "ASE_SPEC"
},
{
"ArchStdEvent": "VFP_SPEC"
},
{
"ArchStdEvent": "PC_WRITE_SPEC"
},
{
"ArchStdEvent": "CRYPTO_SPEC"
},
{
"ArchStdEvent": "SVE_INST_RETIRED"
},
{
"ArchStdEvent": "SVE_INST_SPEC"
},
{
"ArchStdEvent": "FP_HP_SPEC"
},
{
"ArchStdEvent": "FP_SP_SPEC"
},
{
"ArchStdEvent": "FP_DP_SPEC"
},
{
"ArchStdEvent": "ASE_SVE_INT8_SPEC"
},
{
"ArchStdEvent": "ASE_SVE_INT16_SPEC"
},
{
"ArchStdEvent": "ASE_SVE_INT32_SPEC"
},
{
"ArchStdEvent": "ASE_SVE_INT64_SPEC"
}
]
[
{
"ArchStdEvent": "MEM_ACCESS"
},
{
"ArchStdEvent": "REMOTE_ACCESS_RD"
},
{
"ArchStdEvent": "MEM_ACCESS_RD"
},
{
"ArchStdEvent": "MEM_ACCESS_WR"
},
{
"ArchStdEvent": "LDST_ALIGN_LAT"
},
{
"ArchStdEvent": "LD_ALIGN_LAT"
},
{
"ArchStdEvent": "ST_ALIGN_LAT"
},
{
"ArchStdEvent": "MEM_ACCESS_CHECKED"
},
{
"ArchStdEvent": "MEM_ACCESS_CHECKED_RD"
},
{
"ArchStdEvent": "MEM_ACCESS_CHECKED_WR"
}
]
[
{
"ArchStdEvent": "STALL_FRONTEND"
},
{
"ArchStdEvent": "STALL_BACKEND"
},
{
"ArchStdEvent": "STALL"
},
{
"ArchStdEvent": "STALL_SLOT_BACKEND"
},
{
"ArchStdEvent": "STALL_SLOT_FRONTEND"
},
{
"ArchStdEvent": "STALL_SLOT"
},
{
"PublicDescription": "No operation issued due to the frontend, cache miss. This event counts every cycle that the Data Processing Unit (DPU) instruction queue is empty and there is an instruction cache miss being processed",
"EventCode": "0xE1",
"EventName": "STALL_FRONTEND_CACHE",
"BriefDescription": "No operation issued due to the frontend, cache miss. This event counts every cycle that the Data Processing Unit (DPU) instruction queue is empty and there is an instruction cache miss being processed"
},
{
"PublicDescription": "No operation issued due to the frontend, TLB miss. This event counts every cycle that the DPU instruction queue is empty and there is an instruction L1 TLB miss being processed",
"EventCode": "0xE2",
"EventName": "STALL_FRONTEND_TLB",
"BriefDescription": "No operation issued due to the frontend, TLB miss. This event counts every cycle that the DPU instruction queue is empty and there is an instruction L1 TLB miss being processed"
},
{
"PublicDescription": "No operation issued due to the frontend, pre-decode error",
"EventCode": "0xE3",
"EventName": "STALL_FRONTEND_PDERR",
"BriefDescription": "No operation issued due to the frontend, pre-decode error"
},
{
"PublicDescription": "No operation issued due to the backend interlock. This event counts every cycle where the issue of an operation is stalled and there is an interlock. Stall cycles due to a stall in the Wr stage are excluded",
"EventCode": "0xE4",
"EventName": "STALL_BACKEND_ILOCK",
"BriefDescription": "No operation issued due to the backend interlock. This event counts every cycle where the issue of an operation is stalled and there is an interlock. Stall cycles due to a stall in the Wr stage are excluded"
},
{
"PublicDescription": "No operation issued due to the backend, address interlock. This event counts every cycle where the issue of an operation is stalled and there is an interlock on an address operand. This type of interlock is caused by a load/store instruction waiting for data to calculate the address. Stall cycles due to a stall in the Wr stage are excluded",
"EventCode": "0xE5",
"EventName": "STALL_BACKEND_ILOCK_ADDR",
"BriefDescription": "No operation issued due to the backend, address interlock. This event counts every cycle where the issue of an operation is stalled and there is an interlock on an address operand. This type of interlock is caused by a load/store instruction waiting for data to calculate the address. Stall cycles due to a stall in the Wr stage are excluded"
},
{
"PublicDescription": "No operation issued due to the backend, interlock, or the Vector Processing Unit (VPU). This event counts every cycle where there is a stall or an interlock that is caused by a VPU instruction. Stall cycles due to a stall in the Wr stage are excluded",
"EventCode": "0xE6",
"EventName": "STALL_BACKEND_ILOCK_VPU",
"BriefDescription": "No operation issued due to the backend, interlock, or the Vector Processing Unit (VPU). This event counts every cycle where there is a stall or an interlock that is caused by a VPU instruction. Stall cycles due to a stall in the Wr stage are excluded"
},
{
"PublicDescription": "No operation issued due to the backend, load. This event counts every cycle where there is a stall in the Wr stage due to a load",
"EventCode": "0xE7",
"EventName": "STALL_BACKEND_LD",
"BriefDescription": "No operation issued due to the backend, load. This event counts every cycle where there is a stall in the Wr stage due to a load"
},
{
"PublicDescription": "No operation issued due to the backend, store. This event counts every cycle where there is a stall in the Wr stage due to a store",
"EventCode": "0xE8",
"EventName": "STALL_BACKEND_ST",
"BriefDescription": "No operation issued due to the backend, store. This event counts every cycle where there is a stall in the Wr stage due to a store"
},
{
"PublicDescription": "No operation issued due to the backend, load, cache miss. This event counts every cycle where there is a stall in the Wr stage due to a load that is waiting on data. The event counts for stalls that are caused by missing the cache or where the data is Non-cacheable",
"EventCode": "0xE9",
"EventName": "STALL_BACKEND_LD_CACHE",
"BriefDescription": "No operation issued due to the backend, load, cache miss. This event counts every cycle where there is a stall in the Wr stage due to a load that is waiting on data. The event counts for stalls that are caused by missing the cache or where the data is Non-cacheable"
},
{
"PublicDescription": "No operation issued due to the backend, load, TLB miss. This event counts every cycle where there is a stall in the Wr stage due to a load that misses in the L1 TLB",
"EventCode": "0xEA",
"EventName": "STALL_BACKEND_LD_TLB",
"BriefDescription": "No operation issued due to the backend, load, TLB miss. This event counts every cycle where there is a stall in the Wr stage due to a load that misses in the L1 TLB"
},
{
"PublicDescription": "No operation issued due to the backend, store, Store Buffer (STB) full. This event counts every cycle where there is a stall in the Wr stage because of a store operation that is waiting due to the STB being full",
"EventCode": "0xEB",
"EventName": "STALL_BACKEND_ST_STB",
"BriefDescription": "No operation issued due to the backend, store, Store Buffer (STB) full. This event counts every cycle where there is a stall in the Wr stage because of a store operation that is waiting due to the STB being full"
},
{
"PublicDescription": "No operation issued due to the backend, store, TLB miss. This event counts every cycle where there is a stall in the Wr stage because of a store operation that has missed in the L1 TLB",
"EventCode": "0xEC",
"EventName": "STALL_BACKEND_ST_TLB",
"BriefDescription": "No operation issued due to the backend, store, TLB miss. This event counts every cycle where there is a stall in the Wr stage because of a store operation that has missed in the L1 TLB"
},
{
"PublicDescription": "No operation issued due to the backend, VPU hazard. This event counts every cycle where the core stalls due to contention for the VPU with the other core",
"EventCode": "0xED",
"EventName": "STALL_BACKEND_VPU_HAZARD",
"BriefDescription": "No operation issued due to the backend, VPU hazard. This event counts every cycle where the core stalls due to contention for the VPU with the other core"
},
{
"PublicDescription": "Issue slot not issued due to interlock. For each cycle, this event counts each dispatch slot that does not issue due to an interlock",
"EventCode": "0xEE",
"EventName": "STALL_SLOT_BACKEND_ILOCK",
"BriefDescription": "Issue slot not issued due to interlock. For each cycle, this event counts each dispatch slot that does not issue due to an interlock"
},
{
"ArchStdEvent": "STALL_BACKEND_MEM"
}
]
[
{
"ArchStdEvent": "PMU_OVFS"
},
{
"ArchStdEvent": "PMU_HOVFS"
}
]
[
{
"ArchStdEvent": "TRB_WRAP"
},
{
"ArchStdEvent": "TRB_TRIG"
},
{
"ArchStdEvent": "TRCEXTOUT0"
},
{
"ArchStdEvent": "TRCEXTOUT1"
},
{
"ArchStdEvent": "TRCEXTOUT2"
},
{
"ArchStdEvent": "TRCEXTOUT3"
},
{
"ArchStdEvent": "CTI_TRIGOUT4"
},
{
"ArchStdEvent": "CTI_TRIGOUT5"
},
{
"ArchStdEvent": "CTI_TRIGOUT6"
},
{
"ArchStdEvent": "CTI_TRIGOUT7"
}
]
[
{
"ArchStdEvent": "BR_MIS_PRED"
},
{
"ArchStdEvent": "BR_PRED"
},
{
"ArchStdEvent": "BR_IMMED_SPEC"
},
{
"ArchStdEvent": "BR_RETURN_SPEC"
},
{
"ArchStdEvent": "BR_INDIRECT_SPEC"
},
{
"PublicDescription": "Predicted conditional branch executed. This event counts when any branch which can be predicted by the conditional predictor is retired. This event still counts when branch prediction is disabled due to the MMU being off",
"EventCode": "0xC9",
"EventName": "BR_COND_PRED",
"BriefDescription": "Predicted conditional branch executed. This event counts when any branch which can be predicted by the conditional predictor is retired. This event still counts when branch prediction is disabled due to the MMU being off"
},
{
"PublicDescription": "Indirect branch mis-predicted. This event counts when any indirect branch which can be predicted by the BTAC is retired, and has mispredicted for either the condition or the address. This event still counts when branch prediction is disabled due to the MMU being off",
"EventCode": "0xCA",
"EventName": "BR_INDIRECT_MIS_PRED",
"BriefDescription": "Indirect branch mis-predicted. This event counts when any indirect branch which can be predicted by the BTAC is retired, and has mispredicted for either the condition or the address. This event still counts when branch prediction is disabled due to the MMU being off"
},
{
"PublicDescription": "Indirect branch mis-predicted due to address mis-compare. This event counts when any indirect branch which can be predicted by the BTAC is retired, was taken and correctly predicted the condition, and has mispredicted the address. This event still counts when branch prediction is disabled due to the MMU being off",
"EventCode": "0xCB",
"EventName": "BR_INDIRECT_ADDR_MIS_PRED",
"BriefDescription": "Indirect branch mis-predicted due to address mis-compare. This event counts when any indirect branch which can be predicted by the BTAC is retired, was taken and correctly predicted the condition, and has mispredicted the address. This event still counts when branch prediction is disabled due to the MMU being off"
},
{
"PublicDescription": "Conditional branch mis-predicted. This event counts when any branch which can be predicted by the conditional predictor is retired, and has mis-predicted the condition. This event still counts when branch prediction is disabled due to the MMU being off. Conditional indirect branches which correctly predicted the condition but mis-predicted on the address do not count this event",
"EventCode": "0xCC",
"EventName": "BR_COND_MIS_PRED",
"BriefDescription": "Conditional branch mis-predicted. This event counts when any branch which can be predicted by the conditional predictor is retired, and has mis-predicted the condition. This event still counts when branch prediction is disabled due to the MMU being off. Conditional indirect branches which correctly predicted the condition but mis-predicted on the address do not count this event"
},
{
"PublicDescription": "Indirect branch with predicted address executed. This event counts when any indirect branch which can be predicted by the BTAC is retired, was taken and correctly predicted the condition. This event still counts when branch prediction is disabled due to the MMU being off",
"EventCode": "0xCD",
"EventName": "BR_INDIRECT_ADDR_PRED",
"BriefDescription": "Indirect branch with predicted address executed. This event counts when any indirect branch which can be predicted by the BTAC is retired, was taken and correctly predicted the condition. This event still counts when branch prediction is disabled due to the MMU being off"
},
{
"PublicDescription": "Procedure return with predicted address executed. This event counts when any procedure return which can be predicted by the CRS is retired, was taken and correctly predicted the condition. This event still counts when branch prediction is disabled due to the MMU being off",
"EventCode": "0xCE",
"EventName": "BR_RETURN_ADDR_PRED",
"BriefDescription": "Procedure return with predicted address executed. This event counts when any procedure return which can be predicted by the CRS is retired, was taken and correctly predicted the condition. This event still counts when branch prediction is disabled due to the MMU being off"
},
{
"PublicDescription": "Procedure return mis-predicted due to address mis-compare. This event counts when any procedure return which can be predicted by the CRS is retired, was taken and correctly predicted the condition, and has mispredicted the address. This event still counts when branch prediction is disabled due to the MMU being off",
"EventCode": "0xCF",
"EventName": "BR_RETURN_ADDR_MIS_PRED",
"BriefDescription": "Procedure return mis-predicted due to address mis-compare. This event counts when any procedure return which can be predicted by the CRS is retired, was taken and correctly predicted the condition, and has mispredicted the address. This event still counts when branch prediction is disabled due to the MMU being off"
}
]
[
{
"ArchStdEvent": "CPU_CYCLES"
},
{
"ArchStdEvent": "BUS_ACCESS"
},
{
"ArchStdEvent": "BUS_CYCLES"
},
{
"ArchStdEvent": "BUS_ACCESS_RD"
},
{
"ArchStdEvent": "BUS_ACCESS_WR"
}
]
[
{
"ArchStdEvent": "L1I_CACHE_REFILL"
},
{
"ArchStdEvent": "L1I_TLB_REFILL"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL"
},
{
"ArchStdEvent": "L1D_CACHE"
},
{
"ArchStdEvent": "L1D_TLB_REFILL"
},
{
"ArchStdEvent": "L1I_CACHE"
},
{
"ArchStdEvent": "L1D_CACHE_WB"
},
{
"ArchStdEvent": "L2D_CACHE"
},
{
"ArchStdEvent": "L2D_CACHE_REFILL"
},
{
"ArchStdEvent": "L2D_CACHE_WB"
},
{
"ArchStdEvent": "L2D_CACHE_ALLOCATE"
},
{
"ArchStdEvent": "L1D_TLB"
},
{
"ArchStdEvent": "L1I_TLB"
},
{
"ArchStdEvent": "L3D_CACHE_ALLOCATE"
},
{
"ArchStdEvent": "L3D_CACHE_REFILL"
},
{
"ArchStdEvent": "L3D_CACHE"
},
{
"ArchStdEvent": "L2D_TLB_REFILL"
},
{
"ArchStdEvent": "L2D_TLB"
},
{
"ArchStdEvent": "DTLB_WALK"
},
{
"ArchStdEvent": "ITLB_WALK"
},
{
"ArchStdEvent": "LL_CACHE_RD"
},
{
"ArchStdEvent": "LL_CACHE_MISS_RD"
},
{
"ArchStdEvent": "L1D_CACHE_RD"
},
{
"ArchStdEvent": "L1D_CACHE_WR"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL_RD"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL_WR"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL_INNER"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL_OUTER"
},
{
"ArchStdEvent": "L2D_CACHE_RD"
},
{
"ArchStdEvent": "L2D_CACHE_WR"
},
{
"ArchStdEvent": "L2D_CACHE_REFILL_RD"
},
{
"ArchStdEvent": "L2D_CACHE_REFILL_WR"
},
{
"ArchStdEvent": "L3D_CACHE_RD"
},
{
"ArchStdEvent": "L3D_CACHE_REFILL_RD"
},
{
"PublicDescription": "Level 3 cache refill due to prefetch. This event counts any linefills from the hardware prefetcher which cause an allocation into the L3 cache. Note: It might not be possible to both distinguish hardware vs software prefetches and also which prefetches cause an allocation. If so, only hardware prefetches should be counted, regardless of whether they allocate. If either the core is configured without a per-core L2 or the cluster is configured without an L3 cache, this event is not implemented",
"EventCode": "0xC0",
"EventName": "L3D_CACHE_REFILL_PREFETCH",
"BriefDescription": "Level 3 cache refill due to prefetch. This event counts any linefills from the hardware prefetcher which cause an allocation into the L3 cache. Note: It might not be possible to both distinguish hardware vs software prefetches and also which prefetches cause an allocation. If so, only hardware prefetches should be counted, regardless of whether they allocate. If either the core is configured without a per-core L2 or the cluster is configured without an L3 cache, this event is not implemented"
},
{
"PublicDescription": "Level 2 cache refill due to prefetch. If the core is configured with a per-core L2 cache, this event does not count. If the core is configured without a per-core L2 cache, this event counts the cluster cache event, as defined by L3D_CACHE_REFILL_PREFETCH. If there is neither a per-core cache nor a cluster cache configured, this event is not implemented",
"EventCode": "0xC1",
"EventName": "L2D_CACHE_REFILL_PREFETCH",
"BriefDescription": "Level 2 cache refill due to prefetch. If the core is configured with a per-core L2 cache, this event does not count. If the core is configured without a per-core L2 cache, this event counts the cluster cache event, as defined by L3D_CACHE_REFILL_PREFETCH. If there is neither a per-core cache nor a cluster cache configured, this event is not implemented"
},
{
"PublicDescription": "Level 1 data cache refill due to prefetch. This event counts any linefills from the prefetcher which cause an allocation into the L1 D-cache",
"EventCode": "0xC2",
"EventName": "L1D_CACHE_REFILL_PREFETCH",
"BriefDescription": "Level 1 data cache refill due to prefetch. This event counts any linefills from the prefetcher which cause an allocation into the L1 D-cache"
},
{
"PublicDescription": "Level 2 cache write streaming mode. This event counts for each cycle where the core is in write-streaming mode and not allocating writes into the L2 cache",
"EventCode": "0xC3",
"EventName": "L2D_WS_MODE",
"BriefDescription": "Level 2 cache write streaming mode. This event counts for each cycle where the core is in write-streaming mode and not allocating writes into the L2 cache"
},
{
"PublicDescription": "Level 1 data cache entering write streaming mode. This event counts for each entry into write-streaming mode",
"EventCode": "0xC4",
"EventName": "L1D_WS_MODE_ENTRY",
"BriefDescription": "Level 1 data cache entering write streaming mode. This event counts for each entry into write-streaming mode"
},
{
"PublicDescription": "Level 1 data cache write streaming mode. This event counts for each cycle where the core is in write-streaming mode and not allocating writes into the L1 D-cache",
"EventCode": "0xC5",
"EventName": "L1D_WS_MODE",
"BriefDescription": "Level 1 data cache write streaming mode. This event counts for each cycle where the core is in write-streaming mode and not allocating writes into the L1 D-cache"
},
{
"PublicDescription": "Level 3 cache write streaming mode. This event counts for each cycle where the core is in write-streaming mode and not allocating writes into the L3 cache",
"EventCode": "0xC7",
"EventName": "L3D_WS_MODE",
"BriefDescription": "Level 3 cache write streaming mode. This event counts for each cycle where the core is in write-streaming mode and not allocating writes into the L3 cache"
},
{
"PublicDescription": "Level 2 TLB last-level walk cache access. This event does not count if the MMU is disabled",
"EventCode": "0xD0",
"EventName": "L2D_LLWALK_TLB",
"BriefDescription": "Level 2 TLB last-level walk cache access. This event does not count if the MMU is disabled"
},
{
"PublicDescription": "Level 2 TLB last-level walk cache refill. This event does not count if the MMU is disabled",
"EventCode": "0xD1",
"EventName": "L2D_LLWALK_TLB_REFILL",
"BriefDescription": "Level 2 TLB last-level walk cache refill. This event does not count if the MMU is disabled"
},
{
"PublicDescription": "Level 2 TLB level-2 walk cache access. This event counts accesses to the level-2 walk cache where the last-level walk cache has missed. The event only counts when the translation regime of the pagewalk uses level 2 descriptors. This event does not count if the MMU is disabled",
"EventCode": "0xD2",
"EventName": "L2D_L2WALK_TLB",
"BriefDescription": "Level 2 TLB level-2 walk cache access. This event counts accesses to the level-2 walk cache where the last-level walk cache has missed. The event only counts when the translation regime of the pagewalk uses level 2 descriptors. This event does not count if the MMU is disabled"
},
{
"PublicDescription": "Level 2 TLB level-2 walk cache refill. This event does not count if the MMU is disabled",
"EventCode": "0xD3",
"EventName": "L2D_L2WALK_TLB_REFILL",
"BriefDescription": "Level 2 TLB level-2 walk cache refill. This event does not count if the MMU is disabled"
},
{
"PublicDescription": "Level 2 TLB IPA cache access. This event counts on each access to the IPA cache. If a single pagewalk needs to make multiple accesses to the IPA cache, each access is counted. If stage 2 translation is disabled, this event does not count",
"EventCode": "0xD4",
"EventName": "L2D_S2_TLB",
"BriefDescription": "Level 2 TLB IPA cache access. This event counts on each access to the IPA cache. If a single pagewalk needs to make multiple accesses to the IPA cache, each access is counted. If stage 2 translation is disabled, this event does not count"
},
{
"PublicDescription": "Level 2 TLB IPA cache refill. This event counts on each refill of the IPA cache. If a single pagewalk needs to make multiple accesses to the IPA cache, each access which causes a refill is counted. If stage 2 translation is disabled, this event does not count",
"EventCode": "0xD5",
"EventName": "L2D_S2_TLB_REFILL",
"BriefDescription": "Level 2 TLB IPA cache refill. This event counts on each refill of the IPA cache. If a single pagewalk needs to make multiple accesses to the IPA cache, each access which causes a refill is counted. If stage 2 translation is disabled, this event does not count"
},
{
"PublicDescription": "Level 2 cache stash dropped. This event counts on each stash request received from the interconnect or ACP, that is targeting L2 and gets dropped due to lack of buffer space to hold the request",
"EventCode": "0xD6",
"EventName": "L2D_CACHE_STASH_DROPPED",
"BriefDescription": "Level 2 cache stash dropped. This event counts on each stash request received from the interconnect or ACP, that is targeting L2 and gets dropped due to lack of buffer space to hold the request"
}
]
[
{
"ArchStdEvent": "EXC_TAKEN"
},
{
"ArchStdEvent": "MEMORY_ERROR"
},
{
"ArchStdEvent": "EXC_IRQ"
},
{
"ArchStdEvent": "EXC_FIQ"
},
{
"PublicDescription": "Predecode error",
"EventCode": "0xC6",
"EventName": "PREDECODE_ERROR",
"BriefDescription": "Predecode error"
}
]
[
{
"ArchStdEvent": "SW_INCR"
},
{
"ArchStdEvent": "LD_RETIRED"
},
{
"ArchStdEvent": "ST_RETIRED"
},
{
"ArchStdEvent": "INST_RETIRED"
},
{
"ArchStdEvent": "EXC_RETURN"
},
{
"ArchStdEvent": "CID_WRITE_RETIRED"
},
{
"ArchStdEvent": "PC_WRITE_RETIRED"
},
{
"ArchStdEvent": "BR_IMMED_RETIRED"
},
{
"ArchStdEvent": "BR_RETURN_RETIRED"
},
{
"ArchStdEvent": "INST_SPEC"
},
{
"ArchStdEvent": "TTBR_WRITE_RETIRED"
},
{
"ArchStdEvent": "BR_RETIRED"
},
{
"ArchStdEvent": "BR_MIS_PRED_RETIRED"
},
{
"ArchStdEvent": "LD_SPEC"
},
{
"ArchStdEvent": "ST_SPEC"
},
{
"ArchStdEvent": "LDST_SPEC"
},
{
"ArchStdEvent": "DP_SPEC"
},
{
"ArchStdEvent": "ASE_SPEC"
},
{
"ArchStdEvent": "VFP_SPEC"
},
{
"ArchStdEvent": "PC_WRITE_SPEC"
},
{
"ArchStdEvent": "CRYPTO_SPEC"
}
]
[
{
"ArchStdEvent": "UNALIGNED_LDST_RETIRED"
},
{
"ArchStdEvent": "MEM_ACCESS"
},
{
"ArchStdEvent": "REMOTE_ACCESS_RD"
},
{
"ArchStdEvent": "MEM_ACCESS_RD"
},
{
"ArchStdEvent": "MEM_ACCESS_WR"
}
]
[
{
"ArchStdEvent": "STALL_FRONTEND"
},
{
"ArchStdEvent": "STALL_BACKEND"
},
{
"PublicDescription": "No operation issued due to the frontend, cache miss. This event counts every cycle the DPU IQ is empty and there is an instruction cache miss being processed",
"EventCode": "0xE1",
"EventName": "STALL_FRONTEND_CACHE",
"BriefDescription": "No operation issued due to the frontend, cache miss. This event counts every cycle the DPU IQ is empty and there is an instruction cache miss being processed"
},
{
"PublicDescription": "No operation issued due to the frontend, TLB miss. This event counts every cycle the DPU IQ is empty and there is an instruction L1 TLB miss being processed",
"EventCode": "0xE2",
"EventName": "STALL_FRONTEND_TLB",
"BriefDescription": "No operation issued due to the frontend, TLB miss. This event counts every cycle the DPU IQ is empty and there is an instruction L1 TLB miss being processed"
},
{
"PublicDescription": "No operation issued due to the frontend, pre-decode error. This event counts every cycle the DPU IQ is empty and there is a pre-decode error being processed",
"EventCode": "0xE3",
"EventName": "STALL_FRONTEND_PDERR",
"BriefDescription": "No operation issued due to the frontend, pre-decode error. This event counts every cycle the DPU IQ is empty and there is a pre-decode error being processed"
},
{
"PublicDescription": "No operation issued due to the backend interlock. This event counts every cycle that issue is stalled and there is an interlock. Stall cycles due to a stall in Wr (typically awaiting load data) are excluded",
"EventCode": "0xE4",
"EventName": "STALL_BACKEND_ILOCK",
"BriefDescription": "No operation issued due to the backend interlock. This event counts every cycle that issue is stalled and there is an interlock. Stall cycles due to a stall in Wr (typically awaiting load data) are excluded"
},
{
"PublicDescription": "No operation issued due to the backend, interlock, AGU. This event counts every cycle that issue is stalled and there is an interlock that is due to a load/store instruction waiting for data to calculate the address in the AGU. Stall cycles due to a stall in Wr (typically awaiting load data) are excluded",
"EventCode": "0xE5",
"EventName": "STALL_BACKEND_ILOCK_AGU",
"BriefDescription": "No operation issued due to the backend, interlock, AGU. This event counts every cycle that issue is stalled and there is an interlock that is due to a load/store instruction waiting for data to calculate the address in the AGU. Stall cycles due to a stall in Wr (typically awaiting load data) are excluded"
},
{
"PublicDescription": "No operation issued due to the backend, interlock, FPU. This event counts every cycle that issue is stalled and there is an interlock that is due to an FPU/NEON instruction. Stall cycles due to a stall in the Wr stage (typically awaiting load data) are excluded",
"EventCode": "0xE6",
"EventName": "STALL_BACKEND_ILOCK_FPU",
"BriefDescription": "No operation issued due to the backend, interlock, FPU. This event counts every cycle that issue is stalled and there is an interlock that is due to an FPU/NEON instruction. Stall cycles due to a stall in the Wr stage (typically awaiting load data) are excluded"
},
{
"PublicDescription": "No operation issued due to the backend, load. This event counts every cycle there is a stall in the Wr stage due to a load",
"EventCode": "0xE7",
"EventName": "STALL_BACKEND_LD",
"BriefDescription": "No operation issued due to the backend, load. This event counts every cycle there is a stall in the Wr stage due to a load"
},
{
"PublicDescription": "No operation issued due to the backend, store. This event counts every cycle there is a stall in the Wr stage due to a store",
"EventCode": "0xE8",
"EventName": "STALL_BACKEND_ST",
"BriefDescription": "No operation issued due to the backend, store. This event counts every cycle there is a stall in the Wr stage due to a store"
},
{
"PublicDescription": "No operation issued due to the backend, load, cache miss. This event counts every cycle there is a stall in the Wr stage due to a load which is waiting on data (due to missing the cache or being non-cacheable)",
"EventCode": "0xE9",
"EventName": "STALL_BACKEND_LD_CACHE",
"BriefDescription": "No operation issued due to the backend, load, cache miss. This event counts every cycle there is a stall in the Wr stage due to a load which is waiting on data (due to missing the cache or being non-cacheable)"
},
{
"PublicDescription": "No operation issued due to the backend, load, TLB miss. This event counts every cycle there is a stall in the Wr stage due to a load which has missed in the L1 TLB",
"EventCode": "0xEA",
"EventName": "STALL_BACKEND_LD_TLB",
"BriefDescription": "No operation issued due to the backend, load, TLB miss. This event counts every cycle there is a stall in the Wr stage due to a load which has missed in the L1 TLB"
},
{
"PublicDescription": "No operation issued due to the backend, store, STB full. This event counts every cycle there is a stall in the Wr stage due to a store which is waiting due to the STB being full",
"EventCode": "0xEB",
"EventName": "STALL_BACKEND_ST_STB",
"BriefDescription": "No operation issued due to the backend, store, STB full. This event counts every cycle there is a stall in the Wr stage due to a store which is waiting due to the STB being full"
},
{
"PublicDescription": "No operation issued due to the backend, store, TLB miss. This event counts every cycle there is a stall in the Wr stage due to a store which has missed in the L1 TLB",
"EventCode": "0xEC",
"EventName": "STALL_BACKEND_ST_TLB",
"BriefDescription": "No operation issued due to the backend, store, TLB miss. This event counts every cycle there is a stall in the Wr stage due to a store which has missed in the L1 TLB"
}
]
[
{
"ArchStdEvent": "BR_MIS_PRED"
},
{
"ArchStdEvent": "BR_PRED"
},
{
"ArchStdEvent": "BR_IMMED_SPEC"
},
{
"ArchStdEvent": "BR_RETURN_SPEC"
},
{
"ArchStdEvent": "BR_INDIRECT_SPEC"
}
]
[
{
"ArchStdEvent": "CPU_CYCLES"
},
{
"ArchStdEvent": "BUS_ACCESS"
},
{
"ArchStdEvent": "BUS_CYCLES"
},
{
"ArchStdEvent": "BUS_ACCESS_RD"
},
{
"ArchStdEvent": "BUS_ACCESS_WR"
},
{
"ArchStdEvent": "BUS_ACCESS_SHARED"
},
{
"ArchStdEvent": "BUS_ACCESS_NOT_SHARED"
},
{
"ArchStdEvent": "BUS_ACCESS_NORMAL"
},
{
"ArchStdEvent": "BUS_ACCESS_PERIPH"
}
]
[
{
"ArchStdEvent": "L1I_CACHE_REFILL"
},
{
"ArchStdEvent": "L1I_TLB_REFILL"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL"
},
{
"ArchStdEvent": "L1D_CACHE"
},
{
"ArchStdEvent": "L1D_TLB_REFILL"
},
{
"ArchStdEvent": "L1I_CACHE"
},
{
"ArchStdEvent": "L1D_CACHE_WB"
},
{
"ArchStdEvent": "L2D_CACHE"
},
{
"ArchStdEvent": "L2D_CACHE_REFILL"
},
{
"ArchStdEvent": "L2D_CACHE_WB"
},
{
"ArchStdEvent": "L1D_CACHE_RD"
},
{
"ArchStdEvent": "L1D_CACHE_WR"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL_RD"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL_WR"
},
{
"ArchStdEvent": "L1D_CACHE_WB_VICTIM"
},
{
"ArchStdEvent": "L1D_CACHE_WB_CLEAN"
},
{
"ArchStdEvent": "L1D_CACHE_INVAL"
},
{
"ArchStdEvent": "L1D_TLB_REFILL_RD"
},
{
"ArchStdEvent": "L1D_TLB_REFILL_WR"
},
{
"ArchStdEvent": "L2D_CACHE_RD"
},
{
"ArchStdEvent": "L2D_CACHE_WR"
},
{
"ArchStdEvent": "L2D_CACHE_REFILL_RD"
},
{
"ArchStdEvent": "L2D_CACHE_REFILL_WR"
},
{
"ArchStdEvent": "L2D_CACHE_WB_VICTIM"
},
{
"ArchStdEvent": "L2D_CACHE_WB_CLEAN"
},
{
"ArchStdEvent": "L2D_CACHE_INVAL"
}
]
[
{
"ArchStdEvent": "EXC_TAKEN"
},
{
"ArchStdEvent": "MEMORY_ERROR"
},
{
"ArchStdEvent": "EXC_UNDEF"
},
{
"ArchStdEvent": "EXC_SVC"
},
{
"ArchStdEvent": "EXC_PABORT"
},
{
"ArchStdEvent": "EXC_DABORT"
},
{
"ArchStdEvent": "EXC_IRQ"
},
{
"ArchStdEvent": "EXC_FIQ"
},
{
"ArchStdEvent": "EXC_SMC"
},
{
"ArchStdEvent": "EXC_HVC"
},
{
"ArchStdEvent": "EXC_TRAP_PABORT"
},
{
"ArchStdEvent": "EXC_TRAP_DABORT"
},
{
"ArchStdEvent": "EXC_TRAP_OTHER"
},
{
"ArchStdEvent": "EXC_TRAP_IRQ"
},
{
"ArchStdEvent": "EXC_TRAP_FIQ"
}
]
[
{
"ArchStdEvent": "SW_INCR"
},
{
"ArchStdEvent": "INST_RETIRED"
},
{
"ArchStdEvent": "EXC_RETURN"
},
{
"ArchStdEvent": "CID_WRITE_RETIRED"
},
{
"ArchStdEvent": "INST_SPEC"
},
{
"ArchStdEvent": "TTBR_WRITE_RETIRED"
},
{
"ArchStdEvent": "LDREX_SPEC"
},
{
"ArchStdEvent": "STREX_PASS_SPEC"
},
{
"ArchStdEvent": "STREX_FAIL_SPEC"
},
{
"ArchStdEvent": "LD_SPEC"
},
{
"ArchStdEvent": "ST_SPEC"
},
{
"ArchStdEvent": "LDST_SPEC"
},
{
"ArchStdEvent": "DP_SPEC"
},
{
"ArchStdEvent": "ASE_SPEC"
},
{
"ArchStdEvent": "VFP_SPEC"
},
{
"ArchStdEvent": "PC_WRITE_SPEC"
},
{
"ArchStdEvent": "CRYPTO_SPEC"
},
{
"ArchStdEvent": "ISB_SPEC"
},
{
"ArchStdEvent": "DSB_SPEC"
},
{
"ArchStdEvent": "DMB_SPEC"
},
{
"ArchStdEvent": "RC_LD_SPEC"
},
{
"ArchStdEvent": "RC_ST_SPEC"
}
]
[
{
"ArchStdEvent": "MEM_ACCESS"
},
{
"ArchStdEvent": "MEM_ACCESS_RD"
},
{
"ArchStdEvent": "MEM_ACCESS_WR"
},
{
"ArchStdEvent": "UNALIGNED_LD_SPEC"
},
{
"ArchStdEvent": "UNALIGNED_ST_SPEC"
},
{
"ArchStdEvent": "UNALIGNED_LDST_SPEC"
}
]
[
{
"ArchStdEvent": "BR_MIS_PRED"
},
{
"ArchStdEvent": "BR_PRED"
},
{
"ArchStdEvent": "BR_IMMED_SPEC"
},
{
"ArchStdEvent": "BR_RETURN_SPEC"
},
{
"ArchStdEvent": "BR_INDIRECT_SPEC"
}
]
[
{
"ArchStdEvent": "CPU_CYCLES"
},
{
"ArchStdEvent": "BUS_ACCESS"
},
{
"ArchStdEvent": "BUS_CYCLES"
},
{
"ArchStdEvent": "BUS_ACCESS_RD"
},
{
"ArchStdEvent": "BUS_ACCESS_WR"
}
]
This diff is collapsed.
[
{
"PublicDescription": "Instruction retired, indirect branch, mispredicted",
"EventCode": "0xE9",
"EventName": "DPU_BR_IND_MIS",
"BriefDescription": "Instruction retired, indirect branch, mispredicted"
},
{
"PublicDescription": "Instruction retired, conditional branch, mispredicted",
"EventCode": "0xEA",
"EventName": "DPU_BR_COND_MIS",
"BriefDescription": "Instruction retired, conditional branch, mispredicted"
},
{
"PublicDescription": "Memory error (any type) from IFU",
"EventCode": "0xEB",
"EventName": "DPU_MEM_ERR_IFU",
"BriefDescription": "Memory error (any type) from IFU"
},
{
"PublicDescription": "Memory error (any type) from DCU",
"EventCode": "0xEC",
"EventName": "DPU_MEM_ERR_DCU",
"BriefDescription": "Memory error (any type) from DCU"
},
{
"PublicDescription": "Memory error (any type) from TLB",
"EventCode": "0xED",
"EventName": "DPU_MEM_ERR_TLB",
"BriefDescription": "Memory error (any type) from TLB"
}
]
[
{
"ArchStdEvent": "EXC_TAKEN"
},
{
"ArchStdEvent": "MEMORY_ERROR"
},
{
"ArchStdEvent": "EXC_IRQ"
},
{
"ArchStdEvent": "EXC_FIQ"
}
]
[
{
"PublicDescription": "I-Cache miss on an access from the prefetch block",
"EventCode": "0xD0",
"EventName": "IFU_IC_MISS_WAIT",
"BriefDescription": "I-Cache miss on an access from the prefetch block"
},
{
"PublicDescription": "Counts the cycles spent on a request for Level 2 TLB lookup after a Level 1 ITLB miss",
"EventCode": "0xD1",
"EventName": "IFU_IUTLB_MISS_WAIT",
"BriefDescription": "Counts the cycles spent on a request for Level 2 TLB lookup after a Level 1 ITLB miss"
},
{
"PublicDescription": "Micro-predictor conditional/direction mispredict, with respect to if3/if4 predictor",
"EventCode": "0xD2",
"EventName": "IFU_MICRO_COND_MISPRED",
"BriefDescription": "Micro-predictor conditional/direction mispredict, with respect to if3/if4 predictor"
},
{
"PublicDescription": "Micro-predictor address mispredict, with respect to if3/if4 predictor",
"EventCode": "0xD3",
"EventName": "IFU_MICRO_CADDR_MISPRED",
"BriefDescription": "Micro-predictor address mispredict, with respect to if3/if4 predictor"
},
{
"PublicDescription": "Micro-predictor hit with immediate redirect",
"EventCode": "0xD4",
"EventName": "IFU_MICRO_HIT",
"BriefDescription": "Micro-predictor hit with immediate redirect"
},
{
"PublicDescription": "Micro-predictor negative cache hit",
"EventCode": "0xD6",
"EventName": "IFU_MICRO_NEG_HIT",
"BriefDescription": "Micro-predictor negative cache hit"
},
{
"PublicDescription": "Micro-predictor correction",
"EventCode": "0xD7",
"EventName": "IFU_MICRO_CORRECTION",
"BriefDescription": "Micro-predictor correction"
},
{
"PublicDescription": "A 2nd instruction could have been pushed but was not because it was nonsequential",
"EventCode": "0xD8",
"EventName": "IFU_MICRO_NO_INSTR1",
"BriefDescription": "A 2nd instruction could have been pushed but was not because it was nonsequential"
},
{
"PublicDescription": "Micro-predictor miss",
"EventCode": "0xD9",
"EventName": "IFU_MICRO_NO_PRED",
"BriefDescription": "Micro-predictor miss"
},
{
"PublicDescription": "Thread flushed due to TLB miss",
"EventCode": "0xDA",
"EventName": "IFU_FLUSHED_TLB_MISS",
"BriefDescription": "Thread flushed due to TLB miss"
},
{
"PublicDescription": "Thread flushed due to reasons other than TLB miss",
"EventCode": "0xDB",
"EventName": "IFU_FLUSHED_EXCL_TLB_MISS",
"BriefDescription": "Thread flushed due to reasons other than TLB miss"
},
{
"PublicDescription": "This thread and the other thread both ready for scheduling in if0",
"EventCode": "0xDC",
"EventName": "IFU_ALL_THRDS_RDY",
"BriefDescription": "This thread and the other thread both ready for scheduling in if0"
},
{
"PublicDescription": "This thread was arbitrated when the other thread was also ready for scheduling",
"EventCode": "0xDD",
"EventName": "IFU_WIN_ARB_OTHER_RDY",
"BriefDescription": "This thread was arbitrated when the other thread was also ready for scheduling"
},
{
"PublicDescription": "This thread was arbitrated when the other thread was also active, but not necessarily ready. For example, waiting for I-Cache or TLB",
"EventCode": "0xDE",
"EventName": "IFU_WIN_ARB_OTHER_ACT",
"BriefDescription": "This thread was arbitrated when the other thread was also active, but not necessarily ready. For example, waiting for I-Cache or TLB"
},
{
"PublicDescription": "This thread was not arbitrated because it was not ready for scheduling. For example, due to a cache miss or TLB miss",
"EventCode": "0xDF",
"EventName": "IFU_NOT_RDY_FOR_ARB",
"BriefDescription": "This thread was not arbitrated because it was not ready for scheduling. For example, due to a cache miss or TLB miss"
},
{
"PublicDescription": "The thread moved from an active state to an inactive state (long-term sleep state, causing deallocation of some resources)",
"EventCode": "0xE0",
"EventName": "IFU_GOTO_IDLE",
"BriefDescription": "The thread moved from an active state to an inactive state (long-term sleep state, causing deallocation of some resources)"
},
{
"PublicDescription": "I-Cache lookup under miss from other thread",
"EventCode": "0xE1",
"EventName": "IFU_IC_LOOKUP_UNDER_MISS",
"BriefDescription": "I-Cache lookup under miss from other thread"
},
{
"PublicDescription": "I-Cache miss under miss from other thread",
"EventCode": "0xE2",
"EventName": "IFU_IC_MISS_UNDER_MISS",
"BriefDescription": "I-Cache miss under miss from other thread"
},
{
"PublicDescription": "This thread pushed an instruction into the IQ",
"EventCode": "0xE3",
"EventName": "IFU_INSTR_PUSHED",
"BriefDescription": "This thread pushed an instruction into the IQ"
},
{
"PublicDescription": "I-Cache Speculative line fill",
"EventCode": "0xE4",
"EventName": "IFU_IC_LF_SP",
"BriefDescription": "I-Cache Speculative line fill"
}
]
[
{
"ArchStdEvent": "SW_INCR"
},
{
"ArchStdEvent": "LD_RETIRED"
},
{
"ArchStdEvent": "ST_RETIRED"
},
{
"ArchStdEvent": "INST_RETIRED"
},
{
"ArchStdEvent": "EXC_RETURN"
},
{
"ArchStdEvent": "CID_WRITE_RETIRED"
},
{
"ArchStdEvent": "PC_WRITE_RETIRED"
},
{
"ArchStdEvent": "BR_IMMED_RETIRED"
},
{
"ArchStdEvent": "BR_RETURN_RETIRED"
},
{
"ArchStdEvent": "INST_SPEC"
},
{
"ArchStdEvent": "TTBR_WRITE_RETIRED"
},
{
"ArchStdEvent": "BR_RETIRED"
},
{
"ArchStdEvent": "BR_MIS_PRED_RETIRED"
},
{
"ArchStdEvent": "LD_SPEC"
},
{
"ArchStdEvent": "ST_SPEC"
},
{
"ArchStdEvent": "LDST_SPEC"
},
{
"ArchStdEvent": "DP_SPEC"
},
{
"ArchStdEvent": "ASE_SPEC"
},
{
"ArchStdEvent": "VFP_SPEC"
},
{
"ArchStdEvent": "CRYPTO_SPEC"
},
{
"ArchStdEvent": "ISB_SPEC"
},
{
"PublicDescription": "Instruction retired, conditional branch",
"EventCode": "0xE8",
"EventName": "DPU_BR_COND_RETIRED",
"BriefDescription": "Instruction retired, conditional branch"
}
]
[
{
"ArchStdEvent": "MEM_ACCESS"
},
{
"ArchStdEvent": "REMOTE_ACCESS_RD"
},
{
"ArchStdEvent": "MEM_ACCESS_RD"
},
{
"ArchStdEvent": "MEM_ACCESS_WR"
},
{
"ArchStdEvent": "UNALIGNED_LD_SPEC"
},
{
"ArchStdEvent": "UNALIGNED_ST_SPEC"
},
{
"ArchStdEvent": "UNALIGNED_LDST_SPEC"
},
{
"PublicDescription": "External memory request",
"EventCode": "0xC1",
"EventName": "BIU_EXT_MEM_REQ",
"BriefDescription": "External memory request"
},
{
"PublicDescription": "External memory request to non-cacheable memory",
"EventCode": "0xC2",
"EventName": "BIU_EXT_MEM_REQ_NC",
"BriefDescription": "External memory request to non-cacheable memory"
}
]
[
{
"ArchStdEvent": "STALL_FRONTEND"
},
{
"ArchStdEvent": "STALL_BACKEND"
}
]
[
{
"ArchStdEvent": "BR_MIS_PRED"
},
{
"ArchStdEvent": "BR_PRED"
},
{
"ArchStdEvent": "BR_IMMED_SPEC"
},
{
"ArchStdEvent": "BR_RETURN_SPEC"
},
{
"ArchStdEvent": "BR_INDIRECT_SPEC"
}
]
[
{
"ArchStdEvent": "CPU_CYCLES"
},
{
"ArchStdEvent": "BUS_ACCESS"
},
{
"ArchStdEvent": "BUS_CYCLES"
},
{
"ArchStdEvent": "BUS_ACCESS_RD"
},
{
"ArchStdEvent": "BUS_ACCESS_WR"
},
{
"ArchStdEvent": "CNT_CYCLES"
}
]
[ [
{ {
"ArchStdEvent": "L1D_CACHE_RD" "ArchStdEvent": "L1I_CACHE_REFILL"
},
{
"ArchStdEvent": "L1D_CACHE_WR"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL_RD"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL_WR"
},
{
"ArchStdEvent": "L1D_CACHE_WB_VICTIM"
},
{
"ArchStdEvent": "L1D_CACHE_WB_CLEAN"
},
{
"ArchStdEvent": "L1D_CACHE_INVAL"
},
{
"ArchStdEvent": "L1D_TLB_REFILL_RD"
},
{
"ArchStdEvent": "L1D_TLB_REFILL_WR"
}, },
{ {
"ArchStdEvent": "L2D_CACHE_RD" "ArchStdEvent": "L1I_TLB_REFILL"
}, },
{ {
"ArchStdEvent": "L2D_CACHE_WR" "ArchStdEvent": "L1D_CACHE_REFILL"
}, },
{ {
"ArchStdEvent": "L2D_CACHE_REFILL_RD" "ArchStdEvent": "L1D_CACHE"
}, },
{ {
"ArchStdEvent": "L2D_CACHE_REFILL_WR" "ArchStdEvent": "L1D_TLB_REFILL"
}, },
{ {
"ArchStdEvent": "L2D_CACHE_WB_VICTIM" "ArchStdEvent": "L1I_CACHE"
}, },
{ {
"ArchStdEvent": "L2D_CACHE_WB_CLEAN" "ArchStdEvent": "L1D_CACHE_WB"
}, },
{ {
"ArchStdEvent": "L2D_CACHE_INVAL" "ArchStdEvent": "L2D_CACHE"
}, },
{ {
"ArchStdEvent": "BUS_ACCESS_RD" "ArchStdEvent": "L2D_CACHE_REFILL"
}, },
{ {
"ArchStdEvent": "BUS_ACCESS_WR" "ArchStdEvent": "L2D_CACHE_WB"
}, },
{ {
"ArchStdEvent": "BUS_ACCESS_SHARED" "ArchStdEvent": "L2D_CACHE_ALLOCATE"
}, },
{ {
"ArchStdEvent": "BUS_ACCESS_NOT_SHARED" "ArchStdEvent": "L1D_TLB"
}, },
{ {
"ArchStdEvent": "BUS_ACCESS_NORMAL" "ArchStdEvent": "L1I_TLB"
}, },
{ {
"ArchStdEvent": "BUS_ACCESS_PERIPH" "ArchStdEvent": "L3D_CACHE_ALLOCATE"
}, },
{ {
"ArchStdEvent": "MEM_ACCESS_RD" "ArchStdEvent": "L3D_CACHE_REFILL"
}, },
{ {
"ArchStdEvent": "MEM_ACCESS_WR" "ArchStdEvent": "L3D_CACHE"
}, },
{ {
"ArchStdEvent": "UNALIGNED_LD_SPEC" "ArchStdEvent": "L2D_TLB_REFILL"
}, },
{ {
"ArchStdEvent": "UNALIGNED_ST_SPEC" "ArchStdEvent": "L2D_TLB"
}, },
{ {
"ArchStdEvent": "UNALIGNED_LDST_SPEC" "ArchStdEvent": "DTLB_WALK"
}, },
{ {
"ArchStdEvent": "LDREX_SPEC" "ArchStdEvent": "ITLB_WALK"
}, },
{ {
"ArchStdEvent": "STREX_PASS_SPEC" "ArchStdEvent": "LL_CACHE_RD"
}, },
{ {
"ArchStdEvent": "STREX_FAIL_SPEC" "ArchStdEvent": "LL_CACHE_MISS_RD"
}, },
{ {
"ArchStdEvent": "LD_SPEC" "ArchStdEvent": "L1D_CACHE_LMISS_RD"
}, },
{ {
"ArchStdEvent": "ST_SPEC" "ArchStdEvent": "L1D_CACHE_RD"
}, },
{ {
"ArchStdEvent": "LDST_SPEC" "ArchStdEvent": "L1D_CACHE_WR"
}, },
{ {
"ArchStdEvent": "DP_SPEC" "ArchStdEvent": "L1D_CACHE_REFILL_RD"
}, },
{ {
"ArchStdEvent": "ASE_SPEC" "ArchStdEvent": "L1D_CACHE_REFILL_WR"
}, },
{ {
"ArchStdEvent": "VFP_SPEC" "ArchStdEvent": "L1D_CACHE_REFILL_INNER"
}, },
{ {
"ArchStdEvent": "PC_WRITE_SPEC" "ArchStdEvent": "L1D_CACHE_REFILL_OUTER"
}, },
{ {
"ArchStdEvent": "CRYPTO_SPEC" "ArchStdEvent": "L1D_CACHE_WB_VICTIM"
}, },
{ {
"ArchStdEvent": "BR_IMMED_SPEC" "ArchStdEvent": "L1D_CACHE_WB_CLEAN"
}, },
{ {
"ArchStdEvent": "BR_RETURN_SPEC" "ArchStdEvent": "L1D_CACHE_INVAL"
}, },
{ {
"ArchStdEvent": "BR_INDIRECT_SPEC" "ArchStdEvent": "L1D_TLB_REFILL_RD"
}, },
{ {
"ArchStdEvent": "ISB_SPEC" "ArchStdEvent": "L1D_TLB_REFILL_WR"
}, },
{ {
"ArchStdEvent": "DSB_SPEC" "ArchStdEvent": "L1D_TLB_RD"
}, },
{ {
"ArchStdEvent": "DMB_SPEC" "ArchStdEvent": "L1D_TLB_WR"
}, },
{ {
"ArchStdEvent": "EXC_UNDEF" "ArchStdEvent": "L2D_CACHE_RD"
}, },
{ {
"ArchStdEvent": "EXC_SVC" "ArchStdEvent": "L2D_CACHE_WR"
}, },
{ {
"ArchStdEvent": "EXC_PABORT" "ArchStdEvent": "L2D_CACHE_REFILL_RD"
}, },
{ {
"ArchStdEvent": "EXC_DABORT" "ArchStdEvent": "L2D_CACHE_REFILL_WR"
}, },
{ {
"ArchStdEvent": "EXC_IRQ" "ArchStdEvent": "L2D_CACHE_WB_VICTIM"
}, },
{ {
"ArchStdEvent": "EXC_FIQ" "ArchStdEvent": "L2D_CACHE_WB_CLEAN"
}, },
{ {
"ArchStdEvent": "EXC_SMC" "ArchStdEvent": "L2D_CACHE_INVAL"
}, },
{ {
"ArchStdEvent": "EXC_HVC" "ArchStdEvent": "L2D_TLB_REFILL_RD"
}, },
{ {
"ArchStdEvent": "EXC_TRAP_PABORT" "ArchStdEvent": "L2D_TLB_REFILL_WR"
}, },
{ {
"ArchStdEvent": "EXC_TRAP_DABORT" "ArchStdEvent": "L2D_TLB_RD"
}, },
{ {
"ArchStdEvent": "EXC_TRAP_OTHER" "ArchStdEvent": "L2D_TLB_WR"
}, },
{ {
"ArchStdEvent": "EXC_TRAP_IRQ" "ArchStdEvent": "L3D_CACHE_RD"
}, },
{ {
"ArchStdEvent": "EXC_TRAP_FIQ" "ArchStdEvent": "L1I_CACHE_LMISS"
}, },
{ {
"ArchStdEvent": "RC_LD_SPEC" "ArchStdEvent": "L2D_CACHE_LMISS_RD"
}, },
{ {
"ArchStdEvent": "RC_ST_SPEC" "ArchStdEvent": "L3D_CACHE_LMISS_RD"
} }
] ]
[
{
"ArchStdEvent": "EXC_TAKEN"
},
{
"ArchStdEvent": "MEMORY_ERROR"
},
{
"ArchStdEvent": "EXC_UNDEF"
},
{
"ArchStdEvent": "EXC_SVC"
},
{
"ArchStdEvent": "EXC_PABORT"
},
{
"ArchStdEvent": "EXC_DABORT"
},
{
"ArchStdEvent": "EXC_IRQ"
},
{
"ArchStdEvent": "EXC_FIQ"
},
{
"ArchStdEvent": "EXC_SMC"
},
{
"ArchStdEvent": "EXC_HVC"
},
{
"ArchStdEvent": "EXC_TRAP_PABORT"
},
{
"ArchStdEvent": "EXC_TRAP_DABORT"
},
{
"ArchStdEvent": "EXC_TRAP_OTHER"
},
{
"ArchStdEvent": "EXC_TRAP_IRQ"
},
{
"ArchStdEvent": "EXC_TRAP_FIQ"
}
]
[
{
"ArchStdEvent": "SW_INCR"
},
{
"ArchStdEvent": "INST_RETIRED"
},
{
"ArchStdEvent": "EXC_RETURN"
},
{
"ArchStdEvent": "CID_WRITE_RETIRED"
},
{
"ArchStdEvent": "INST_SPEC"
},
{
"ArchStdEvent": "TTBR_WRITE_RETIRED"
},
{
"ArchStdEvent": "BR_RETIRED"
},
{
"ArchStdEvent": "BR_MIS_PRED_RETIRED"
},
{
"ArchStdEvent": "OP_RETIRED"
},
{
"ArchStdEvent": "OP_SPEC"
},
{
"ArchStdEvent": "LDREX_SPEC"
},
{
"ArchStdEvent": "STREX_PASS_SPEC"
},
{
"ArchStdEvent": "STREX_FAIL_SPEC"
},
{
"ArchStdEvent": "STREX_SPEC"
},
{
"ArchStdEvent": "LD_SPEC"
},
{
"ArchStdEvent": "ST_SPEC"
},
{
"ArchStdEvent": "DP_SPEC"
},
{
"ArchStdEvent": "ASE_SPEC"
},
{
"ArchStdEvent": "VFP_SPEC"
},
{
"ArchStdEvent": "PC_WRITE_SPEC"
},
{
"ArchStdEvent": "CRYPTO_SPEC"
},
{
"ArchStdEvent": "ISB_SPEC"
},
{
"ArchStdEvent": "DSB_SPEC"
},
{
"ArchStdEvent": "DMB_SPEC"
},
{
"ArchStdEvent": "RC_LD_SPEC"
},
{
"ArchStdEvent": "RC_ST_SPEC"
},
{
"ArchStdEvent": "ASE_INST_SPEC"
},
{
"ArchStdEvent": "SVE_INST_SPEC"
},
{
"ArchStdEvent": "FP_HP_SPEC"
},
{
"ArchStdEvent": "FP_SP_SPEC"
},
{
"ArchStdEvent": "FP_DP_SPEC"
},
{
"ArchStdEvent": "SVE_PRED_SPEC"
},
{
"ArchStdEvent": "SVE_PRED_EMPTY_SPEC"
},
{
"ArchStdEvent": "SVE_PRED_FULL_SPEC"
},
{
"ArchStdEvent": "SVE_PRED_PARTIAL_SPEC"
},
{
"ArchStdEvent": "SVE_PRED_NOT_FULL_SPEC"
},
{
"ArchStdEvent": "SVE_LDFF_SPEC"
},
{
"ArchStdEvent": "SVE_LDFF_FAULT_SPEC"
},
{
"ArchStdEvent": "FP_SCALE_OPS_SPEC"
},
{
"ArchStdEvent": "FP_FIXED_OPS_SPEC"
},
{
"ArchStdEvent": "ASE_SVE_INT8_SPEC"
},
{
"ArchStdEvent": "ASE_SVE_INT16_SPEC"
},
{
"ArchStdEvent": "ASE_SVE_INT32_SPEC"
},
{
"ArchStdEvent": "ASE_SVE_INT64_SPEC"
}
]
[
{
"ArchStdEvent": "MEM_ACCESS"
},
{
"ArchStdEvent": "REMOTE_ACCESS"
},
{
"ArchStdEvent": "MEM_ACCESS_RD"
},
{
"ArchStdEvent": "MEM_ACCESS_WR"
},
{
"ArchStdEvent": "UNALIGNED_LD_SPEC"
},
{
"ArchStdEvent": "UNALIGNED_ST_SPEC"
},
{
"ArchStdEvent": "UNALIGNED_LDST_SPEC"
},
{
"ArchStdEvent": "LDST_ALIGN_LAT"
},
{
"ArchStdEvent": "LD_ALIGN_LAT"
},
{
"ArchStdEvent": "ST_ALIGN_LAT"
},
{
"ArchStdEvent": "MEM_ACCESS_CHECKED"
},
{
"ArchStdEvent": "MEM_ACCESS_CHECKED_RD"
},
{
"ArchStdEvent": "MEM_ACCESS_CHECKED_WR"
}
]
[
{
"ArchStdEvent": "STALL_FRONTEND"
},
{
"ArchStdEvent": "STALL_BACKEND"
},
{
"ArchStdEvent": "STALL"
},
{
"ArchStdEvent": "STALL_SLOT_BACKEND"
},
{
"ArchStdEvent": "STALL_SLOT_FRONTEND"
},
{
"ArchStdEvent": "STALL_SLOT"
},
{
"ArchStdEvent": "STALL_BACKEND_MEM"
}
]
[
{
"ArchStdEvent": "TRB_WRAP"
},
{
"ArchStdEvent": "TRCEXTOUT0"
},
{
"ArchStdEvent": "TRCEXTOUT1"
},
{
"ArchStdEvent": "TRCEXTOUT2"
},
{
"ArchStdEvent": "TRCEXTOUT3"
},
{
"ArchStdEvent": "CTI_TRIGOUT4"
},
{
"ArchStdEvent": "CTI_TRIGOUT5"
},
{
"ArchStdEvent": "CTI_TRIGOUT6"
},
{
"ArchStdEvent": "CTI_TRIGOUT7"
}
]
[
{
"ArchStdEvent": "BR_MIS_PRED"
},
{
"ArchStdEvent": "BR_PRED"
},
{
"ArchStdEvent": "BR_INDIRECT_SPEC"
}
]
[
{
"ArchStdEvent": "CPU_CYCLES"
},
{
"ArchStdEvent": "BUS_ACCESS"
},
{
"ArchStdEvent": "BUS_CYCLES"
},
{
"ArchStdEvent": "BUS_ACCESS_SHARED"
},
{
"ArchStdEvent": "BUS_ACCESS_NOT_SHARED"
},
{
"ArchStdEvent": "BUS_ACCESS_NORMAL"
},
{
"ArchStdEvent": "BUS_ACCESS_PERIPH"
}
]
This diff is collapsed.
[
{
"PublicDescription": "ETM trace unit output 0",
"EventCode": "0xDE",
"EventName": "ETM_EXT_OUT0",
"BriefDescription": "ETM trace unit output 0"
},
{
"PublicDescription": "ETM trace unit output 1",
"EventCode": "0xDF",
"EventName": "ETM_EXT_OUT1",
"BriefDescription": "ETM trace unit output 1"
}
]
[
{
"ArchStdEvent": "EXC_TAKEN"
},
{
"ArchStdEvent": "EXC_HVC"
},
{
"PublicDescription": "Number of Traps to hypervisor",
"EventCode": "0xDC",
"EventName": "EXC_TRAP_HYP",
"BriefDescription": "Number of Traps to hypervisor"
}
]
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment