Commit 11737ca9 authored by Ingo Molnar

Merge tag 'perf-core-for-mingo-4.17-20180216' of...

Merge tag 'perf-core-for-mingo-4.17-20180216' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

- Fix wrong jump arrow in systems with branch records with cycles,
  i.e. Intel's >= Skylake (Jin Yao)

- Fix 'perf record --per-thread' problem introduced when
  implementing 'perf stat --per-thread' (Jin Yao)

- Use arch__compare_symbol_names() to fix 'perf test vmlinux',
  that was using strcmp(symbol names) while the dso routines
  doing symbol lookups used the arch overridable one, making
  this test fail in architectures that overrode that function
  with something other than strcmp() (Jiri Olsa)

- Add 'perf script --show-round-events' to display
  PERF_RECORD_FINISHED_ROUND entries (Jiri Olsa)

- Fix dwarf unwind for stripped binaries in 'perf test' (Jiri Olsa)

- Use ordered_events for 'perf report --tasks', otherwise we may get
  artifacts when PERF_RECORD_FORK gets processed before PERF_RECORD_COMM
  (when they got recorded in different CPUs) (Jiri Olsa)

- Add support to display group output for non group events, i.e.
  now when one uses 'perf report --group' on a perf.data file
  recorded without explicitly grouping events with {}, one gets
  the same output that "perf record -e '{cycles,instructions}'"
  would produce, i.e. all those non-grouped events shown in
  multiple columns, at the same time (Jiri Olsa)

- Skip non-address kallsyms entries, e.g. '(null)' for !root (Jiri Olsa)

- Kernel maps fixes wrt perf.data(report) versus live system (top)
  (Jiri Olsa)

- Fix memory corruption when using 'perf record -j call -g -a <application>'
  followed by 'perf report --branch-history' (Jiri Olsa)

- ARM CoreSight fixes (Mathieu Poirier)

- Add inject capability for CoreSight Traces (Robert Walker)

- Update documentation for use of 'perf' + ARM CoreSight (Robert Walker)

- Man pages fixes (Sangwon Hong, Jaecheol Shin)

- Fix some 'perf test' cases on s/390 and x86_64 (some backtraces
  changed with a glibc update) (Thomas Richter)

- Add detailed CPUID info in the 'perf.data' headers for s/390 to
  then use it in 'perf annotate' (Thomas Richter)

- Add '--interval-count N' to 'perf stat', to use with -I, i.e.
  'perf stat -I 1000 --interval-count 2' will show stats every
   1000ms, two times (yuzhoujian)

- Add 'perf stat --timeout Nms', that will run for that many
  milliseconds and then stop, printing the counters (yuzhoujian)

- Fix description for 'perf report --mem-mode' (Andi Kleen)

- Use a wildcard to remove the vfs_getname probe in the
  'perf test' shell based test cases (Arnaldo Carvalho de Melo)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents 7057bb97 21316ac6
......@@ -330,3 +330,54 @@ Details on how to use the generic STM API can be found here [2].
[1]. Documentation/ABI/testing/sysfs-bus-coresight-devices-stm
[2]. Documentation/trace/stm.txt
Using perf tools
----------------
perf can be used to record and analyze trace of programs.
Execution can be recorded using 'perf record' with the cs_etm event,
specifying the name of the sink to record to, e.g:
perf record -e cs_etm/@20070000.etr/u --per-thread
The 'perf report' and 'perf script' commands can be used to analyze execution,
synthesizing instruction and branch events from the instruction trace.
'perf inject' can be used to replace the trace data with the synthesized events.
The --itrace option controls the type and frequency of synthesized events
(see perf documentation).
Note that only 64-bit programs are currently supported - further work is
required to support instruction decode of 32-bit Arm programs.
Generating coverage files for Feedback Directed Optimization: AutoFDO
---------------------------------------------------------------------
'perf inject' accepts the --itrace option in which case tracing data is
removed and replaced with the synthesized events. e.g.
perf inject --itrace --strip -i perf.data -o perf.data.new
Below is an example of using ARM ETM for autoFDO. It requires autofdo
(https://github.com/google/autofdo) and gcc version 5. The bubble
sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial).
$ gcc-5 -O3 sort.c -o sort
$ taskset -c 2 ./sort
Bubble sorting array of 30000 elements
5910 ms
$ perf record -e cs_etm/@20070000.etr/u --per-thread taskset -c 2 ./sort
Bubble sorting array of 30000 elements
12543 ms
[ perf record: Woken up 35 times to write data ]
[ perf record: Captured and wrote 69.640 MB perf.data ]
$ perf inject -i perf.data -o inj.data --itrace=il64 --strip
$ create_gcov --binary=./sort --profile=inj.data --gcov=sort.gcov -gcov_version=1
$ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo
$ taskset -c 2 ./sort_autofdo
Bubble sorting array of 30000 elements
5806 ms
This diff is collapsed.
......@@ -315,12 +315,8 @@ int filename__read_int(const char *filename, int *value)
return err;
}
/*
* Parses @value out of @filename with strtoull.
* By using 0 for base, the strtoull detects the
* base automatically (see man strtoull).
*/
int filename__read_ull(const char *filename, unsigned long long *value)
static int filename__read_ull_base(const char *filename,
unsigned long long *value, int base)
{
char line[64];
int fd = open(filename, O_RDONLY), err = -1;
......@@ -329,7 +325,7 @@ int filename__read_ull(const char *filename, unsigned long long *value)
return -1;
if (read(fd, line, sizeof(line)) > 0) {
*value = strtoull(line, NULL, 0);
*value = strtoull(line, NULL, base);
if (*value != ULLONG_MAX)
err = 0;
}
......@@ -338,6 +334,25 @@ int filename__read_ull(const char *filename, unsigned long long *value)
return err;
}
/*
 * Parses @value out of @filename with strtoull(), forcing base 16 so
 * that the file content is always interpreted as a hexadecimal number.
 *
 * Returns 0 on success and -1 on failure, as reported by
 * filename__read_ull_base().
 */
int filename__read_xll(const char *filename, unsigned long long *value)
{
return filename__read_ull_base(filename, value, 16);
}
/*
 * Parses @value out of @filename with strtoull().
 * By passing 0 as the base, strtoull() detects the base
 * automatically from the number's prefix (see man strtoull).
 *
 * Returns 0 on success and -1 on failure, as reported by
 * filename__read_ull_base().
 */
int filename__read_ull(const char *filename, unsigned long long *value)
{
return filename__read_ull_base(filename, value, 0);
}
#define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */
int filename__read_str(const char *filename, char **buf, size_t *sizep)
......@@ -417,7 +432,8 @@ int procfs__read_str(const char *entry, char **buf, size_t *sizep)
return filename__read_str(path, buf, sizep);
}
int sysfs__read_ull(const char *entry, unsigned long long *value)
static int sysfs__read_ull_base(const char *entry,
unsigned long long *value, int base)
{
char path[PATH_MAX];
const char *sysfs = sysfs__mountpoint();
......@@ -427,7 +443,17 @@ int sysfs__read_ull(const char *entry, unsigned long long *value)
snprintf(path, sizeof(path), "%s/%s", sysfs, entry);
return filename__read_ull(path, value);
return filename__read_ull_base(path, value, base);
}
/*
 * Reads the sysfs file @entry (a path relative to the sysfs mount point)
 * and parses its content into @value as a hexadecimal number (base 16).
 *
 * Returns the result of sysfs__read_ull_base().
 */
int sysfs__read_xll(const char *entry, unsigned long long *value)
{
return sysfs__read_ull_base(entry, value, 16);
}
/*
 * Reads the sysfs file @entry (a path relative to the sysfs mount point)
 * and parses its content into @value, letting strtoull() detect the
 * numeric base automatically (base 0).
 *
 * Returns the result of sysfs__read_ull_base().
 */
int sysfs__read_ull(const char *entry, unsigned long long *value)
{
return sysfs__read_ull_base(entry, value, 0);
}
int sysfs__read_int(const char *entry, int *value)
......
......@@ -30,6 +30,7 @@ FS(bpf_fs)
int filename__read_int(const char *filename, int *value);
int filename__read_ull(const char *filename, unsigned long long *value);
int filename__read_xll(const char *filename, unsigned long long *value);
int filename__read_str(const char *filename, char **buf, size_t *sizep);
int filename__write_int(const char *filename, int value);
......@@ -39,6 +40,7 @@ int procfs__read_str(const char *entry, char **buf, size_t *sizep);
int sysctl__read_int(const char *sysctl, int *value);
int sysfs__read_int(const char *entry, int *value);
int sysfs__read_ull(const char *entry, unsigned long long *value);
int sysfs__read_xll(const char *entry, unsigned long long *value);
int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
int sysfs__read_bool(const char *entry, bool *value);
......
......@@ -38,6 +38,10 @@ int kallsyms__parse(const char *filename, void *arg,
len = hex2u64(line, &start);
/* Skip the line if we failed to parse the address. */
if (!len)
continue;
len++;
if (len + 2 >= line_len)
continue;
......
......@@ -21,7 +21,7 @@ If there is no debug info in the object, then annotated assembly is displayed.
OPTIONS
-------
-i::
--input=::
--input=<file>::
Input file name. (default: perf.data unless stdin is a fifo)
-d::
......@@ -69,7 +69,7 @@ OPTIONS
--stdio:: Use the stdio interface.
--stdio-color::
--stdio-color=<mode>::
'always', 'never' or 'auto', allowing configuring color output
via the command line, in addition to via "color.ui" .perfconfig.
Use '--stdio-color always' to generate color even when redirecting
......@@ -84,7 +84,7 @@ OPTIONS
--gtk:: Use the GTK interface.
-C::
--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
--cpu=<cpu>:: Only report samples for the list of CPUs provided. Multiple CPUs can
be provided as a comma-separated list with no space: 0,1. Ranges of
CPUs are specified with -: 0-2. Default is to report samples on all
CPUs.
......
......@@ -25,6 +25,10 @@ OPTIONS
--input=<file>::
Select the input file (default: perf.data unless stdin is a fifo)
-f::
--force::
Don't do ownership validation
-v::
--verbose::
Be more verbose. (show symbol address, etc)
......@@ -61,7 +65,7 @@ OPTIONS
default, but this option shows live (currently allocated) pages
instead. (This option works with --page option only)
--time::
--time=<start>,<stop>::
Only analyze samples within given time window: <start>,<stop>. Times
have the format seconds.microseconds. If start is not given (i.e., time
string is ',x.y') then analysis starts at the beginning of the file. If
......
......@@ -28,6 +28,10 @@ OPTIONS
<command>...::
Any command you can specify in a shell.
-f::
--force::
Don't do ownership validation
-t::
--type=::
Select the memory operation type: load or store (default: load,store)
......
......@@ -354,7 +354,8 @@ OPTIONS
Path to objdump binary.
--group::
Show event group information together.
Show event group information together. It also forces group output
when no groups are defined in the data file.
--demangle::
Demangle symbol names to human readable form. It's enabled by default,
......@@ -367,7 +368,7 @@ OPTIONS
Use the data addresses of samples in addition to instruction addresses
to build the histograms. To generate meaningful output, the perf.data
file must have been obtained using perf record -d -W and using a
special event -e cpu/mem-loads/ or -e cpu/mem-stores/. See
special event -e cpu/mem-loads/p or -e cpu/mem-stores/p. See
'perf mem' for simpler access.
--percent-limit::
......
......@@ -303,6 +303,9 @@ OPTIONS
--show-lost-events::
Display lost events i.e. events of type PERF_RECORD_LOST.
--show-round-events::
Display finished round events i.e. events of type PERF_RECORD_FINISHED_ROUND.
--demangle::
Demangle symbol names to human readable form. It's enabled by default,
disable with --no-demangle.
......
......@@ -146,6 +146,16 @@ Print count deltas every N milliseconds (minimum: 10ms)
The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution.
example: 'perf stat -I 1000 -e cycles -a sleep 5'
--interval-count times::
Print count deltas for fixed number of times.
This option should be used together with "-I" option.
example: 'perf stat -I 1000 --interval-count 2 -e cycles -a'
--timeout msecs::
Stop the 'perf stat' session and print count deltas after N milliseconds (minimum: 10 ms).
This option is not supported with the "-I" option.
example: 'perf stat --timeout 2000 -e cycles -a'
--metric-only::
Only print computed metrics. Print them in a single line.
Don't show any raw values. Not supported with --per-thread.
......
......@@ -27,6 +27,8 @@ NO_SYSCALL_TABLE := 1
# Additional ARCH settings for ppc
ifeq ($(SRCARCH),powerpc)
NO_PERF_REGS := 0
NO_SYSCALL_TABLE := 0
CFLAGS += -I$(OUTPUT)arch/powerpc/include/generated
LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
endif
......
......@@ -68,7 +68,7 @@ struct auxtrace_record
bool found_spe = false;
static struct perf_pmu **arm_spe_pmus = NULL;
static int nr_spes = 0;
int i;
int i = 0;
if (!evlist)
return NULL;
......
......@@ -298,12 +298,17 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
{
int i;
int etmv3 = 0, etmv4 = 0;
const struct cpu_map *cpus = evlist->cpus;
struct cpu_map *event_cpus = evlist->cpus;
struct cpu_map *online_cpus = cpu_map__new(NULL);
/* cpu map is not empty, we have specific CPUs to work with */
if (!cpu_map__empty(cpus)) {
for (i = 0; i < cpu_map__nr(cpus); i++) {
if (cs_etm_is_etmv4(itr, cpus->map[i]))
if (!cpu_map__empty(event_cpus)) {
for (i = 0; i < cpu__max_cpu(); i++) {
if (!cpu_map__has(event_cpus, i) ||
!cpu_map__has(online_cpus, i))
continue;
if (cs_etm_is_etmv4(itr, i))
etmv4++;
else
etmv3++;
......@@ -311,6 +316,9 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
} else {
/* get configuration for all CPUs in the system */
for (i = 0; i < cpu__max_cpu(); i++) {
if (!cpu_map__has(online_cpus, i))
continue;
if (cs_etm_is_etmv4(itr, i))
etmv4++;
else
......@@ -318,6 +326,8 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
}
}
cpu_map__put(online_cpus);
return (CS_ETM_HEADER_SIZE +
(etmv4 * CS_ETMV4_PRIV_SIZE) +
(etmv3 * CS_ETMV3_PRIV_SIZE));
......@@ -447,7 +457,9 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
int i;
u32 offset;
u64 nr_cpu, type;
const struct cpu_map *cpus = session->evlist->cpus;
struct cpu_map *cpu_map;
struct cpu_map *event_cpus = session->evlist->cpus;
struct cpu_map *online_cpus = cpu_map__new(NULL);
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
......@@ -458,8 +470,21 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
if (!session->evlist->nr_mmaps)
return -EINVAL;
/* If the cpu_map is empty all CPUs are involved */
nr_cpu = cpu_map__empty(cpus) ? cpu__max_cpu() : cpu_map__nr(cpus);
/* If the cpu_map is empty all online CPUs are involved */
if (cpu_map__empty(event_cpus)) {
cpu_map = online_cpus;
} else {
/* Make sure all specified CPUs are online */
for (i = 0; i < cpu_map__nr(event_cpus); i++) {
if (cpu_map__has(event_cpus, i) &&
!cpu_map__has(online_cpus, i))
return -EINVAL;
}
cpu_map = event_cpus;
}
nr_cpu = cpu_map__nr(cpu_map);
/* Get PMU type as dynamically assigned by the core */
type = cs_etm_pmu->type;
......@@ -472,15 +497,11 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
offset = CS_ETM_SNAPSHOT + 1;
/* cpu map is not empty, we have specific CPUs to work with */
if (!cpu_map__empty(cpus)) {
for (i = 0; i < cpu_map__nr(cpus) && offset < priv_size; i++)
cs_etm_get_metadata(cpus->map[i], &offset, itr, info);
} else {
/* get configuration for all CPUs in the system */
for (i = 0; i < cpu__max_cpu(); i++)
for (i = 0; i < cpu__max_cpu() && offset < priv_size; i++)
if (cpu_map__has(cpu_map, i))
cs_etm_get_metadata(i, &offset, itr, info);
}
cpu_map__put(online_cpus);
return 0;
}
......
......@@ -6,3 +6,28 @@ endif
HAVE_KVM_STAT_SUPPORT := 1
PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
PERF_HAVE_JITDUMP := 1
#
# Syscall table generation for perf
#
out := $(OUTPUT)arch/powerpc/include/generated/asm
header32 := $(out)/syscalls_32.c
header64 := $(out)/syscalls_64.c
sysdef := $(srctree)/tools/arch/powerpc/include/uapi/asm/unistd.h
sysprf := $(srctree)/tools/perf/arch/powerpc/entry/syscalls/
systbl := $(sysprf)/mksyscalltbl
# Create output directory if not already present
_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
$(header64): $(sysdef) $(systbl)
$(Q)$(SHELL) '$(systbl)' '64' '$(CC)' $(sysdef) > $@
$(header32): $(sysdef) $(systbl)
$(Q)$(SHELL) '$(systbl)' '32' '$(CC)' $(sysdef) > $@
clean::
$(call QUIET_CLEAN, powerpc) $(RM) $(header32) $(header64)
archheaders: $(header32) $(header64)
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
#
# Generate system call table for perf. Derived from
# s390 script.
#
# Copyright IBM Corp. 2017
# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
# Changed by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
#
# Arguments:
#   $1  word size passed to the compiler as -m<wordsize>
#   $2  compiler command used to preprocess the header
#   $3  header file containing the __NR_* definitions
#
# The generated C table is written to stdout.

wordsize=$1
gcc=$2
input=$3

# "$input" must be quoted: with a missing or empty third argument the
# unquoted form collapses to the one-operand "test -r", which is always
# true, so the check would be skipped instead of reporting the error.
if ! test -r "$input"; then
	echo "Could not read input file" >&2
	exit 1
fi

# Read "name number" pairs from stdin and emit a designated-initializer
# string array indexed by syscall number, followed by a define holding
# the number of the last pair read.
create_table()
{
	local wordsize=$1
	local max_nr

	echo "static const char *syscalltbl_powerpc_${wordsize}[] = {"
	while read sc nr; do
		printf '\t[%d] = "%s",\n' $nr $sc
		max_nr=$nr
	done
	echo '};'
	echo "#define SYSCALLTBL_POWERPC_${wordsize}_MAX_ID $max_nr"
}

# Dump the preprocessor macros of the header, keep the __NR_* ones with
# the prefix stripped, sort them numerically by syscall number and feed
# them to create_table.
# NOTE(review): $gcc is left unquoted, presumably so that a compiler
# command made of several words is still split into words - confirm.
$gcc -m${wordsize} -E -dM -x c "$input" \
	|sed -ne 's/^#define __NR_//p' \
	|sort -t' ' -k2 -nu \
	|create_table ${wordsize}
......@@ -23,12 +23,37 @@ static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *na
return ops;
}
/*
 * Parse the s390 cpuid string and record the CPU family in @arch.
 *
 * cpuid string format:
 * "IBM,family,model-capacity,model[,cpum_cf-version,cpum_cf-authorization]"
 *
 * Only the family is stored; arch->model is set to 0. The remaining
 * fields are scanned into local buffers and discarded.
 *
 * Returns 0 when at least the family and model-capacity fields were
 * converted, -1 otherwise.
 */
static int s390__cpuid_parse(struct arch *arch, char *cpuid)
{
	unsigned int family;
	char model[16], model_c[16], cpumf_v[16], cpumf_a[16];
	int ret;

	/*
	 * Each string conversion carries a maximum field width of
	 * sizeof(buffer) - 1 so that an over-long field in @cpuid cannot
	 * write past the end of the 16-byte buffers.
	 */
	ret = sscanf(cpuid, "%*[^,],%u,%15[^,],%15[^,],%15[^,],%15s", &family,
		     model_c, model, cpumf_v, cpumf_a);
	if (ret < 2)
		return -1;

	arch->family = family;
	arch->model = 0;
	return 0;
}
static int s390__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
{
int err = 0;
if (!arch->initialized) {
arch->initialized = true;
arch->associate_instruction_ops = s390__associate_ins_ops;
if (cpuid)
err = s390__cpuid_parse(arch, cpuid);
}
return 0;
return err;
}
/*
* Implementation of get_cpuid().
*
* Copyright 2014 IBM Corp.
* Copyright IBM Corp. 2014, 2018
* Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
* Thomas Richter <tmricht@linux.vnet.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
......@@ -13,16 +14,153 @@
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "../../util/header.h"
#include "../../util/util.h"
#define SYSINFO_MANU "Manufacturer:"
#define SYSINFO_TYPE "Type:"
#define SYSINFO_MODEL "Model:"
#define SRVLVL_CPUMF "CPU-MF:"
#define SRVLVL_VERSION "version="
#define SRVLVL_AUTHORIZATION "authorization="
#define SYSINFO "/proc/sysinfo"
#define SRVLVL "/proc/service_levels"
int get_cpuid(char *buffer, size_t sz)
{
const char *cpuid = "IBM/S390";
char *cp, *line = NULL, *line2;
char type[8], model[33], version[8], manufacturer[32], authorization[8];
int tpsize = 0, mdsize = 0, vssize = 0, mfsize = 0, atsize = 0;
int read;
unsigned long line_sz;
size_t nbytes;
FILE *sysinfo;
/*
* Scan /proc/sysinfo line by line and read out values for
* Manufacturer:, Type: and Model:, for example:
* Manufacturer: IBM
* Type: 2964
* Model: 702 N96
* The first word is the Model Capacity and the second word is
* Model (can be omitted). Both words have a maximum size of 16
* bytes.
*/
memset(manufacturer, 0, sizeof(manufacturer));
memset(type, 0, sizeof(type));
memset(model, 0, sizeof(model));
memset(version, 0, sizeof(version));
memset(authorization, 0, sizeof(authorization));
sysinfo = fopen(SYSINFO, "r");
if (sysinfo == NULL)
return -1;
while ((read = getline(&line, &line_sz, sysinfo)) != -1) {
if (!strncmp(line, SYSINFO_MANU, strlen(SYSINFO_MANU))) {
line2 = line + strlen(SYSINFO_MANU);
while ((cp = strtok_r(line2, "\n ", &line2))) {
mfsize += scnprintf(manufacturer + mfsize,
sizeof(manufacturer) - mfsize, "%s", cp);
}
}
if (!strncmp(line, SYSINFO_TYPE, strlen(SYSINFO_TYPE))) {
line2 = line + strlen(SYSINFO_TYPE);
if (strlen(cpuid) + 1 > sz)
while ((cp = strtok_r(line2, "\n ", &line2))) {
tpsize += scnprintf(type + tpsize,
sizeof(type) - tpsize, "%s", cp);
}
}
if (!strncmp(line, SYSINFO_MODEL, strlen(SYSINFO_MODEL))) {
line2 = line + strlen(SYSINFO_MODEL);
while ((cp = strtok_r(line2, "\n ", &line2))) {
mdsize += scnprintf(model + mdsize, sizeof(type) - mdsize,
"%s%s", model[0] ? "," : "", cp);
}
break;
}
}
fclose(sysinfo);
/* Missing manufacturer, type or model information should not happen */
if (!manufacturer[0] || !type[0] || !model[0])
return -1;
strcpy(buffer, cpuid);
return 0;
/*
* Scan /proc/service_levels and return the CPU-MF counter facility
* version number and authorization level.
* Optional, does not exist on z/VM guests.
*/
sysinfo = fopen(SRVLVL, "r");
if (sysinfo == NULL)
goto skip_sysinfo;
while ((read = getline(&line, &line_sz, sysinfo)) != -1) {
if (strncmp(line, SRVLVL_CPUMF, strlen(SRVLVL_CPUMF)))
continue;
line2 = line + strlen(SRVLVL_CPUMF);
while ((cp = strtok_r(line2, "\n ", &line2))) {
if (!strncmp(cp, SRVLVL_VERSION,
strlen(SRVLVL_VERSION))) {
char *sep = strchr(cp, '=');
vssize += scnprintf(version + vssize,
sizeof(version) - vssize, "%s", sep + 1);
}
if (!strncmp(cp, SRVLVL_AUTHORIZATION,
strlen(SRVLVL_AUTHORIZATION))) {
char *sep = strchr(cp, '=');
atsize += scnprintf(authorization + atsize,
sizeof(authorization) - atsize, "%s", sep + 1);
}
}
}
fclose(sysinfo);
skip_sysinfo:
free(line);
if (version[0] && authorization[0] )
nbytes = snprintf(buffer, sz, "%s,%s,%s,%s,%s",
manufacturer, type, model, version,
authorization);
else
nbytes = snprintf(buffer, sz, "%s,%s,%s", manufacturer, type,
model);
return (nbytes >= sz) ? -1 : 0;
}
/*
 * Return the cpuid string produced by get_cpuid() in a freshly
 * malloc()ed 128-byte buffer, or NULL if the allocation or get_cpuid()
 * fails. @pmu is unused.
 */
char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
{
	const size_t len = 128;
	char *cpuid = malloc(len);

	if (cpuid == NULL)
		return NULL;

	/* On failure release the buffer; zfree() leaves cpuid NULL. */
	if (get_cpuid(cpuid, len) < 0)
		zfree(&cpuid);

	return cpuid;
}
/*
 * Match a cpuid taken from pmu-events/arch/s390/mapfile.csv (as
 * generated by jevents) against the cpuid string returned by
 * get_cpuid().
 *
 * @mapcpuid: cpuid as stored in the mapfile; this is just the type number.
 * @cpuid:    cpuid as returned by get_cpuid().
 *
 * The text following the first comma of @cpuid is compared with
 * @mapcpuid over the length of @mapcpuid.
 *
 * Returns -1 if @cpuid contains no comma, otherwise the strncmp()
 * result (0 on a match).
 */
int strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
{
	const char *type = strchr(cpuid, ',');

	if (!type)
		return -1;

	type++;	/* step over the comma */
	return strncmp(type, mapcpuid, strlen(mapcpuid));
}
......@@ -1803,7 +1803,7 @@ int cmd_record(int argc, const char **argv)
err = target__validate(&rec->opts.target);
if (err) {
target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
ui__warning("%s", errbuf);
ui__warning("%s\n", errbuf);
}
err = target__parse_uid(&rec->opts.target);
......
......@@ -614,6 +614,7 @@ static int stats_print(struct report *rep)
static void tasks_setup(struct report *rep)
{
memset(&rep->tool, 0, sizeof(rep->tool));
rep->tool.ordered_events = true;
if (rep->mmaps_mode) {
rep->tool.mmap = perf_event__process_mmap;
rep->tool.mmap2 = perf_event__process_mmap2;
......@@ -937,6 +938,7 @@ int cmd_report(int argc, const char **argv)
"perf report [<options>]",
NULL
};
bool group_set = false;
struct report report = {
.tool = {
.sample = process_sample_event,
......@@ -1056,7 +1058,7 @@ int cmd_report(int argc, const char **argv)
"Specify disassembler style (e.g. -M intel for intel syntax)"),
OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
"Show a column with the sum of periods"),
OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &group_set,
"Show event group information together"),
OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "",
"use branch records for per branch histogram filling",
......@@ -1173,6 +1175,9 @@ int cmd_report(int argc, const char **argv)
has_br_stack = perf_header__has_feat(&session->header,
HEADER_BRANCH_STACK);
if (group_set && !session->evlist->nr_groups)
perf_evlist__set_leader(session->evlist);
if (itrace_synth_opts.last_branch)
has_br_stack = true;
......
......@@ -1489,6 +1489,7 @@ struct perf_script {
bool show_switch_events;
bool show_namespace_events;
bool show_lost_events;
bool show_round_events;
bool allocated;
bool per_event_dump;
struct cpu_map *cpus;
......@@ -2104,6 +2105,16 @@ process_lost_event(struct perf_tool *tool,
return 0;
}
/*
 * Handler that prints @event to stdout with perf_event__fprintf().
 * It is installed as the tool's finished_round callback when the
 * show_round_events flag of the script is set.
 *
 * @tool and @oe are unused. Always returns 0.
 */
static int
process_finished_round_event(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct ordered_events *oe __maybe_unused)
{
perf_event__fprintf(event, stdout);
return 0;
}
static void sig_handler(int sig __maybe_unused)
{
session_done = 1;
......@@ -2200,6 +2211,10 @@ static int __cmd_script(struct perf_script *script)
script->tool.namespaces = process_namespaces_event;
if (script->show_lost_events)
script->tool.lost = process_lost_event;
if (script->show_round_events) {
script->tool.ordered_events = false;
script->tool.finished_round = process_finished_round_event;
}
if (perf_script__setup_per_event_dump(script)) {
pr_err("Couldn't create the per event dump files\n");
......@@ -3139,6 +3154,8 @@ int cmd_script(int argc, const char **argv)
"Show namespace events (if recorded)"),
OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events,
"Show lost events (if recorded)"),
OPT_BOOLEAN('\0', "show-round-events", &script.show_round_events,
"Show round events (if recorded)"),
OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump,
"Dump trace output to files named by the monitored events"),
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
......
......@@ -168,6 +168,7 @@ static struct timespec ref_time;
static struct cpu_map *aggr_map;
static aggr_get_id_t aggr_get_id;
static bool append_file;
static bool interval_count;
static const char *output_name;
static int output_fd;
static int print_free_counters_hint;
......@@ -571,6 +572,8 @@ static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
static int __run_perf_stat(int argc, const char **argv)
{
int interval = stat_config.interval;
int times = stat_config.times;
int timeout = stat_config.timeout;
char msg[BUFSIZ];
unsigned long long t0, t1;
struct perf_evsel *counter;
......@@ -584,6 +587,9 @@ static int __run_perf_stat(int argc, const char **argv)
if (interval) {
ts.tv_sec = interval / USEC_PER_MSEC;
ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
} else if (timeout) {
ts.tv_sec = timeout / USEC_PER_MSEC;
ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC;
} else {
ts.tv_sec = 1;
ts.tv_nsec = 0;
......@@ -696,10 +702,14 @@ static int __run_perf_stat(int argc, const char **argv)
perf_evlist__start_workload(evsel_list);
enable_counters();
if (interval) {
if (interval || timeout) {
while (!waitpid(child_pid, &status, WNOHANG)) {
nanosleep(&ts, NULL);
if (timeout)
break;
process_interval();
if (interval_count && !(--times))
break;
}
}
waitpid(child_pid, &status, 0);
......@@ -716,8 +726,13 @@ static int __run_perf_stat(int argc, const char **argv)
enable_counters();
while (!done) {
nanosleep(&ts, NULL);
if (interval)
if (timeout)
break;
if (interval) {
process_interval();
if (interval_count && !(--times))
break;
}
}
}
......@@ -1891,6 +1906,10 @@ static const struct option stat_options[] = {
"command to run after to the measured command"),
OPT_UINTEGER('I', "interval-print", &stat_config.interval,
"print counts at regular interval in ms (>= 10)"),
OPT_INTEGER(0, "interval-count", &stat_config.times,
"print counts for fixed number of times"),
OPT_UINTEGER(0, "timeout", &stat_config.timeout,
"stop workload and print counts after a timeout period in ms (>= 10ms)"),
OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
"aggregate counts per processor socket", AGGR_SOCKET),
OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
......@@ -2688,7 +2707,7 @@ int cmd_stat(int argc, const char **argv)
int status = -EINVAL, run_idx;
const char *mode;
FILE *output = stderr;
unsigned int interval;
unsigned int interval, timeout;
const char * const stat_subcommands[] = { "record", "report" };
setlocale(LC_ALL, "");
......@@ -2719,6 +2738,7 @@ int cmd_stat(int argc, const char **argv)
return __cmd_report(argc, argv);
interval = stat_config.interval;
timeout = stat_config.timeout;
/*
* For record command the -o is already taken care of.
......@@ -2871,6 +2891,33 @@ int cmd_stat(int argc, const char **argv)
"Please proceed with caution.\n");
}
if (stat_config.times && interval)
interval_count = true;
else if (stat_config.times && !interval) {
pr_err("interval-count option should be used together with "
"interval-print.\n");
parse_options_usage(stat_usage, stat_options, "interval-count", 0);
parse_options_usage(stat_usage, stat_options, "I", 1);
goto out;
}
if (timeout && timeout < 100) {
if (timeout < 10) {
pr_err("timeout must be >= 10ms.\n");
parse_options_usage(stat_usage, stat_options, "timeout", 0);
goto out;
} else
pr_warning("timeout < 100ms. "
"The overhead percentage could be high in some cases. "
"Please proceed with caution.\n");
}
if (timeout && interval) {
pr_err("timeout option is not supported with interval-print.\n");
parse_options_usage(stat_usage, stat_options, "timeout", 0);
parse_options_usage(stat_usage, stat_options, "I", 1);
goto out;
}
if (perf_evlist__alloc_stats(evsel_list, interval))
goto out;
......
......@@ -42,6 +42,7 @@ arch/parisc/include/uapi/asm/errno.h
arch/powerpc/include/uapi/asm/errno.h
arch/sparc/include/uapi/asm/errno.h
arch/x86/include/uapi/asm/errno.h
arch/powerpc/include/uapi/asm/unistd.h
include/asm-generic/bitops/arch_hweight.h
include/asm-generic/bitops/const_hweight.h
include/asm-generic/bitops/__fls.h
......
......@@ -482,6 +482,34 @@ static void fs_something(void)
}
}
/*
 * Pick the event string to parse for the code reading test.
 *
 * @excl_kernel: true when kernel samples are excluded; selects the ":u"
 *               variant of the event.
 *
 * Returns "cycles:u" or "cycles". On s390x the cpuid string from
 * get_cpuid() is inspected first: if it cannot be obtained, does not
 * carry all five fields, or its authorization field lacks the bit
 * tested for the requested mode (4 when excluding the kernel, 2
 * otherwise), "cpu-clock:u" or "cpu-clock" is returned instead.
 */
static const char *do_determine_event(bool excl_kernel)
{
	const char *event = excl_kernel ? "cycles:u" : "cycles";

#ifdef __s390x__
	char cpuid[128], model[16], model_c[16], cpum_cf_v[16];
	/* %u and %x both store through an unsigned int pointer. */
	unsigned int family, cpum_cf_a;
	int ret;

	if (get_cpuid(cpuid, sizeof(cpuid)))
		goto out_clocks;

	/*
	 * Field widths of sizeof(buffer) - 1 keep an over-long field in
	 * the cpuid string from overrunning the 16-byte buffers.
	 */
	ret = sscanf(cpuid, "%*[^,],%u,%15[^,],%15[^,],%15[^,],%x", &family,
		     model_c, model, cpum_cf_v, &cpum_cf_a);
	if (ret != 5)		 /* Not available */
		goto out_clocks;
	if (excl_kernel && (cpum_cf_a & 4))
		return event;
	if (!excl_kernel && (cpum_cf_a & 2))
		return event;

	/* Fall through: missing authorization */
out_clocks:
	event = excl_kernel ? "cpu-clock:u" : "cpu-clock";

#endif
	return event;
}
static void do_something(void)
{
fs_something();
......@@ -592,10 +620,7 @@ static int do_test_code_reading(bool try_kcore)
perf_evlist__set_maps(evlist, cpus, threads);
if (excl_kernel)
str = "cycles:u";
else
str = "cycles";
str = do_determine_event(excl_kernel);
pr_debug("Parsing event '%s'\n", str);
ret = parse_events(evlist, str, NULL);
if (ret < 0) {
......
......@@ -37,6 +37,19 @@ static int init_live_machine(struct machine *machine)
mmap_handler, machine, true, 500);
}
/*
* We need to keep these functions global, despite the
* fact that they are used only locally in this object,
* in order to keep them around even if the binary is
* stripped. If they are gone, the unwind check for
* symbol fails.
*/
int test_dwarf_unwind__thread(struct thread *thread);
int test_dwarf_unwind__compare(void *p1, void *p2);
int test_dwarf_unwind__krava_3(struct thread *thread);
int test_dwarf_unwind__krava_2(struct thread *thread);
int test_dwarf_unwind__krava_1(struct thread *thread);
#define MAX_STACK 8
static int unwind_entry(struct unwind_entry *entry, void *arg)
......@@ -45,12 +58,12 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
char *symbol = entry->sym ? entry->sym->name : NULL;
static const char *funcs[MAX_STACK] = {
"test__arch_unwind_sample",
"unwind_thread",
"compare",
"test_dwarf_unwind__thread",
"test_dwarf_unwind__compare",
"bsearch",
"krava_3",
"krava_2",
"krava_1",
"test_dwarf_unwind__krava_3",
"test_dwarf_unwind__krava_2",
"test_dwarf_unwind__krava_1",
"test__dwarf_unwind"
};
/*
......@@ -77,7 +90,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
return strcmp((const char *) symbol, funcs[idx]);
}
static noinline int unwind_thread(struct thread *thread)
noinline int test_dwarf_unwind__thread(struct thread *thread)
{
struct perf_sample sample;
unsigned long cnt = 0;
......@@ -108,7 +121,7 @@ static noinline int unwind_thread(struct thread *thread)
static int global_unwind_retval = -INT_MAX;
static noinline int compare(void *p1, void *p2)
noinline int test_dwarf_unwind__compare(void *p1, void *p2)
{
/* Any possible value should be 'thread' */
struct thread *thread = *(struct thread **)p1;
......@@ -117,17 +130,17 @@ static noinline int compare(void *p1, void *p2)
/* Call unwinder twice for both callchain orders. */
callchain_param.order = ORDER_CALLER;
global_unwind_retval = unwind_thread(thread);
global_unwind_retval = test_dwarf_unwind__thread(thread);
if (!global_unwind_retval) {
callchain_param.order = ORDER_CALLEE;
global_unwind_retval = unwind_thread(thread);
global_unwind_retval = test_dwarf_unwind__thread(thread);
}
}
return p1 - p2;
}
static noinline int krava_3(struct thread *thread)
noinline int test_dwarf_unwind__krava_3(struct thread *thread)
{
struct thread *array[2] = {thread, thread};
void *fp = &bsearch;
......@@ -141,18 +154,19 @@ static noinline int krava_3(struct thread *thread)
size_t, int (*)(void *, void *));
_bsearch = fp;
_bsearch(array, &thread, 2, sizeof(struct thread **), compare);
_bsearch(array, &thread, 2, sizeof(struct thread **),
test_dwarf_unwind__compare);
return global_unwind_retval;
}
static noinline int krava_2(struct thread *thread)
noinline int test_dwarf_unwind__krava_2(struct thread *thread)
{
return krava_3(thread);
return test_dwarf_unwind__krava_3(thread);
}
static noinline int krava_1(struct thread *thread)
noinline int test_dwarf_unwind__krava_1(struct thread *thread)
{
return krava_2(thread);
return test_dwarf_unwind__krava_2(thread);
}
int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unused)
......@@ -189,7 +203,7 @@ int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unu
goto out;
}
err = krava_1(thread);
err = test_dwarf_unwind__krava_1(thread);
thread__put(thread);
out:
......
......@@ -5,7 +5,7 @@ had_vfs_getname=$?
cleanup_probe_vfs_getname() {
if [ $had_vfs_getname -eq 1 ] ; then
perf probe -q -d probe:vfs_getname
perf probe -q -d probe:vfs_getname*
fi
}
......
......@@ -21,12 +21,12 @@ trace_libc_inet_pton_backtrace() {
expected[3]=".*packets transmitted.*"
expected[4]="rtt min.*"
expected[5]="[0-9]+\.[0-9]+[[:space:]]+probe_libc:inet_pton:\([[:xdigit:]]+\)"
expected[6]=".*inet_pton[[:space:]]\($libc\)$"
expected[6]=".*inet_pton[[:space:]]\($libc|inlined\)$"
case "$(uname -m)" in
s390x)
eventattr='call-graph=dwarf'
expected[7]="gaih_inet[[:space:]]\(inlined\)$"
expected[8]="__GI_getaddrinfo[[:space:]]\(inlined\)$"
expected[7]="gaih_inet.*[[:space:]]\($libc|inlined\)$"
expected[8]="__GI_getaddrinfo[[:space:]]\($libc|inlined\)$"
expected[9]="main[[:space:]]\(.*/bin/ping.*\)$"
expected[10]="__libc_start_main[[:space:]]\($libc\)$"
expected[11]="_start[[:space:]]\(.*/bin/ping.*\)$"
......
......@@ -56,7 +56,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
* be compacted against the list of modules found in the "vmlinux"
* code and with the one got from /proc/modules from the "kallsyms" code.
*/
if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true) <= 0) {
if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type) <= 0) {
pr_debug("dso__load_kallsyms ");
goto out;
}
......@@ -125,7 +125,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
if (pair && UM(pair->start) == mem_start) {
next_pair:
if (strcmp(sym->name, pair->name) == 0) {
if (arch__compare_symbol_names(sym->name, pair->name) == 0) {
/*
* kallsyms don't have the symbol end, so we
* set that by using the next symbol start - 1,
......
......@@ -319,6 +319,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
struct map_symbol *ms = ab->b.priv;
struct symbol *sym = ms->sym;
u8 pcnt_width = annotate_browser__pcnt_width(ab);
int width = 0;
/* PLT symbols contain external offsets */
if (strstr(sym->name, "@plt"))
......@@ -340,13 +341,17 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
to = (u64)btarget->idx;
}
if (ab->have_cycles)
width = IPC_WIDTH + CYCLES_WIDTH;
ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS);
__ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width,
__ui_browser__line_arrow(browser,
pcnt_width + 2 + ab->addr_width + width,
from, to);
if (is_fused(ab, cursor)) {
ui_browser__mark_fused(browser,
pcnt_width + 3 + ab->addr_width,
pcnt_width + 3 + ab->addr_width + width,
from - 1,
to > from ? true : false);
}
......
......@@ -316,7 +316,6 @@ static int machine__write_buildid_table(struct machine *machine,
struct feat_fd *fd)
{
int err = 0;
char nm[PATH_MAX];
struct dso *pos;
u16 kmisc = PERF_RECORD_MISC_KERNEL,
umisc = PERF_RECORD_MISC_USER;
......@@ -338,9 +337,8 @@ static int machine__write_buildid_table(struct machine *machine,
name = pos->short_name;
name_len = pos->short_name_len;
} else if (dso__is_kcore(pos)) {
machine__mmap_name(machine, nm, sizeof(nm));
name = nm;
name_len = strlen(nm);
name = machine->mmap_name;
name_len = strlen(name);
} else {
name = pos->long_name;
name_len = pos->long_name_len;
......@@ -813,12 +811,10 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine)
bool is_kallsyms = dso__is_kallsyms(dso);
bool is_vdso = dso__is_vdso(dso);
const char *name = dso->long_name;
char nm[PATH_MAX];
if (dso__is_kcore(dso)) {
is_kallsyms = true;
machine__mmap_name(machine, nm, sizeof(nm));
name = nm;
name = machine->mmap_name;
}
return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name,
dso->nsinfo, is_kallsyms, is_vdso);
......
......@@ -78,6 +78,8 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder)
{
ocsd_datapath_resp_t dp_ret;
decoder->prev_return = OCSD_RESP_CONT;
dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET,
0, 0, NULL, NULL);
if (OCSD_DATA_RESP_IS_FATAL(dp_ret))
......@@ -253,16 +255,16 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
decoder->packet_count = 0;
for (i = 0; i < MAX_BUFFER; i++) {
decoder->packet_buffer[i].start_addr = 0xdeadbeefdeadbeefUL;
decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL;
decoder->packet_buffer[i].exc = false;
decoder->packet_buffer[i].exc_ret = false;
decoder->packet_buffer[i].cpu = INT_MIN;
decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL;
decoder->packet_buffer[i].last_instr_taken_branch = false;
decoder->packet_buffer[i].exc = false;
decoder->packet_buffer[i].exc_ret = false;
decoder->packet_buffer[i].cpu = INT_MIN;
}
}
static ocsd_datapath_resp_t
cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
const ocsd_generic_trace_elem *elem,
const u8 trace_chan_id,
enum cs_etm_sample_type sample_type)
{
......@@ -278,18 +280,16 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
return OCSD_RESP_FATAL_SYS_ERR;
et = decoder->tail;
et = (et + 1) & (MAX_BUFFER - 1);
decoder->tail = et;
decoder->packet_count++;
decoder->packet_buffer[et].sample_type = sample_type;
decoder->packet_buffer[et].start_addr = elem->st_addr;
decoder->packet_buffer[et].end_addr = elem->en_addr;
decoder->packet_buffer[et].exc = false;
decoder->packet_buffer[et].exc_ret = false;
decoder->packet_buffer[et].cpu = *((int *)inode->priv);
/* Wrap around if need be */
et = (et + 1) & (MAX_BUFFER - 1);
decoder->tail = et;
decoder->packet_count++;
decoder->packet_buffer[et].start_addr = 0xdeadbeefdeadbeefUL;
decoder->packet_buffer[et].end_addr = 0xdeadbeefdeadbeefUL;
if (decoder->packet_count == MAX_BUFFER - 1)
return OCSD_RESP_WAIT;
......@@ -297,6 +297,47 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
return OCSD_RESP_CONT;
}
/*
 * Queue a CS_ETM_RANGE packet and fill it in from an instruction range
 * element.
 *
 * The packet is first queued through cs_etm_decoder__buffer_packet(); if
 * that returns anything other than OCSD_RESP_CONT or OCSD_RESP_WAIT the
 * response is handed straight back and no packet field is touched.
 * Otherwise the packet at decoder->tail receives the element's start/end
 * addresses, and last_instr_taken_branch is copied from
 * elem->last_instr_exec when the last instruction type is OCSD_INSTR_BR or
 * OCSD_INSTR_BR_INDIRECT (false for every other type).
 *
 * Returns the response obtained from cs_etm_decoder__buffer_packet().
 */
static ocsd_datapath_resp_t
cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
			     const ocsd_generic_trace_elem *elem,
			     const uint8_t trace_chan_id)
{
	struct cs_etm_packet *pkt;
	int resp;

	resp = cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
					     CS_ETM_RANGE);
	if (!(resp == OCSD_RESP_CONT || resp == OCSD_RESP_WAIT))
		return resp;

	/* Fill in the slot currently designated by the tail index */
	pkt = &decoder->packet_buffer[decoder->tail];
	pkt->start_addr = elem->st_addr;
	pkt->end_addr = elem->en_addr;

	if (elem->last_i_type == OCSD_INSTR_BR ||
	    elem->last_i_type == OCSD_INSTR_BR_INDIRECT)
		pkt->last_instr_taken_branch = elem->last_instr_exec;
	else
		pkt->last_instr_taken_branch = false;

	return resp;
}
static ocsd_datapath_resp_t
cs_etm_decoder__buffer_trace_on(struct cs_etm_decoder *decoder,
const uint8_t trace_chan_id)
{
return cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
CS_ETM_TRACE_ON);
}
static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
const void *context,
const ocsd_trc_index_t indx __maybe_unused,
......@@ -313,12 +354,13 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
decoder->trace_on = false;
break;
case OCSD_GEN_TRC_ELEM_TRACE_ON:
resp = cs_etm_decoder__buffer_trace_on(decoder,
trace_chan_id);
decoder->trace_on = true;
break;
case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
resp = cs_etm_decoder__buffer_packet(decoder, elem,
trace_chan_id,
CS_ETM_RANGE);
resp = cs_etm_decoder__buffer_range(decoder, elem,
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_EXCEPTION:
decoder->packet_buffer[decoder->tail].exc = true;
......
......@@ -24,12 +24,14 @@ struct cs_etm_buffer {
/* Packet kinds stored in cs_etm_packet.sample_type; one bit per kind. */
enum cs_etm_sample_type {
	CS_ETM_RANGE	= 0x1,	/* bit 0 */
	CS_ETM_TRACE_ON	= 0x2,	/* bit 1 */
};
struct cs_etm_packet {
enum cs_etm_sample_type sample_type;
u64 start_addr;
u64 end_addr;
u8 last_instr_taken_branch;
u8 exc;
u8 exc_ret;
int cpu;
......
This diff is collapsed.
......@@ -894,8 +894,6 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
struct machine *machine)
{
size_t size;
const char *mmap_name;
char name_buff[PATH_MAX];
struct map *map = machine__kernel_map(machine);
struct kmap *kmap;
int err;
......@@ -918,7 +916,6 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
return -1;
}
mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff));
if (machine__is_host(machine)) {
/*
* kernel uses PERF_RECORD_MISC_USER for user space maps,
......@@ -931,7 +928,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
kmap = map__kmap(map);
size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
"%s%s", mmap_name, kmap->ref_reloc_sym->name) + 1;
"%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1;
size = PERF_ALIGN(size, sizeof(u64));
event->mmap.header.type = PERF_RECORD_MMAP;
event->mmap.header.size = (sizeof(event->mmap) -
......@@ -1591,17 +1588,6 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
return -1;
dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
/*
* Have we already created the kernel maps for this machine?
*
* This should have happened earlier, when we processed the kernel MMAP
* events, but for older perf.data files there was no such thing, so do
* it now.
*/
if (sample->cpumode == PERF_RECORD_MISC_KERNEL &&
machine__kernel_map(machine) == NULL)
machine__create_kernel_maps(machine);
thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al);
dump_printf(" ...... dso: %s\n",
al->map ? al->map->dso->long_name :
......
......@@ -1086,11 +1086,30 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
{
bool all_threads = (target->per_thread && target->system_wide);
struct cpu_map *cpus;
struct thread_map *threads;
/*
* If both '-a' and '--per-thread' are passed to perf record, '-a'
* overrides '--per-thread': target->per_thread = false and
* target->system_wide = true.
*
* If only '--per-thread' is passed to perf record,
* target->per_thread = true and target->system_wide = false.
*
* Either way, target->per_thread && target->system_wide is false for
* perf record, so thread_map__new_str doesn't call
* thread_map__new_all_cpus. That keeps perf record's current
* behavior.
*
* perf stat allows target->per_thread and target->system_wide to
* both be true, which means collecting system-wide per-thread data.
* In that case thread_map__new_str calls thread_map__new_all_cpus
* to enumerate all threads.
*/
threads = thread_map__new_str(target->pid, target->tid, target->uid,
target->per_thread);
all_threads);
if (!threads)
return -1;
......
......@@ -174,4 +174,5 @@ int write_padded(struct feat_fd *fd, const void *bf,
int get_cpuid(char *buffer, size_t sz);
char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused);
int strcmp_cpuid_str(const char *s1, const char *s2);
#endif /* __PERF_HEADER_H */
......@@ -879,7 +879,7 @@ iter_prepare_cumulative_entry(struct hist_entry_iter *iter,
* cumulated only one time to prevent entries more than 100%
* overhead.
*/
he_cache = malloc(sizeof(*he_cache) * (iter->max_stack + 1));
he_cache = malloc(sizeof(*he_cache) * (callchain_cursor.nr + 1));
if (he_cache == NULL)
return -ENOMEM;
......@@ -1045,8 +1045,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
if (err)
return err;
iter->max_stack = max_stack_depth;
err = iter->ops->prepare_entry(iter, al);
if (err)
goto out;
......
......@@ -107,7 +107,6 @@ struct hist_entry_iter {
int curr;
bool hide_unresolved;
int max_stack;
struct perf_evsel *evsel;
struct perf_sample *sample;
......
......@@ -48,8 +48,31 @@ static void machine__threads_init(struct machine *machine)
}
}
/*
 * Choose the kernel mmap name for @machine and store a heap-allocated copy
 * in machine->mmap_name:
 *
 *  - host:          symbol_conf.vmlinux_name, else "[kernel.kallsyms]"
 *  - default guest: symbol_conf.default_guest_vmlinux_name, else
 *                   "[guest.kernel.kallsyms]"
 *  - other guests:  "[guest.kernel.kallsyms.<pid>]"
 *
 * Returns 0 on success, -ENOMEM when the name could not be allocated (in
 * which case machine->mmap_name is NULL).
 */
static int machine__set_mmap_name(struct machine *machine)
{
	const char *fixed = NULL;

	if (machine__is_host(machine)) {
		fixed = symbol_conf.vmlinux_name;
		if (!fixed)
			fixed = "[kernel.kallsyms]";
	} else if (machine__is_default_guest(machine)) {
		fixed = symbol_conf.default_guest_vmlinux_name;
		if (!fixed)
			fixed = "[guest.kernel.kallsyms]";
	}

	if (fixed) {
		machine->mmap_name = strdup(fixed);
	} else if (asprintf(&machine->mmap_name, "[guest.kernel.kallsyms.%d]",
			    machine->pid) < 0) {
		/* asprintf leaves the pointer undefined on failure */
		machine->mmap_name = NULL;
	}

	if (!machine->mmap_name)
		return -ENOMEM;

	return 0;
}
int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
{
int err = -ENOMEM;
memset(machine, 0, sizeof(*machine));
map_groups__init(&machine->kmaps, machine);
RB_CLEAR_NODE(&machine->rb_node);
......@@ -73,13 +96,16 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
if (machine->root_dir == NULL)
return -ENOMEM;
if (machine__set_mmap_name(machine))
goto out;
if (pid != HOST_KERNEL_ID) {
struct thread *thread = machine__findnew_thread(machine, -1,
pid);
char comm[64];
if (thread == NULL)
return -ENOMEM;
goto out;
snprintf(comm, sizeof(comm), "[guest/%d]", pid);
thread__set_comm(thread, comm, 0);
......@@ -87,7 +113,13 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
}
machine->current_tid = NULL;
err = 0;
out:
/*
 * Every failure path above jumps here with err still set to a negative
 * errno: release what was allocated so far before reporting it.
 */
if (err) {
	zfree(&machine->root_dir);
	zfree(&machine->mmap_name);
}
/* Propagate the status; returning 0 here would hide the failure. */
return err;
}
......@@ -119,7 +151,7 @@ struct machine *machine__new_kallsyms(void)
* ask for not using the kcore parsing code, once this one is fixed
* to create a map per module.
*/
if (machine && __machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION, true) <= 0) {
if (machine && machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION) <= 0) {
machine__delete(machine);
machine = NULL;
}
......@@ -180,6 +212,7 @@ void machine__exit(struct machine *machine)
dsos__exit(&machine->dsos);
machine__exit_vdso(machine);
zfree(&machine->root_dir);
zfree(&machine->mmap_name);
zfree(&machine->current_tid);
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
......@@ -322,20 +355,6 @@ void machines__process_guests(struct machines *machines,
}
}
/*
 * Format the kernel mmap name of @machine into @bf (at most @size bytes):
 * "[kernel.kallsyms]" for the host, "[guest.kernel.kallsyms]" for the
 * default guest and "[guest.kernel.kallsyms.<pid>]" for any other guest.
 *
 * Returns @bf.
 */
char *machine__mmap_name(struct machine *machine, char *bf, size_t size)
{
	if (machine__is_host(machine)) {
		snprintf(bf, size, "[%s]", "kernel.kallsyms");
		return bf;
	}

	if (machine__is_default_guest(machine)) {
		snprintf(bf, size, "[%s]", "guest.kernel.kallsyms");
		return bf;
	}

	snprintf(bf, size, "[%s.%d]", "guest.kernel.kallsyms",
		 machine->pid);
	return bf;
}
void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size)
{
struct rb_node *node;
......@@ -771,25 +790,13 @@ size_t machine__fprintf(struct machine *machine, FILE *fp)
static struct dso *machine__get_kernel(struct machine *machine)
{
const char *vmlinux_name = NULL;
const char *vmlinux_name = machine->mmap_name;
struct dso *kernel;
if (machine__is_host(machine)) {
vmlinux_name = symbol_conf.vmlinux_name;
if (!vmlinux_name)
vmlinux_name = DSO__NAME_KALLSYMS;
kernel = machine__findnew_kernel(machine, vmlinux_name,
"[kernel]", DSO_TYPE_KERNEL);
} else {
char bf[PATH_MAX];
if (machine__is_default_guest(machine))
vmlinux_name = symbol_conf.default_guest_vmlinux_name;
if (!vmlinux_name)
vmlinux_name = machine__mmap_name(machine, bf,
sizeof(bf));
kernel = machine__findnew_kernel(machine, vmlinux_name,
"[guest.kernel]",
DSO_TYPE_GUEST_KERNEL);
......@@ -849,13 +856,10 @@ static int machine__get_running_kernel_start(struct machine *machine,
return 0;
}
int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
static int
__machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
{
int type;
u64 start = 0;
if (machine__get_running_kernel_start(machine, NULL, &start))
return -1;
/* In case of renewal the kernel map, destroy previous one */
machine__destroy_kernel_maps(machine);
......@@ -864,7 +868,7 @@ int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
struct kmap *kmap;
struct map *map;
machine->vmlinux_maps[type] = map__new2(start, kernel, type);
machine->vmlinux_maps[type] = map__new2(0, kernel, type);
if (machine->vmlinux_maps[type] == NULL)
return -1;
......@@ -987,11 +991,11 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid)
return machine__create_kernel_maps(machine);
}
int __machine__load_kallsyms(struct machine *machine, const char *filename,
enum map_type type, bool no_kcore)
int machine__load_kallsyms(struct machine *machine, const char *filename,
enum map_type type)
{
struct map *map = machine__kernel_map(machine);
int ret = __dso__load_kallsyms(map->dso, filename, map, no_kcore);
int ret = __dso__load_kallsyms(map->dso, filename, map, true);
if (ret > 0) {
dso__set_loaded(map->dso, type);
......@@ -1006,12 +1010,6 @@ int __machine__load_kallsyms(struct machine *machine, const char *filename,
return ret;
}
int machine__load_kallsyms(struct machine *machine, const char *filename,
enum map_type type)
{
return __machine__load_kallsyms(machine, filename, type, false);
}
int machine__load_vmlinux_path(struct machine *machine, enum map_type type)
{
struct map *map = machine__kernel_map(machine);
......@@ -1215,6 +1213,24 @@ static int machine__create_modules(struct machine *machine)
return 0;
}
/*
 * Apply the [start, end) range to every entry of machine->vmlinux_maps.
 * An end of zero is replaced by ~0ULL.
 */
static void machine__set_kernel_mmap(struct machine *machine,
				     u64 start, u64 end)
{
	int type;

	for (type = 0; type < MAP__NR_TYPES; type++) {
		struct map *map = machine->vmlinux_maps[type];

		map->start = start;
		map->end = end;

		/*
		 * Be a bit paranoid here, some perf.data file came with
		 * a zero sized synthesized MMAP event for the kernel.
		 */
		if (!map->end)
			map->end = ~0ULL;
	}
}
int machine__create_kernel_maps(struct machine *machine)
{
struct dso *kernel = machine__get_kernel(machine);
......@@ -1239,40 +1255,22 @@ int machine__create_kernel_maps(struct machine *machine)
"continuing anyway...\n", machine->pid);
}
/*
* Now that we have all the maps created, just set the ->end of them:
*/
map_groups__fixup_end(&machine->kmaps);
if (!machine__get_running_kernel_start(machine, &name, &addr)) {
if (name &&
maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) {
machine__destroy_kernel_maps(machine);
return -1;
}
machine__set_kernel_mmap(machine, addr, 0);
}
/*
* Now that we have all the maps created, just set the ->end of them:
*/
map_groups__fixup_end(&machine->kmaps);
return 0;
}
static void machine__set_kernel_mmap_len(struct machine *machine,
union perf_event *event)
{
int i;
for (i = 0; i < MAP__NR_TYPES; i++) {
machine->vmlinux_maps[i]->start = event->mmap.start;
machine->vmlinux_maps[i]->end = (event->mmap.start +
event->mmap.len);
/*
* Be a bit paranoid here, some perf.data file came with
* a zero sized synthesized MMAP event for the kernel.
*/
if (machine->vmlinux_maps[i]->end == 0)
machine->vmlinux_maps[i]->end = ~0ULL;
}
}
static bool machine__uses_kcore(struct machine *machine)
{
struct dso *dso;
......@@ -1289,7 +1287,6 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
union perf_event *event)
{
struct map *map;
char kmmap_prefix[PATH_MAX];
enum dso_kernel_type kernel_type;
bool is_kernel_mmap;
......@@ -1297,15 +1294,14 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
if (machine__uses_kcore(machine))
return 0;
machine__mmap_name(machine, kmmap_prefix, sizeof(kmmap_prefix));
if (machine__is_host(machine))
kernel_type = DSO_TYPE_KERNEL;
else
kernel_type = DSO_TYPE_GUEST_KERNEL;
is_kernel_mmap = memcmp(event->mmap.filename,
kmmap_prefix,
strlen(kmmap_prefix) - 1) == 0;
machine->mmap_name,
strlen(machine->mmap_name) - 1) == 0;
if (event->mmap.filename[0] == '/' ||
(!is_kernel_mmap && event->mmap.filename[0] == '[')) {
map = machine__findnew_module_map(machine, event->mmap.start,
......@@ -1316,7 +1312,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
map->end = map->start + event->mmap.len;
} else if (is_kernel_mmap) {
const char *symbol_name = (event->mmap.filename +
strlen(kmmap_prefix));
strlen(machine->mmap_name));
/*
* Should be there already, from the build-id table in
* the header.
......@@ -1357,7 +1353,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
up_read(&machine->dsos.lock);
if (kernel == NULL)
kernel = machine__findnew_dso(machine, kmmap_prefix);
kernel = machine__findnew_dso(machine, machine->mmap_name);
if (kernel == NULL)
goto out_problem;
......@@ -1370,7 +1366,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
if (strstr(kernel->long_name, "vmlinux"))
dso__set_short_name(kernel, "[kernel.vmlinux]", false);
machine__set_kernel_mmap_len(machine, event);
machine__set_kernel_mmap(machine, event->mmap.start,
event->mmap.start + event->mmap.len);
/*
* Avoid using a zero address (kptr_restrict) for the ref reloc
......
......@@ -43,6 +43,7 @@ struct machine {
bool comm_exec;
bool kptr_restrict_warned;
char *root_dir;
char *mmap_name;
struct threads threads[THREADS__TABLE_SIZE];
struct vdso_info *vdso_info;
struct perf_env *env;
......@@ -142,8 +143,6 @@ struct machine *machines__find(struct machines *machines, pid_t pid);
struct machine *machines__findnew(struct machines *machines, pid_t pid);
void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size);
char *machine__mmap_name(struct machine *machine, char *bf, size_t size);
void machines__set_comm_exec(struct machines *machines, bool comm_exec);
struct machine *machine__new_host(void);
......@@ -226,8 +225,6 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
const char *filename);
int arch__fix_module_text_start(u64 *start, const char *name);
int __machine__load_kallsyms(struct machine *machine, const char *filename,
enum map_type type, bool no_kcore);
int machine__load_kallsyms(struct machine *machine, const char *filename,
enum map_type type);
int machine__load_vmlinux_path(struct machine *machine, enum map_type type);
......@@ -239,7 +236,6 @@ size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
bool (skip)(struct dso *dso, int parm), int parm);
void machine__destroy_kernel_maps(struct machine *machine);
int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel);
int machine__create_kernel_maps(struct machine *machine);
int machines__create_kernel_maps(struct machines *machines, pid_t pid);
......
......@@ -576,6 +576,34 @@ char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
return NULL;
}
/*
 * Match a cpuid regular expression taken from mapfile.csv (@mapcpuid)
 * against the cpuid string generated on this platform (@cpuid).
 *
 * Returns zero when the expression matches and the match spans the whole
 * of @cpuid. Returns non-zero otherwise, including when @mapcpuid does not
 * compile as an extended regular expression.
 */
int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
{
	regmatch_t span[1];
	regex_t preg;
	size_t span_len;
	int rc;

	if (regcomp(&preg, mapcpuid, REG_EXTENDED) != 0) {
		/* Tell the user the map entry is not a usable expression. */
		pr_info("Invalid regular expression %s\n", mapcpuid);
		return 1;
	}

	rc = regexec(&preg, cpuid, 1, span, 0);
	regfree(&preg);
	if (rc != 0)
		return 1;

	/* A partial match is not enough: the whole string must be covered. */
	span_len = (span[0].rm_eo - span[0].rm_so);
	if (span_len != strlen(cpuid))
		return 1;

	return 0;
}
static char *perf_pmu__getcpuid(struct perf_pmu *pmu)
{
char *cpuid;
......@@ -610,31 +638,14 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu)
i = 0;
for (;;) {
regex_t re;
regmatch_t pmatch[1];
int match;
map = &pmu_events_map[i++];
if (!map->table) {
map = NULL;
break;
}
if (regcomp(&re, map->cpuid, REG_EXTENDED) != 0) {
/* Warn unable to generate match particular string. */
pr_info("Invalid regular expression %s\n", map->cpuid);
if (!strcmp_cpuid_str(map->cpuid, cpuid))
break;
}
match = !regexec(&re, cpuid, 1, pmatch, 0);
regfree(&re);
if (match) {
size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so);
/* Verify the entire string matched. */
if (match_len == strlen(cpuid))
break;
}
}
free(cpuid);
return map;
......
......@@ -111,17 +111,20 @@ struct sort_entry sort_thread = {
/* --sort comm */
/*
* We can't use pointer comparison in functions below,
* because it gives different results based on pointer
* values, which could break some sorting assumptions.
*/
static int64_t
sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
{
	const char *rhs = comm__str(right->comm);
	const char *lhs = comm__str(left->comm);

	/* Compare the comm strings themselves, right-hand entry first */
	return strcmp(rhs, lhs);
}
static int64_t
sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
{
	const char *rhs = comm__str(right->comm);
	const char *lhs = comm__str(left->comm);

	/* Compare the comm strings themselves, right-hand entry first */
	return strcmp(rhs, lhs);
}
......
......@@ -90,6 +90,8 @@ struct perf_stat_config {
bool scale;
FILE *output;
unsigned int interval;
unsigned int timeout;
int times;
struct runtime_stat *stats;
int stats_num;
};
......
......@@ -1582,7 +1582,7 @@ int dso__load(struct dso *dso, struct map *map)
bool next_slot = false;
bool is_reg;
bool nsexit;
int sirc;
int sirc = -1;
enum dso_binary_type symtab_type = binary_type_symtab[i];
......@@ -1600,16 +1600,14 @@ int dso__load(struct dso *dso, struct map *map)
nsinfo__mountns_exit(&nsc);
is_reg = is_regular_file(name);
sirc = symsrc__init(ss, dso, name, symtab_type);
if (is_reg)
sirc = symsrc__init(ss, dso, name, symtab_type);
if (nsexit)
nsinfo__mountns_enter(dso->nsinfo, &nsc);
if (!is_reg || sirc < 0) {
if (sirc >= 0)
symsrc__destroy(ss);
if (!is_reg || sirc < 0)
continue;
}
if (!syms_ss && symsrc__has_symtab(ss)) {
syms_ss = ss;
......@@ -1960,8 +1958,7 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map)
pr_debug("Using %s for symbols\n", kallsyms_filename);
if (err > 0 && !dso__is_kcore(dso)) {
dso->binary_type = DSO_BINARY_TYPE__GUEST_KALLSYMS;
machine__mmap_name(machine, path, sizeof(path));
dso__set_long_name(dso, strdup(path), true);
dso__set_long_name(dso, machine->mmap_name, false);
map__fixup_start(map);
map__fixup_end(map);
}
......
......@@ -30,6 +30,14 @@ static const char **syscalltbl_native = syscalltbl_x86_64;
#include <asm/syscalls_64.c>
const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID;
static const char **syscalltbl_native = syscalltbl_s390_64;
#elif defined(__powerpc64__)
#include <asm/syscalls_64.c>
const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_64_MAX_ID;
static const char **syscalltbl_native = syscalltbl_powerpc_64;
#elif defined(__powerpc__)
#include <asm/syscalls_32.c>
const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID;
static const char **syscalltbl_native = syscalltbl_powerpc_32;
#endif
struct syscall {
......
......@@ -323,7 +323,7 @@ struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
}
struct thread_map *thread_map__new_str(const char *pid, const char *tid,
uid_t uid, bool per_thread)
uid_t uid, bool all_threads)
{
if (pid)
return thread_map__new_by_pid_str(pid);
......@@ -331,7 +331,7 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid,
if (!tid && uid != UINT_MAX)
return thread_map__new_by_uid(uid);
if (per_thread)
if (all_threads)
return thread_map__new_all_cpus();
return thread_map__new_by_tid_str(tid);
......
......@@ -31,7 +31,7 @@ struct thread_map *thread_map__get(struct thread_map *map);
void thread_map__put(struct thread_map *map);
struct thread_map *thread_map__new_str(const char *pid,
const char *tid, uid_t uid, bool per_thread);
const char *tid, uid_t uid, bool all_threads);
struct thread_map *thread_map__new_by_tid_str(const char *tid_str);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment