Commit 3e71713c authored by Linus Torvalds

Merge tag 'perf-tools-fixes-for-v5.18-2022-04-29' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tools fixes from Arnaldo Carvalho de Melo:

 - Fix Intel PT (Processor Trace) timeless decoding with perf.data
   directory.

 - ARM SPE (Statistical Profiling Extensions) address fixes, for
   synthesized events and for SPE events with physical addresses. Add a
   simple 'perf test' entry to make sure this doesn't regress.

 - Remove arch specific processing of kallsyms data to fixup symbol end
   address, fixing excessive memory consumption in the annotation code.

* tag 'perf-tools-fixes-for-v5.18-2022-04-29' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
  perf symbol: Remove arch__symbols__fixup_end()
  perf symbol: Update symbols__fixup_end()
  perf symbol: Pass is_kallsyms to symbols__fixup_end()
  perf test: Add perf_event_attr test for Arm SPE
  perf arm-spe: Fix SPE events with phys addresses
  perf arm-spe: Fix addresses of synthesized SPE events
  perf intel-pt: Fix timeless decoding with perf.data directory
parents 2d0de93c a5d20d42
@@ -148,6 +148,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
 	bool privileged = perf_event_paranoid_check(-1);
 	struct evsel *tracking_evsel;
 	int err;
+	u64 bit;
 
 	sper->evlist = evlist;
 
@@ -245,6 +246,15 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
 	 */
 	evsel__set_sample_bit(arm_spe_evsel, DATA_SRC);
 
+	/*
+	 * The PHYS_ADDR flag does not affect the driver behaviour, it is used to
+	 * inform that the resulting output's SPE samples contain physical addresses
+	 * where applicable.
+	 */
+	bit = perf_pmu__format_bits(&arm_spe_pmu->format, "pa_enable");
+	if (arm_spe_evsel->core.attr.config & bit)
+		evsel__set_sample_bit(arm_spe_evsel, PHYS_ADDR);
+
 	/* Add dummy event to keep tracking */
 	err = parse_events(evlist, "dummy:u", NULL);
 	if (err)
...
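In the hunk above, perf_pmu__format_bits() turns the PMU's "pa_enable" sysfs format term into the bit(s) that term occupies in attr.config, so the check amounts to "did the user pass pa_enable=1 on the event?". Below is a rough standalone sketch of the same mapping that reads the standard perf_event sysfs format file directly; the PMU name arm_spe_0 matches the events used in the tests in this series, but the exact bit position is whatever the running kernel's SPE driver exposes, so treat this as an illustration, not the perf implementation.

/* Illustrative only: build the attr.config mask described by a PMU format
 * term.  Format files contain "config:<bit>" or "config:<lo>-<hi>". */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	FILE *f = fopen("/sys/bus/event_source/devices/arm_spe_0/format/pa_enable", "r");
	unsigned int lo = 0, hi = 0;
	uint64_t mask = 0;
	int n;

	if (!f) {
		perror("pa_enable format");
		return 1;
	}
	n = fscanf(f, "config:%u-%u", &lo, &hi);
	fclose(f);
	if (n < 1)
		return 1;
	if (n == 1)		/* single-bit term, e.g. "config:1" */
		hi = lo;
	for (unsigned int b = lo; b <= hi; b++)
		mask |= 1ULL << b;

	printf("pa_enable occupies attr.config mask %#llx\n",
	       (unsigned long long)mask);
	return 0;
}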
@@ -8,27 +8,6 @@
 #include "callchain.h"
 #include "record.h"
 
-/* On arm64, kernel text segment starts at high memory address,
- * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
- * address, like 0xffff 0000 00ax xxxx. When only small amount of
- * memory is used by modules, gap between end of module's text segment
- * and start of kernel text segment may reach 2G.
- * Therefore do not fill this gap and do not assign it to the kernel dso map.
- */
-#define SYMBOL_LIMIT (1 << 12) /* 4K */
-
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
-	if ((strchr(p->name, '[') && strchr(c->name, '[') == NULL) ||
-	    (strchr(p->name, '[') == NULL && strchr(c->name, '[')))
-		/* Limit range of last symbol in module and kernel */
-		p->end += SYMBOL_LIMIT;
-	else
-		p->end = c->start;
-	pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
-}
-
 void arch__add_leaf_frame_record_opts(struct record_opts *opts)
 {
 	opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask;
...
 perf-y += header.o
-perf-y += machine.o
 perf-y += kvm-stat.o
 perf-y += perf_regs.o
 perf-y += mem-events.o
...
-// SPDX-License-Identifier: GPL-2.0
-#include <inttypes.h>
-#include <stdio.h>
-#include <string.h>
-#include <internal/lib.h> // page_size
-#include "debug.h"
-#include "symbol.h"
-
-/* On powerpc kernel text segment start at memory addresses, 0xc000000000000000
- * whereas the modules are located at very high memory addresses,
- * for example 0xc00800000xxxxxxx. The gap between end of kernel text segment
- * and beginning of first module's text segment is very high.
- * Therefore do not fill this gap and do not assign it to the kernel dso map.
- */
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
-	if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
-		/* Limit the range of last kernel symbol */
-		p->end += page_size;
-	else
-		p->end = c->start;
-	pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
-}
@@ -35,19 +35,3 @@ int arch__fix_module_text_start(u64 *start, u64 *size, const char *name)
 	return 0;
 }
-
-/* On s390 kernel text segment start is located at very low memory addresses,
- * for example 0x10000. Modules are located at very high memory addresses,
- * for example 0x3ff xxxx xxxx. The gap between end of kernel text segment
- * and beginning of first module's text segment is very big.
- * Therefore do not fill this gap and do not assign it to the kernel dso map.
- */
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
-	if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
-		/* Last kernel symbol mapped to end of page */
-		p->end = roundup(p->end, page_size);
-	else
-		p->end = c->start;
-	pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
-}
@@ -60,6 +60,7 @@ Following tests are defined (with perf commands):
 	perf record -R kill (test-record-raw)
 	perf record -c 2 -e arm_spe_0// -- kill (test-record-spe-period)
 	perf record -e arm_spe_0/period=3/ -- kill (test-record-spe-period-term)
+	perf record -e arm_spe_0/pa_enable=1/ -- kill (test-record-spe-physical-address)
 	perf stat -e cycles kill (test-stat-basic)
 	perf stat kill (test-stat-default)
 	perf stat -d kill (test-stat-detailed-1)
...
+[config]
+command = record
+args    = --no-bpf-event -e arm_spe_0/pa_enable=1/ -- kill >/dev/null 2>&1
+ret     = 1
+arch    = aarch64
+
+[event-10:base-record-spe]
+# 622727 is the decimal of IP|TID|TIME|CPU|IDENTIFIER|DATA_SRC|PHYS_ADDR
+sample_type=622727
+
+# dummy event
+[event-1:base-record-spe]
\ No newline at end of file
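A side note on the magic number in the new attr test: sample_type is just the OR of the flags listed in its comment, IP (0x1) | TID (0x2) | TIME (0x4) | CPU (0x80) | DATA_SRC (0x8000) | IDENTIFIER (0x10000) | PHYS_ADDR (0x80000) = 0x98087 = 622727. A tiny standalone check against the UAPI header (not part of the patch) prints the same constant:

/* Prints 622727: the OR of the sample flags named in the test comment,
 * using the values from <linux/perf_event.h>. */
#include <stdio.h>
#include <linux/perf_event.h>

int main(void)
{
	unsigned long long sample_type =
		PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME |
		PERF_SAMPLE_CPU | PERF_SAMPLE_IDENTIFIER |
		PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_PHYS_ADDR;

	printf("%llu\n", sample_type);	/* 622727, i.e. 0x98087 */
	return 0;
}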
@@ -1033,10 +1033,11 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
 	memset(&attr, 0, sizeof(struct perf_event_attr));
 	attr.size = sizeof(struct perf_event_attr);
 	attr.type = PERF_TYPE_HARDWARE;
-	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
+	attr.sample_type = evsel->core.attr.sample_type &
+			   (PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
 	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
 			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
-			    PERF_SAMPLE_WEIGHT;
+			    PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
 	if (spe->timeless_decoding)
 		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
 	else
...
@@ -2576,7 +2576,7 @@ int perf_session__process_events(struct perf_session *session)
 	if (perf_data__is_pipe(session->data))
 		return __perf_session__process_pipe_events(session);
 
-	if (perf_data__is_dir(session->data))
+	if (perf_data__is_dir(session->data) && session->data->dir.nr)
 		return __perf_session__process_dir_events(session);
 
 	return __perf_session__process_events(session);
...
@@ -1290,7 +1290,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 	 * For misannotated, zeroed, ASM function sizes.
 	 */
 	if (nr > 0) {
-		symbols__fixup_end(&dso->symbols);
+		symbols__fixup_end(&dso->symbols, false);
 		symbols__fixup_duplicate(&dso->symbols);
 		if (kmap) {
 			/*
...
@@ -101,11 +101,6 @@ static int prefix_underscores_count(const char *str)
 	return tail - str;
 }
 
-void __weak arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
-	p->end = c->start;
-}
-
 const char * __weak arch__normalize_symbol_name(const char *name)
 {
 	return name;
@@ -217,7 +212,8 @@ void symbols__fixup_duplicate(struct rb_root_cached *symbols)
 	}
 }
 
-void symbols__fixup_end(struct rb_root_cached *symbols)
+/* Update zero-sized symbols using the address of the next symbol */
+void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms)
 {
 	struct rb_node *nd, *prevnd = rb_first_cached(symbols);
 	struct symbol *curr, *prev;
@@ -231,8 +227,29 @@ void symbols__fixup_end(struct rb_root_cached *symbols)
 		prev = curr;
 		curr = rb_entry(nd, struct symbol, rb_node);
 
-		if (prev->end == prev->start || prev->end != curr->start)
-			arch__symbols__fixup_end(prev, curr);
+		/*
+		 * On some architecture kernel text segment start is located at
+		 * some low memory address, while modules are located at high
+		 * memory addresses (or vice versa). The gap between end of
+		 * kernel text segment and beginning of first module's text
+		 * segment is very big. Therefore do not fill this gap and do
+		 * not assign it to the kernel dso map (kallsyms).
+		 *
		 * In kallsyms, it determines module symbols using '[' character
+		 * like in:
+		 *   ffffffffc1937000 T hdmi_driver_init [snd_hda_codec_hdmi]
+		 */
+		if (prev->end == prev->start) {
+			/* Last kernel/module symbol mapped to end of page */
+			if (is_kallsyms && (!strchr(prev->name, '[') !=
+					    !strchr(curr->name, '[')))
+				prev->end = roundup(prev->end + 4096, 4096);
+			else
+				prev->end = curr->start;
+
+			pr_debug4("%s sym:%s end:%#" PRIx64 "\n",
+				  __func__, prev->name, prev->end);
+		}
 	}
 
 	/* Last entry */
@@ -1467,7 +1484,7 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename,
 	if (kallsyms__delta(kmap, filename, &delta))
 		return -1;
 
-	symbols__fixup_end(&dso->symbols);
+	symbols__fixup_end(&dso->symbols, true);
 	symbols__fixup_duplicate(&dso->symbols);
 
 	if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
@@ -1659,7 +1676,7 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
 #undef bfd_asymbol_section
 #endif
 
-	symbols__fixup_end(&dso->symbols);
+	symbols__fixup_end(&dso->symbols, false);
 	symbols__fixup_duplicate(&dso->symbols);
 
 	dso->adjust_symbols = 1;
...
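One detail of the new symbols__fixup_end() above that is easy to misread is the "!strchr(...) != !strchr(...)" comparison: the double negation turns each strchr() result into a boolean, so the condition is true exactly when one of the two adjacent symbols carries a kallsyms "[module]" suffix and the other does not, i.e. at a kernel/module boundary. A small standalone sketch, with symbol names that are made-up examples following the kallsyms convention quoted in the patch comment:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* True when exactly one of the two names looks like a kallsyms module
 * symbol (contains the '[' of a "[module]" suffix). */
static bool crosses_module_boundary(const char *prev, const char *curr)
{
	return !strchr(prev, '[') != !strchr(curr, '[');
}

int main(void)
{
	/* kernel symbol followed by a module symbol -> boundary, prints 1 */
	printf("%d\n", crosses_module_boundary("_etext",
			"hdmi_driver_init [snd_hda_codec_hdmi]"));
	/* two module symbols -> no boundary, prints 0 */
	printf("%d\n", crosses_module_boundary("snd_hdmi_init [snd_hda_codec_hdmi]",
			"azx_probe [snd_hda_intel]"));
	return 0;
}

At such a boundary the trailing symbol is only extended to the end of its page (roundup to 4096) rather than all the way to the next symbol, which is what previously produced the huge gap-spanning symbols and the excessive annotation memory use mentioned in the merge summary.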
@@ -203,7 +203,7 @@ void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym,
 		       bool kernel);
 void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
 void symbols__fixup_duplicate(struct rb_root_cached *symbols);
-void symbols__fixup_end(struct rb_root_cached *symbols);
+void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms);
 void maps__fixup_end(struct maps *maps);
 
 typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
@@ -241,7 +241,6 @@ const char *arch__normalize_symbol_name(const char *name);
 #define SYMBOL_A 0
 #define SYMBOL_B 1
 
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c);
 int arch__compare_symbol_names(const char *namea, const char *nameb);
 int arch__compare_symbol_names_n(const char *namea, const char *nameb,
 				 unsigned int n);
...