Commit 47414424 authored by Ingo Molnar's avatar Ingo Molnar

Merge tag 'perf-core-for-mingo-20161123' of...

Merge tag 'perf-core-for-mingo-20161123' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New tool:

- 'perf sched timehist' provides an analysis of scheduling events.

  Example usage:
      perf sched record -- sleep 1
      perf sched timehist

  By default it shows the individual schedule events, including the wait
  time (time between sched-out and next sched-in events for the task), the
  task scheduling delay (time between wakeup and actually running) and run
  time for the task:

        time    cpu  task name         wait time  sch delay  run time
                     [tid/pid]            (msec)     (msec)    (msec)
    -------- ------  ----------------  ---------  ---------  --------
    1.874569 [0011]  gcc[31949]            0.014      0.000     1.148
    1.874591 [0010]  gcc[31951]            0.000      0.000     0.024
    1.874603 [0010]  migration/10[59]      3.350      0.004     0.011
    1.874604 [0011]  <idle>                1.148      0.000     0.035
    1.874723 [0005]  <idle>                0.016      0.000     1.383
    1.874746 [0005]  gcc[31949]            0.153      0.078     0.022
  ...

  Times are in msec.usec. (David Ahern, Namhyung Kim)

Improvements:

- Make 'perf c2c report' support -f/--force, to allow skipping the
  ownership check for root users, for instance, just like the other
  tools (Jiri Olsa)

- Allow sorting cachelines by total number of HITMs, in addition to
  local and remote numbers (Jiri Olsa)

Fixes:

- Make sure errors aren't suppressed by the TUI reset at the end of
  a 'perf c2c report' session (Jiri Olsa)

Infrastructure changes:

- Initial work on having the annotate code better support multiple
  architectures, including the ability to cross-annotate, i.e. to
  annotate perf.data files collected on an ARM system on a x86_64
  workstation (Arnaldo Carvalho de Melo, Ravi Bangoria, Kim Phillips)

- Use USECS_PER_SEC instead of hard coded number in libtraceevent (Steven Rostedt)

- Add retrieval of preempt count and latency flags in libtraceevent (Steven Rostedt)
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 69e6cdd0 a407b067
......@@ -33,6 +33,7 @@
#include <stdint.h>
#include <limits.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <netinet/in.h>
#include "event-parse.h"
......@@ -5191,17 +5192,43 @@ struct event_format *pevent_data_event_from_type(struct pevent *pevent, int type
}
/**
* pevent_data_pid - parse the PID from raw data
* pevent_data_pid - parse the PID from record
* @pevent: a handle to the pevent
* @rec: the record to parse
*
* This returns the PID from a raw data.
* This returns the PID from a record.
*/
int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec)
{
return parse_common_pid(pevent, rec->data);
}
/**
* pevent_data_prempt_count - parse the preempt count from the record
* @pevent: a handle to the pevent
* @rec: the record to parse
*
* This returns the preempt count from a record.
*/
int pevent_data_prempt_count(struct pevent *pevent, struct pevent_record *rec)
{
return parse_common_pc(pevent, rec->data);
}
/**
* pevent_data_flags - parse the latency flags from the record
* @pevent: a handle to the pevent
* @rec: the record to parse
*
* This returns the latency flags from a record.
*
* Use trace_flag_type enum for the flags (see event-parse.h).
*/
int pevent_data_flags(struct pevent *pevent, struct pevent_record *rec)
{
return parse_common_flags(pevent, rec->data);
}
/**
* pevent_data_comm_from_pid - return the command line from PID
* @pevent: a handle to the pevent
......@@ -5424,8 +5451,8 @@ void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
use_usec_format = is_timestamp_in_us(pevent->trace_clock,
use_trace_clock);
if (use_usec_format) {
secs = record->ts / NSECS_PER_SEC;
nsecs = record->ts - secs * NSECS_PER_SEC;
secs = record->ts / NSEC_PER_SEC;
nsecs = record->ts - secs * NSEC_PER_SEC;
}
if (pevent->latency_format) {
......@@ -5437,10 +5464,10 @@ void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
usecs = nsecs;
p = 9;
} else {
usecs = (nsecs + 500) / NSECS_PER_USEC;
usecs = (nsecs + 500) / NSEC_PER_USEC;
/* To avoid usecs larger than 1 sec */
if (usecs >= 1000000) {
usecs -= 1000000;
if (usecs >= USEC_PER_SEC) {
usecs -= USEC_PER_SEC;
secs++;
}
p = 6;
......
......@@ -172,9 +172,6 @@ struct pevent_plugin_option {
#define PEVENT_PLUGIN_OPTIONS_NAME MAKE_STR(PEVENT_PLUGIN_OPTIONS)
#define PEVENT_PLUGIN_ALIAS_NAME MAKE_STR(PEVENT_PLUGIN_ALIAS)
#define NSECS_PER_SEC 1000000000ULL
#define NSECS_PER_USEC 1000ULL
enum format_flags {
FIELD_IS_ARRAY = 1,
FIELD_IS_POINTER = 2,
......@@ -712,6 +709,8 @@ void pevent_data_lat_fmt(struct pevent *pevent,
int pevent_data_type(struct pevent *pevent, struct pevent_record *rec);
struct event_format *pevent_data_event_from_type(struct pevent *pevent, int type);
int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec);
int pevent_data_prempt_count(struct pevent *pevent, struct pevent_record *rec);
int pevent_data_flags(struct pevent *pevent, struct pevent_record *rec);
const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid);
struct cmdline;
struct cmdline *pevent_data_pid_from_comm(struct pevent *pevent, const char *comm,
......
......@@ -100,6 +100,14 @@ REPORT OPTIONS
--show-all::
Show all captured HITM lines, with no regard to HITM % 0.0005 limit.
-f::
--force::
Don't do ownership validation.
-d::
--display::
Siwtch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default.
C2C RECORD
----------
The perf c2c record command setup options related to HITM cacheline analysis
......
......@@ -8,11 +8,11 @@ perf-sched - Tool to trace/measure scheduler properties (latencies)
SYNOPSIS
--------
[verse]
'perf sched' {record|latency|map|replay|script}
'perf sched' {record|latency|map|replay|script|timehist}
DESCRIPTION
-----------
There are five variants of perf sched:
There are several variants of 'perf sched':
'perf sched record <command>' to record the scheduling events
of an arbitrary workload.
......@@ -36,6 +36,30 @@ There are five variants of perf sched:
are running on a CPU. A '*' denotes the CPU that had the event, and
a dot signals an idle CPU.
'perf sched timehist' provides an analysis of scheduling events.
Example usage:
perf sched record -- sleep 1
perf sched timehist
By default it shows the individual schedule events, including the wait
time (time between sched-out and next sched-in events for the task), the
task scheduling delay (time between wakeup and actually running) and run
time for the task:
time cpu task name wait time sch delay run time
[tid/pid] (msec) (msec) (msec)
-------------- ------ -------------------- --------- --------- ---------
79371.874569 [0011] gcc[31949] 0.014 0.000 1.148
79371.874591 [0010] gcc[31951] 0.000 0.000 0.024
79371.874603 [0010] migration/10[59] 3.350 0.004 0.011
79371.874604 [0011] <idle> 1.148 0.000 0.035
79371.874723 [0005] <idle> 0.016 0.000 1.383
79371.874746 [0005] gcc[31949] 0.153 0.078 0.022
...
Times are in msec.usec.
OPTIONS
-------
-i::
......@@ -66,6 +90,44 @@ OPTIONS for 'perf sched map'
--color-pids::
Highlight the given pids.
OPTIONS for 'perf sched timehist'
---------------------------------
-k::
--vmlinux=<file>::
vmlinux pathname
--kallsyms=<file>::
kallsyms pathname
-g::
--no-call-graph::
Do not display call chains if present.
--max-stack::
Maximum number of functions to display in backtrace, default 5.
-s::
--summary::
Show only a summary of scheduling by thread with min, max, and average
run times (in sec) and relative stddev.
-S::
--with-summary::
Show all scheduling events followed by a summary by thread with min,
max, and average run times (in sec) and relative stddev.
--symfs=<directory>::
Look for files with symbols relative to this directory.
-V::
--cpu-visual::
Show visual aid for sched switches by CPU: 'i' marks idle time,
's' are scheduler events.
-w::
--wakeups::
Show wakeup events.
SEE ALSO
--------
linkperf:perf-record[1]
static struct ins arm__instructions[] = {
{ .name = "add", .ops = &mov_ops, },
{ .name = "addl", .ops = &mov_ops, },
{ .name = "addq", .ops = &mov_ops, },
{ .name = "addw", .ops = &mov_ops, },
{ .name = "and", .ops = &mov_ops, },
{ .name = "b", .ops = &jump_ops, }, // might also be a call
{ .name = "bcc", .ops = &jump_ops, },
{ .name = "bcs", .ops = &jump_ops, },
{ .name = "beq", .ops = &jump_ops, },
{ .name = "bge", .ops = &jump_ops, },
{ .name = "bgt", .ops = &jump_ops, },
{ .name = "bhi", .ops = &jump_ops, },
{ .name = "bl", .ops = &call_ops, },
{ .name = "bls", .ops = &jump_ops, },
{ .name = "blt", .ops = &jump_ops, },
{ .name = "blx", .ops = &call_ops, },
{ .name = "bne", .ops = &jump_ops, },
{ .name = "bts", .ops = &mov_ops, },
{ .name = "call", .ops = &call_ops, },
{ .name = "callq", .ops = &call_ops, },
{ .name = "cmp", .ops = &mov_ops, },
{ .name = "cmpb", .ops = &mov_ops, },
{ .name = "cmpl", .ops = &mov_ops, },
{ .name = "cmpq", .ops = &mov_ops, },
{ .name = "cmpw", .ops = &mov_ops, },
{ .name = "cmpxch", .ops = &mov_ops, },
{ .name = "dec", .ops = &dec_ops, },
{ .name = "decl", .ops = &dec_ops, },
{ .name = "imul", .ops = &mov_ops, },
{ .name = "inc", .ops = &dec_ops, },
{ .name = "incl", .ops = &dec_ops, },
{ .name = "ja", .ops = &jump_ops, },
{ .name = "jae", .ops = &jump_ops, },
{ .name = "jb", .ops = &jump_ops, },
{ .name = "jbe", .ops = &jump_ops, },
{ .name = "jc", .ops = &jump_ops, },
{ .name = "jcxz", .ops = &jump_ops, },
{ .name = "je", .ops = &jump_ops, },
{ .name = "jecxz", .ops = &jump_ops, },
{ .name = "jg", .ops = &jump_ops, },
{ .name = "jge", .ops = &jump_ops, },
{ .name = "jl", .ops = &jump_ops, },
{ .name = "jle", .ops = &jump_ops, },
{ .name = "jmp", .ops = &jump_ops, },
{ .name = "jmpq", .ops = &jump_ops, },
{ .name = "jna", .ops = &jump_ops, },
{ .name = "jnae", .ops = &jump_ops, },
{ .name = "jnb", .ops = &jump_ops, },
{ .name = "jnbe", .ops = &jump_ops, },
{ .name = "jnc", .ops = &jump_ops, },
{ .name = "jne", .ops = &jump_ops, },
{ .name = "jng", .ops = &jump_ops, },
{ .name = "jnge", .ops = &jump_ops, },
{ .name = "jnl", .ops = &jump_ops, },
{ .name = "jnle", .ops = &jump_ops, },
{ .name = "jno", .ops = &jump_ops, },
{ .name = "jnp", .ops = &jump_ops, },
{ .name = "jns", .ops = &jump_ops, },
{ .name = "jnz", .ops = &jump_ops, },
{ .name = "jo", .ops = &jump_ops, },
{ .name = "jp", .ops = &jump_ops, },
{ .name = "jpe", .ops = &jump_ops, },
{ .name = "jpo", .ops = &jump_ops, },
{ .name = "jrcxz", .ops = &jump_ops, },
{ .name = "js", .ops = &jump_ops, },
{ .name = "jz", .ops = &jump_ops, },
{ .name = "lea", .ops = &mov_ops, },
{ .name = "lock", .ops = &lock_ops, },
{ .name = "mov", .ops = &mov_ops, },
{ .name = "movb", .ops = &mov_ops, },
{ .name = "movdqa", .ops = &mov_ops, },
{ .name = "movl", .ops = &mov_ops, },
{ .name = "movq", .ops = &mov_ops, },
{ .name = "movslq", .ops = &mov_ops, },
{ .name = "movzbl", .ops = &mov_ops, },
{ .name = "movzwl", .ops = &mov_ops, },
{ .name = "nop", .ops = &nop_ops, },
{ .name = "nopl", .ops = &nop_ops, },
{ .name = "nopw", .ops = &nop_ops, },
{ .name = "or", .ops = &mov_ops, },
{ .name = "orl", .ops = &mov_ops, },
{ .name = "test", .ops = &mov_ops, },
{ .name = "testb", .ops = &mov_ops, },
{ .name = "testl", .ops = &mov_ops, },
{ .name = "xadd", .ops = &mov_ops, },
{ .name = "xbeginl", .ops = &jump_ops, },
{ .name = "xbeginq", .ops = &jump_ops, },
{ .name = "retq", .ops = &ret_ops, },
};
static struct ins x86__instructions[] = {
{ .name = "add", .ops = &mov_ops, },
{ .name = "addl", .ops = &mov_ops, },
{ .name = "addq", .ops = &mov_ops, },
{ .name = "addw", .ops = &mov_ops, },
{ .name = "and", .ops = &mov_ops, },
{ .name = "bts", .ops = &mov_ops, },
{ .name = "call", .ops = &call_ops, },
{ .name = "callq", .ops = &call_ops, },
{ .name = "cmp", .ops = &mov_ops, },
{ .name = "cmpb", .ops = &mov_ops, },
{ .name = "cmpl", .ops = &mov_ops, },
{ .name = "cmpq", .ops = &mov_ops, },
{ .name = "cmpw", .ops = &mov_ops, },
{ .name = "cmpxch", .ops = &mov_ops, },
{ .name = "dec", .ops = &dec_ops, },
{ .name = "decl", .ops = &dec_ops, },
{ .name = "imul", .ops = &mov_ops, },
{ .name = "inc", .ops = &dec_ops, },
{ .name = "incl", .ops = &dec_ops, },
{ .name = "ja", .ops = &jump_ops, },
{ .name = "jae", .ops = &jump_ops, },
{ .name = "jb", .ops = &jump_ops, },
{ .name = "jbe", .ops = &jump_ops, },
{ .name = "jc", .ops = &jump_ops, },
{ .name = "jcxz", .ops = &jump_ops, },
{ .name = "je", .ops = &jump_ops, },
{ .name = "jecxz", .ops = &jump_ops, },
{ .name = "jg", .ops = &jump_ops, },
{ .name = "jge", .ops = &jump_ops, },
{ .name = "jl", .ops = &jump_ops, },
{ .name = "jle", .ops = &jump_ops, },
{ .name = "jmp", .ops = &jump_ops, },
{ .name = "jmpq", .ops = &jump_ops, },
{ .name = "jna", .ops = &jump_ops, },
{ .name = "jnae", .ops = &jump_ops, },
{ .name = "jnb", .ops = &jump_ops, },
{ .name = "jnbe", .ops = &jump_ops, },
{ .name = "jnc", .ops = &jump_ops, },
{ .name = "jne", .ops = &jump_ops, },
{ .name = "jng", .ops = &jump_ops, },
{ .name = "jnge", .ops = &jump_ops, },
{ .name = "jnl", .ops = &jump_ops, },
{ .name = "jnle", .ops = &jump_ops, },
{ .name = "jno", .ops = &jump_ops, },
{ .name = "jnp", .ops = &jump_ops, },
{ .name = "jns", .ops = &jump_ops, },
{ .name = "jnz", .ops = &jump_ops, },
{ .name = "jo", .ops = &jump_ops, },
{ .name = "jp", .ops = &jump_ops, },
{ .name = "jpe", .ops = &jump_ops, },
{ .name = "jpo", .ops = &jump_ops, },
{ .name = "jrcxz", .ops = &jump_ops, },
{ .name = "js", .ops = &jump_ops, },
{ .name = "jz", .ops = &jump_ops, },
{ .name = "lea", .ops = &mov_ops, },
{ .name = "lock", .ops = &lock_ops, },
{ .name = "mov", .ops = &mov_ops, },
{ .name = "movb", .ops = &mov_ops, },
{ .name = "movdqa", .ops = &mov_ops, },
{ .name = "movl", .ops = &mov_ops, },
{ .name = "movq", .ops = &mov_ops, },
{ .name = "movslq", .ops = &mov_ops, },
{ .name = "movzbl", .ops = &mov_ops, },
{ .name = "movzwl", .ops = &mov_ops, },
{ .name = "nop", .ops = &nop_ops, },
{ .name = "nopl", .ops = &nop_ops, },
{ .name = "nopw", .ops = &nop_ops, },
{ .name = "or", .ops = &mov_ops, },
{ .name = "orl", .ops = &mov_ops, },
{ .name = "test", .ops = &mov_ops, },
{ .name = "testb", .ops = &mov_ops, },
{ .name = "testl", .ops = &mov_ops, },
{ .name = "xadd", .ops = &mov_ops, },
{ .name = "xbeginl", .ops = &jump_ops, },
{ .name = "xbeginq", .ops = &jump_ops, },
{ .name = "retq", .ops = &ret_ops, },
};
......@@ -91,6 +91,19 @@ struct perf_c2c {
enum {
DISPLAY_LCL,
DISPLAY_RMT,
DISPLAY_TOT,
DISPLAY_MAX,
};
static const char *display_str[DISPLAY_MAX] = {
[DISPLAY_LCL] = "Local",
[DISPLAY_RMT] = "Remote",
[DISPLAY_TOT] = "Total",
};
static const struct option c2c_options[] = {
OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"),
OPT_END()
};
static struct perf_c2c c2c;
......@@ -745,6 +758,10 @@ static double percent_hitm(struct c2c_hist_entry *c2c_he)
case DISPLAY_LCL:
st = stats->lcl_hitm;
tot = total->lcl_hitm;
break;
case DISPLAY_TOT:
st = stats->tot_hitm;
tot = total->tot_hitm;
default:
break;
}
......@@ -1044,6 +1061,9 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
break;
case DISPLAY_LCL:
DISPLAY_HITM(lcl_hitm);
break;
case DISPLAY_TOT:
DISPLAY_HITM(tot_hitm);
default:
break;
}
......@@ -1351,6 +1371,7 @@ static struct c2c_dimension dim_tot_loads = {
static struct c2c_header percent_hitm_header[] = {
[DISPLAY_LCL] = HEADER_BOTH("Lcl", "Hitm"),
[DISPLAY_RMT] = HEADER_BOTH("Rmt", "Hitm"),
[DISPLAY_TOT] = HEADER_BOTH("Tot", "Hitm"),
};
static struct c2c_dimension dim_percent_hitm = {
......@@ -1794,6 +1815,9 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats)
break;
case DISPLAY_RMT:
FILTER_HITM(rmt_hitm);
break;
case DISPLAY_TOT:
FILTER_HITM(tot_hitm);
default:
break;
};
......@@ -1809,8 +1833,9 @@ static inline int valid_hitm_or_store(struct hist_entry *he)
bool has_hitm;
c2c_he = container_of(he, struct c2c_hist_entry, he);
has_hitm = c2c.display == DISPLAY_LCL ?
c2c_he->stats.lcl_hitm : c2c_he->stats.rmt_hitm;
has_hitm = c2c.display == DISPLAY_TOT ? c2c_he->stats.tot_hitm :
c2c.display == DISPLAY_LCL ? c2c_he->stats.lcl_hitm :
c2c_he->stats.rmt_hitm;
return has_hitm || c2c_he->stats.store;
}
......@@ -2095,7 +2120,7 @@ static void print_c2c_info(FILE *out, struct perf_session *session)
first = false;
}
fprintf(out, " Cachelines sort on : %s HITMs\n",
c2c.display == DISPLAY_LCL ? "Local" : "Remote");
display_str[c2c.display]);
fprintf(out, " Cacheline data grouping : %s\n", c2c.cl_sort);
}
......@@ -2250,7 +2275,7 @@ static int perf_c2c_browser__title(struct hist_browser *browser,
"Shared Data Cache Line Table "
"(%lu entries, sorted on %s HITMs)",
browser->nr_non_filtered_entries,
c2c.display == DISPLAY_LCL ? "local" : "remote");
display_str[c2c.display]);
return 0;
}
......@@ -2387,9 +2412,11 @@ static int setup_callchain(struct perf_evlist *evlist)
static int setup_display(const char *str)
{
const char *display = str ?: "rmt";
const char *display = str ?: "tot";
if (!strcmp(display, "rmt"))
if (!strcmp(display, "tot"))
c2c.display = DISPLAY_TOT;
else if (!strcmp(display, "rmt"))
c2c.display = DISPLAY_RMT;
else if (!strcmp(display, "lcl"))
c2c.display = DISPLAY_LCL;
......@@ -2474,6 +2501,8 @@ static int setup_coalesce(const char *coalesce, bool no_source)
return -1;
if (asprintf(&c2c.cl_resort, "offset,%s",
c2c.display == DISPLAY_TOT ?
"tot_hitm" :
c2c.display == DISPLAY_RMT ?
"rmt_hitm,lcl_hitm" :
"lcl_hitm,rmt_hitm") < 0)
......@@ -2496,11 +2525,9 @@ static int perf_c2c__report(int argc, const char **argv)
const char *display = NULL;
const char *coalesce = NULL;
bool no_source = false;
const struct option c2c_options[] = {
const struct option options[] = {
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_STRING('i', "input", &input_name, "file",
"the input file to process"),
OPT_INCR('N', "node-info", &c2c.node_info,
......@@ -2520,32 +2547,28 @@ static int perf_c2c__report(int argc, const char **argv)
"print_type,threshold[,print_limit],order,sort_key[,branch],value",
callchain_help, &parse_callchain_opt,
callchain_default_opt),
OPT_STRING('d', "display", &display, NULL, "lcl,rmt"),
OPT_STRING('d', "display", &display, "Switch HITM output type", "lcl,rmt"),
OPT_STRING('c', "coalesce", &coalesce, "coalesce fields",
"coalesce fields: pid,tid,iaddr,dso"),
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
OPT_PARENT(c2c_options),
OPT_END()
};
int err = 0;
argc = parse_options(argc, argv, c2c_options, report_c2c_usage,
argc = parse_options(argc, argv, options, report_c2c_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
if (argc)
usage_with_options(report_c2c_usage, c2c_options);
usage_with_options(report_c2c_usage, options);
if (c2c.stats_only)
c2c.use_stdio = true;
if (c2c.use_stdio)
use_browser = 0;
else
use_browser = 1;
setup_browser(false);
if (!input_name || !strlen(input_name))
input_name = "perf.data";
file.path = input_name;
file.path = input_name;
file.force = symbol_conf.force;
err = setup_display(display);
if (err)
......@@ -2568,6 +2591,7 @@ static int perf_c2c__report(int argc, const char **argv)
pr_debug("No memory for session\n");
goto out;
}
err = setup_nodes(session);
if (err) {
pr_err("Failed setup nodes\n");
......@@ -2587,6 +2611,13 @@ static int perf_c2c__report(int argc, const char **argv)
goto out_session;
}
if (c2c.use_stdio)
use_browser = 0;
else
use_browser = 1;
setup_browser(false);
err = perf_session__process_events(session);
if (err) {
pr_err("failed to process sample\n");
......@@ -2605,6 +2636,7 @@ static int perf_c2c__report(int argc, const char **argv)
"tot_loads,"
"ld_fbhit,ld_l1hit,ld_l2hit,"
"ld_lclhit,ld_rmthit",
c2c.display == DISPLAY_TOT ? "tot_hitm" :
c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm"
);
......@@ -2655,11 +2687,10 @@ static int perf_c2c__record(int argc, const char **argv)
OPT_CALLBACK('e', "event", &event_set, "event",
"event selector. Use 'perf mem record -e list' to list available events",
parse_record_events),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_BOOLEAN('u', "all-user", &all_user, "collect only user level data"),
OPT_BOOLEAN('k', "all-kernel", &all_kernel, "collect only kernel level data"),
OPT_UINTEGER('l', "ldlat", &perf_mem_events__loads_ldlat, "setup mem-loads latency"),
OPT_PARENT(c2c_options),
OPT_END()
};
......@@ -2731,11 +2762,6 @@ static int perf_c2c__record(int argc, const char **argv)
int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused)
{
const struct option c2c_options[] = {
OPT_INCR('v', "verbose", &verbose, "be more verbose"),
OPT_END()
};
argc = parse_options(argc, argv, c2c_options, c2c_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
......
This diff is collapsed.
......@@ -130,7 +130,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
return err;
}
err = symbol__disassemble(sym, map, 0);
err = symbol__disassemble(sym, map, NULL, 0);
if (err == 0) {
out_assign:
top->sym_filter_entry = he;
......
......@@ -1050,7 +1050,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
(nr_pcnt - 1);
}
err = symbol__disassemble(sym, map, sizeof_bdl);
err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), sizeof_bdl);
if (err) {
char msg[BUFSIZ];
symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
......
......@@ -167,7 +167,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map,
if (map->dso->annotate_warned)
return -1;
err = symbol__disassemble(sym, map, 0);
err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0);
if (err) {
char msg[BUFSIZ];
symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
......
This diff is collapsed.
......@@ -34,9 +34,11 @@ struct ins_operands {
};
};
struct arch;
struct ins_ops {
void (*free)(struct ins_operands *ops);
int (*parse)(struct ins_operands *ops, struct map *map);
int (*parse)(struct arch *arch, struct ins_operands *ops, struct map *map);
int (*scnprintf)(struct ins *ins, char *bf, size_t size,
struct ins_operands *ops);
};
......@@ -156,7 +158,7 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);
int symbol__alloc_hist(struct symbol *sym);
void symbol__annotate_zero_histograms(struct symbol *sym);
int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize);
int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize);
enum symbol_disassemble_errno {
SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0,
......
......@@ -1481,7 +1481,7 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
group_fd = get_group_fd(evsel, cpu, thread);
retry_open:
pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx\n",
pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
pid, cpus->map[cpu], group_fd, flags);
FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
......@@ -1490,11 +1490,13 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
group_fd, flags);
if (FD(evsel, cpu, thread) < 0) {
err = -errno;
pr_debug2("sys_perf_event_open failed, error %d\n",
pr_debug2("\nsys_perf_event_open failed, error %d\n",
err);
goto try_fallback;
}
pr_debug2(" = %d\n", FD(evsel, cpu, thread));
if (evsel->bpf_fd >= 0) {
int evt_fd = FD(evsel, cpu, thread);
int bpf_fd = evsel->bpf_fd;
......
......@@ -391,6 +391,7 @@ int perf_evsel__fprintf(struct perf_evsel *evsel,
#define EVSEL__PRINT_ONELINE (1<<4)
#define EVSEL__PRINT_SRCLINE (1<<5)
#define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6)
#define EVSEL__PRINT_CALLCHAIN_ARROW (1<<7)
struct callchain_cursor;
......
......@@ -108,7 +108,9 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
int print_oneline = print_opts & EVSEL__PRINT_ONELINE;
int print_srcline = print_opts & EVSEL__PRINT_SRCLINE;
int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
int print_arrow = print_opts & EVSEL__PRINT_CALLCHAIN_ARROW;
char s = print_oneline ? ' ' : '\t';
bool first = true;
if (sample->callchain) {
struct addr_location node_al;
......@@ -124,6 +126,9 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
if (print_arrow && !first)
printed += fprintf(fp, " <-");
if (print_ip)
printed += fprintf(fp, "%c%16" PRIx64, s, node->ip);
......@@ -137,7 +142,8 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
if (print_symoffset) {
printed += __symbol__fprintf_symname_offs(node->sym, &node_al,
print_unknown_as_addr, fp);
print_unknown_as_addr,
true, fp);
} else {
printed += __symbol__fprintf_symname(node->sym, &node_al,
print_unknown_as_addr, fp);
......@@ -157,6 +163,7 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
printed += fprintf(fp, "\n");
callchain_cursor_advance(cursor);
first = false;
}
}
......@@ -188,7 +195,8 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
printed += fprintf(fp, " ");
if (print_symoffset) {
printed += __symbol__fprintf_symname_offs(al->sym, al,
print_unknown_as_addr, fp);
print_unknown_as_addr,
true, fp);
} else {
printed += __symbol__fprintf_symname(al->sym, al,
print_unknown_as_addr, fp);
......
......@@ -280,6 +280,12 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
u64 lock = data_src->mem_lock;
int err = 0;
#define HITM_INC(__f) \
do { \
stats->__f++; \
stats->tot_hitm++; \
} while (0)
#define P(a, b) PERF_MEM_##a##_##b
stats->nr_entries++;
......@@ -303,7 +309,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
if (lvl & P(LVL, L2 )) stats->ld_l2hit++;
if (lvl & P(LVL, L3 )) {
if (snoop & P(SNOOP, HITM))
stats->lcl_hitm++;
HITM_INC(lcl_hitm);
else
stats->ld_llchit++;
}
......@@ -331,7 +337,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
if (snoop & P(SNOOP, HIT))
stats->rmt_hit++;
else if (snoop & P(SNOOP, HITM))
stats->rmt_hitm++;
HITM_INC(rmt_hitm);
}
if ((lvl & P(LVL, MISS)))
......@@ -364,6 +370,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
}
#undef P
#undef HITM_INC
return err;
}
......@@ -390,6 +397,7 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
stats->ld_llchit += add->ld_llchit;
stats->lcl_hitm += add->lcl_hitm;
stats->rmt_hitm += add->rmt_hitm;
stats->tot_hitm += add->tot_hitm;
stats->rmt_hit += add->rmt_hit;
stats->lcl_dram += add->lcl_dram;
stats->rmt_dram += add->rmt_dram;
......
......@@ -59,6 +59,7 @@ struct c2c_stats {
u32 ld_llchit; /* count of loads that hit LLC */
u32 lcl_hitm; /* count of loads with local HITM */
u32 rmt_hitm; /* count of loads with remote HITM */
u32 tot_hitm; /* count of loads with local and remote HITM */
u32 rmt_hit; /* count of loads with remote hit clean; */
u32 lcl_dram; /* count of loads miss to local DRAM */
u32 rmt_dram; /* count of loads miss to remote DRAM */
......
......@@ -282,7 +282,8 @@ int symbol__annotation_init(void);
struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name);
size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
const struct addr_location *al,
bool unknown_as_addr, FILE *fp);
bool unknown_as_addr,
bool print_offsets, FILE *fp);
size_t symbol__fprintf_symname_offs(const struct symbol *sym,
const struct addr_location *al, FILE *fp);
size_t __symbol__fprintf_symname(const struct symbol *sym,
......
......@@ -15,14 +15,15 @@ size_t symbol__fprintf(struct symbol *sym, FILE *fp)
size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
const struct addr_location *al,
bool unknown_as_addr, FILE *fp)
bool unknown_as_addr,
bool print_offsets, FILE *fp)
{
unsigned long offset;
size_t length;
if (sym && sym->name) {
length = fprintf(fp, "%s", sym->name);
if (al) {
if (al && print_offsets) {
if (al->addr < sym->end)
offset = al->addr - sym->start;
else
......@@ -40,19 +41,19 @@ size_t symbol__fprintf_symname_offs(const struct symbol *sym,
const struct addr_location *al,
FILE *fp)
{
return __symbol__fprintf_symname_offs(sym, al, false, fp);
return __symbol__fprintf_symname_offs(sym, al, false, true, fp);
}
size_t __symbol__fprintf_symname(const struct symbol *sym,
const struct addr_location *al,
bool unknown_as_addr, FILE *fp)
{
return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, fp);
return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, false, fp);
}
size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
{
return __symbol__fprintf_symname_offs(sym, NULL, false, fp);
return __symbol__fprintf_symname_offs(sym, NULL, false, false, fp);
}
size_t dso__fprintf_symbols_by_name(struct dso *dso,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment