Commit f2b91386 authored by Adrian Hunter, committed by Arnaldo Carvalho de Melo

perf intel-pt: Support itrace A option to approximate IPC

Normally, for cycle-accurate mode, IPC values are an exact number of
instructions and cycles. Due to the granularity of timestamps, that happens
only when a CYC packet correlates to the event.

Support the itrace 'A' option, to use instead, the number of cycles
associated with the current timestamp. This provides IPC information for
every change of timestamp, but at the expense of accuracy. Due to the
granularity of timestamps, the actual number of cycles increases even
though the cycles reported do not. The number of instructions is known,
but if IPC is reported, cycles can be too low and so IPC is too high. Note
that inaccuracy decreases as the period of sampling increases i.e. if the
number of cycles is too low by a small amount, that becomes less
significant if the number of cycles is large.

Furthermore, it can be used in conjunction with dlfilter-show-cycles.so
to provide higher granularity cycle information.
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/r/20211027080334.365596-4-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent b6778fe1
...@@ -157,6 +157,15 @@ of instructions and number of cycles since the last update, and thus represent ...@@ -157,6 +157,15 @@ of instructions and number of cycles since the last update, and thus represent
the average IPC since the last IPC for that event type. Note IPC for "branches" the average IPC since the last IPC for that event type. Note IPC for "branches"
events is calculated separately from IPC for "instructions" events. events is calculated separately from IPC for "instructions" events.
Even with the 'cyc' config term, it is possible to produce IPC information for
every change of timestamp, but at the expense of accuracy. That is selected by
specifying the itrace 'A' option. Due to the granularity of timestamps, the
actual number of cycles increases even though the cycles reported do not.
The number of instructions is known, but if IPC is reported, cycles can be too
low and so IPC is too high. Note that inaccuracy decreases as the period of
sampling increases i.e. if the number of cycles is too low by a small amount,
that becomes less significant if the number of cycles is large.
Also note that the IPC instruction count may or may not include the current Also note that the IPC instruction count may or may not include the current
instruction. If the cycle count is associated with an asynchronous branch instruction. If the cycle count is associated with an asynchronous branch
(e.g. page fault or interrupt), then the instruction count does not include the (e.g. page fault or interrupt), then the instruction count does not include the
...@@ -873,6 +882,7 @@ The letters are: ...@@ -873,6 +882,7 @@ The letters are:
L synthesize last branch entries on existing event records L synthesize last branch entries on existing event records
s skip initial number of events s skip initial number of events
q quicker (less detailed) decoding q quicker (less detailed) decoding
A approximate IPC
Z prefer to ignore timestamps (so-called "timeless" decoding) Z prefer to ignore timestamps (so-called "timeless" decoding)
"Instructions" events look like they were recorded by "perf record -e "Instructions" events look like they were recorded by "perf record -e
......
...@@ -608,6 +608,7 @@ static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder) ...@@ -608,6 +608,7 @@ static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder)
{ {
decoder->sample_timestamp = decoder->timestamp; decoder->sample_timestamp = decoder->timestamp;
decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
decoder->state.cycles = decoder->tot_cyc_cnt;
} }
static void intel_pt_reposition(struct intel_pt_decoder *decoder) static void intel_pt_reposition(struct intel_pt_decoder *decoder)
......
...@@ -218,6 +218,7 @@ struct intel_pt_state { ...@@ -218,6 +218,7 @@ struct intel_pt_state {
uint64_t to_ip; uint64_t to_ip;
uint64_t tot_insn_cnt; uint64_t tot_insn_cnt;
uint64_t tot_cyc_cnt; uint64_t tot_cyc_cnt;
uint64_t cycles;
uint64_t timestamp; uint64_t timestamp;
uint64_t est_timestamp; uint64_t est_timestamp;
uint64_t trace_nr; uint64_t trace_nr;
......
...@@ -172,6 +172,7 @@ struct intel_pt_queue { ...@@ -172,6 +172,7 @@ struct intel_pt_queue {
bool step_through_buffers; bool step_through_buffers;
bool use_buffer_pid_tid; bool use_buffer_pid_tid;
bool sync_switch; bool sync_switch;
bool sample_ipc;
pid_t pid, tid; pid_t pid, tid;
int cpu; int cpu;
int switch_state; int switch_state;
...@@ -1581,7 +1582,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) ...@@ -1581,7 +1582,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
sample.branch_stack = (struct branch_stack *)&dummy_bs; sample.branch_stack = (struct branch_stack *)&dummy_bs;
} }
if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) if (ptq->sample_ipc)
sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
if (sample.cyc_cnt) { if (sample.cyc_cnt) {
sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt; sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
...@@ -1632,7 +1633,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) ...@@ -1632,7 +1633,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
else else
sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) if (ptq->sample_ipc)
sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
if (sample.cyc_cnt) { if (sample.cyc_cnt) {
sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt; sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
...@@ -2245,8 +2246,15 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ...@@ -2245,8 +2246,15 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
ptq->have_sample = false; ptq->have_sample = false;
ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; if (pt->synth_opts.approx_ipc) {
ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
ptq->ipc_cyc_cnt = ptq->state->cycles;
ptq->sample_ipc = true;
} else {
ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
ptq->sample_ipc = ptq->state->flags & INTEL_PT_SAMPLE_IPC;
}
/* /*
* Do PEBS first to allow for the possibility that the PEBS timestamp * Do PEBS first to allow for the possibility that the PEBS timestamp
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment