Commit c4ee0625, authored by Jin Yao, committed by Arnaldo Carvalho de Melo

perf report: Calculate the average cycles of iterations

The branch history code has a loop detection function. With this, we can
get the number of iterations by counting the removed loops.

It would also be useful to know the average number of cycles per iteration.
This patch adds up the cycles in the branch entries of the removed loops and
saves the result in the next branch entry (e.g. branch entry A).

Finally, the number of iterations and the average cycles are displayed at the
"from" of branch entry A.

For example:
perf record -g -j any,save_type ./div
perf report --branch-history --no-children --stdio

--22.63%--main div.c:42 (RET CROSS_2M)
          compute_flag div.c:28 (cycles:2 iter:173115 avg_cycles:2)
          |
           --10.73%--compute_flag div.c:27 (RET CROSS_2M)
                     rand rand.c:28 (cycles:1)
                     rand rand.c:28 (RET CROSS_2M)
                     __random random.c:298 (cycles:1)
                     __random random.c:297 (COND_BWD CROSS_2M)
                     __random random.c:295 (cycles:1)
                     __random random.c:295 (COND_BWD CROSS_2M)
                     __random random.c:295 (cycles:1)
                     __random random.c:295 (RET CROSS_2M)
Signed-off-by: Yao Jin <yao.jin@linux.intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1502111115-18305-1-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 1b2f76d7
...@@ -931,11 +931,7 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser, ...@@ -931,11 +931,7 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
browser->show_dso); browser->show_dso);
if (symbol_conf.show_branchflag_count) { if (symbol_conf.show_branchflag_count) {
if (need_percent) callchain_list_counts__printf_value(chain, NULL,
callchain_list_counts__printf_value(node, chain, NULL,
buf, sizeof(buf));
else
callchain_list_counts__printf_value(NULL, chain, NULL,
buf, sizeof(buf)); buf, sizeof(buf));
if (asprintf(&alloc_str2, "%s%s", str, buf) < 0) if (asprintf(&alloc_str2, "%s%s", str, buf) < 0)
......
...@@ -124,11 +124,7 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node, ...@@ -124,11 +124,7 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
str = callchain_list__sym_name(chain, bf, sizeof(bf), false); str = callchain_list__sym_name(chain, bf, sizeof(bf), false);
if (symbol_conf.show_branchflag_count) { if (symbol_conf.show_branchflag_count) {
if (!period) callchain_list_counts__printf_value(chain, NULL,
callchain_list_counts__printf_value(node, chain, NULL,
buf, sizeof(buf));
else
callchain_list_counts__printf_value(NULL, chain, NULL,
buf, sizeof(buf)); buf, sizeof(buf));
if (asprintf(&alloc_str, "%s%s", str, buf) < 0) if (asprintf(&alloc_str, "%s%s", str, buf) < 0)
...@@ -313,7 +309,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, ...@@ -313,7 +309,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
if (symbol_conf.show_branchflag_count) if (symbol_conf.show_branchflag_count)
ret += callchain_list_counts__printf_value( ret += callchain_list_counts__printf_value(
NULL, chain, fp, NULL, 0); chain, fp, NULL, 0);
ret += fprintf(fp, "\n"); ret += fprintf(fp, "\n");
if (++entries_printed == callchain_param.print_limit) if (++entries_printed == callchain_param.print_limit)
......
...@@ -588,7 +588,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) ...@@ -588,7 +588,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
call->cycles_count = call->cycles_count =
cursor_node->branch_flags.cycles; cursor_node->branch_flags.cycles;
call->iter_count = cursor_node->nr_loop_iter; call->iter_count = cursor_node->nr_loop_iter;
call->samples_count = cursor_node->samples; call->iter_cycles = cursor_node->iter_cycles;
} }
} }
...@@ -722,7 +722,7 @@ static enum match_result match_chain(struct callchain_cursor_node *node, ...@@ -722,7 +722,7 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
cnode->cycles_count += cnode->cycles_count +=
node->branch_flags.cycles; node->branch_flags.cycles;
cnode->iter_count += node->nr_loop_iter; cnode->iter_count += node->nr_loop_iter;
cnode->samples_count += node->samples; cnode->iter_cycles += node->iter_cycles;
} }
} }
...@@ -998,7 +998,7 @@ int callchain_merge(struct callchain_cursor *cursor, ...@@ -998,7 +998,7 @@ int callchain_merge(struct callchain_cursor *cursor,
int callchain_cursor_append(struct callchain_cursor *cursor, int callchain_cursor_append(struct callchain_cursor *cursor,
u64 ip, struct map *map, struct symbol *sym, u64 ip, struct map *map, struct symbol *sym,
bool branch, struct branch_flags *flags, bool branch, struct branch_flags *flags,
int nr_loop_iter, int samples, u64 branch_from) int nr_loop_iter, u64 iter_cycles, u64 branch_from)
{ {
struct callchain_cursor_node *node = *cursor->last; struct callchain_cursor_node *node = *cursor->last;
...@@ -1016,7 +1016,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor, ...@@ -1016,7 +1016,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
node->sym = sym; node->sym = sym;
node->branch = branch; node->branch = branch;
node->nr_loop_iter = nr_loop_iter; node->nr_loop_iter = nr_loop_iter;
node->samples = samples; node->iter_cycles = iter_cycles;
if (flags) if (flags)
memcpy(&node->branch_flags, flags, memcpy(&node->branch_flags, flags,
...@@ -1306,7 +1306,7 @@ static int branch_to_str(char *bf, int bfsize, ...@@ -1306,7 +1306,7 @@ static int branch_to_str(char *bf, int bfsize,
static int branch_from_str(char *bf, int bfsize, static int branch_from_str(char *bf, int bfsize,
u64 branch_count, u64 branch_count,
u64 cycles_count, u64 iter_count, u64 cycles_count, u64 iter_count,
u64 samples_count) u64 iter_cycles)
{ {
int printed = 0, i = 0; int printed = 0, i = 0;
u64 cycles; u64 cycles;
...@@ -1318,9 +1318,13 @@ static int branch_from_str(char *bf, int bfsize, ...@@ -1318,9 +1318,13 @@ static int branch_from_str(char *bf, int bfsize,
bf + printed, bfsize - printed); bf + printed, bfsize - printed);
} }
if (iter_count && samples_count) { if (iter_count) {
printed += count_pri64_printf(i++, "iterations", printed += count_pri64_printf(i++, "iter",
iter_count / samples_count, iter_count,
bf + printed, bfsize - printed);
printed += count_pri64_printf(i++, "avg_cycles",
iter_cycles / iter_count,
bf + printed, bfsize - printed); bf + printed, bfsize - printed);
} }
...@@ -1333,7 +1337,7 @@ static int branch_from_str(char *bf, int bfsize, ...@@ -1333,7 +1337,7 @@ static int branch_from_str(char *bf, int bfsize,
static int counts_str_build(char *bf, int bfsize, static int counts_str_build(char *bf, int bfsize,
u64 branch_count, u64 predicted_count, u64 branch_count, u64 predicted_count,
u64 abort_count, u64 cycles_count, u64 abort_count, u64 cycles_count,
u64 iter_count, u64 samples_count, u64 iter_count, u64 iter_cycles,
struct branch_type_stat *brtype_stat) struct branch_type_stat *brtype_stat)
{ {
int printed; int printed;
...@@ -1346,7 +1350,7 @@ static int counts_str_build(char *bf, int bfsize, ...@@ -1346,7 +1350,7 @@ static int counts_str_build(char *bf, int bfsize,
predicted_count, abort_count, brtype_stat); predicted_count, abort_count, brtype_stat);
} else { } else {
printed = branch_from_str(bf, bfsize, branch_count, printed = branch_from_str(bf, bfsize, branch_count,
cycles_count, iter_count, samples_count); cycles_count, iter_count, iter_cycles);
} }
if (!printed) if (!printed)
...@@ -1358,14 +1362,14 @@ static int counts_str_build(char *bf, int bfsize, ...@@ -1358,14 +1362,14 @@ static int counts_str_build(char *bf, int bfsize,
static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
u64 branch_count, u64 predicted_count, u64 branch_count, u64 predicted_count,
u64 abort_count, u64 cycles_count, u64 abort_count, u64 cycles_count,
u64 iter_count, u64 samples_count, u64 iter_count, u64 iter_cycles,
struct branch_type_stat *brtype_stat) struct branch_type_stat *brtype_stat)
{ {
char str[256]; char str[256];
counts_str_build(str, sizeof(str), branch_count, counts_str_build(str, sizeof(str), branch_count,
predicted_count, abort_count, cycles_count, predicted_count, abort_count, cycles_count,
iter_count, samples_count, brtype_stat); iter_count, iter_cycles, brtype_stat);
if (fp) if (fp)
return fprintf(fp, "%s", str); return fprintf(fp, "%s", str);
...@@ -1373,31 +1377,23 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, ...@@ -1373,31 +1377,23 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
return scnprintf(bf, bfsize, "%s", str); return scnprintf(bf, bfsize, "%s", str);
} }
int callchain_list_counts__printf_value(struct callchain_node *node, int callchain_list_counts__printf_value(struct callchain_list *clist,
struct callchain_list *clist,
FILE *fp, char *bf, int bfsize) FILE *fp, char *bf, int bfsize)
{ {
u64 branch_count, predicted_count; u64 branch_count, predicted_count;
u64 abort_count, cycles_count; u64 abort_count, cycles_count;
u64 iter_count = 0, samples_count = 0; u64 iter_count, iter_cycles;
branch_count = clist->branch_count; branch_count = clist->branch_count;
predicted_count = clist->predicted_count; predicted_count = clist->predicted_count;
abort_count = clist->abort_count; abort_count = clist->abort_count;
cycles_count = clist->cycles_count; cycles_count = clist->cycles_count;
iter_count = clist->iter_count;
if (node) { iter_cycles = clist->iter_cycles;
struct callchain_list *call;
list_for_each_entry(call, &node->val, list) {
iter_count += call->iter_count;
samples_count += call->samples_count;
}
}
return callchain_counts_printf(fp, bf, bfsize, branch_count, return callchain_counts_printf(fp, bf, bfsize, branch_count,
predicted_count, abort_count, predicted_count, abort_count,
cycles_count, iter_count, samples_count, cycles_count, iter_count, iter_cycles,
&clist->brtype_stat); &clist->brtype_stat);
} }
...@@ -1523,7 +1519,8 @@ int callchain_cursor__copy(struct callchain_cursor *dst, ...@@ -1523,7 +1519,8 @@ int callchain_cursor__copy(struct callchain_cursor *dst,
rc = callchain_cursor_append(dst, node->ip, node->map, node->sym, rc = callchain_cursor_append(dst, node->ip, node->map, node->sym,
node->branch, &node->branch_flags, node->branch, &node->branch_flags,
node->nr_loop_iter, node->samples, node->nr_loop_iter,
node->iter_cycles,
node->branch_from); node->branch_from);
if (rc) if (rc)
break; break;
......
...@@ -119,7 +119,7 @@ struct callchain_list { ...@@ -119,7 +119,7 @@ struct callchain_list {
u64 abort_count; u64 abort_count;
u64 cycles_count; u64 cycles_count;
u64 iter_count; u64 iter_count;
u64 samples_count; u64 iter_cycles;
struct branch_type_stat brtype_stat; struct branch_type_stat brtype_stat;
char *srcline; char *srcline;
struct list_head list; struct list_head list;
...@@ -139,7 +139,7 @@ struct callchain_cursor_node { ...@@ -139,7 +139,7 @@ struct callchain_cursor_node {
struct branch_flags branch_flags; struct branch_flags branch_flags;
u64 branch_from; u64 branch_from;
int nr_loop_iter; int nr_loop_iter;
int samples; u64 iter_cycles;
struct callchain_cursor_node *next; struct callchain_cursor_node *next;
}; };
...@@ -201,7 +201,7 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor) ...@@ -201,7 +201,7 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
struct map *map, struct symbol *sym, struct map *map, struct symbol *sym,
bool branch, struct branch_flags *flags, bool branch, struct branch_flags *flags,
int nr_loop_iter, int samples, u64 branch_from); int nr_loop_iter, u64 iter_cycles, u64 branch_from);
/* Close a cursor writing session. Initialize for the reader */ /* Close a cursor writing session. Initialize for the reader */
static inline void callchain_cursor_commit(struct callchain_cursor *cursor) static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
...@@ -282,8 +282,7 @@ char *callchain_node__scnprintf_value(struct callchain_node *node, ...@@ -282,8 +282,7 @@ char *callchain_node__scnprintf_value(struct callchain_node *node,
int callchain_node__fprintf_value(struct callchain_node *node, int callchain_node__fprintf_value(struct callchain_node *node,
FILE *fp, u64 total); FILE *fp, u64 total);
int callchain_list_counts__printf_value(struct callchain_node *node, int callchain_list_counts__printf_value(struct callchain_list *clist,
struct callchain_list *clist,
FILE *fp, char *bf, int bfsize); FILE *fp, char *bf, int bfsize);
void free_callchain(struct callchain_root *root); void free_callchain(struct callchain_root *root);
......
...@@ -1675,6 +1675,11 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, ...@@ -1675,6 +1675,11 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
return mi; return mi;
} }
/*
 * Summary of a removed loop, filled in by save_iterations() and read by
 * add_callchain_ip() when appending a callchain cursor node.
 */
struct iterations {
/* The 'nr' argument given to save_iterations(): count of branch entries summed. */
int nr_loop_iter;
/* Sum of flags.cycles over those branch entries. */
u64 cycles;
};
static int add_callchain_ip(struct thread *thread, static int add_callchain_ip(struct thread *thread,
struct callchain_cursor *cursor, struct callchain_cursor *cursor,
struct symbol **parent, struct symbol **parent,
...@@ -1683,11 +1688,12 @@ static int add_callchain_ip(struct thread *thread, ...@@ -1683,11 +1688,12 @@ static int add_callchain_ip(struct thread *thread,
u64 ip, u64 ip,
bool branch, bool branch,
struct branch_flags *flags, struct branch_flags *flags,
int nr_loop_iter, struct iterations *iter,
int samples,
u64 branch_from) u64 branch_from)
{ {
struct addr_location al; struct addr_location al;
int nr_loop_iter = 0;
u64 iter_cycles = 0;
al.filtered = 0; al.filtered = 0;
al.sym = NULL; al.sym = NULL;
...@@ -1737,9 +1743,15 @@ static int add_callchain_ip(struct thread *thread, ...@@ -1737,9 +1743,15 @@ static int add_callchain_ip(struct thread *thread,
if (symbol_conf.hide_unresolved && al.sym == NULL) if (symbol_conf.hide_unresolved && al.sym == NULL)
return 0; return 0;
if (iter) {
nr_loop_iter = iter->nr_loop_iter;
iter_cycles = iter->cycles;
}
return callchain_cursor_append(cursor, al.addr, al.map, al.sym, return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
branch, flags, nr_loop_iter, samples, branch, flags, nr_loop_iter,
branch_from); iter_cycles, branch_from);
} }
struct branch_info *sample__resolve_bstack(struct perf_sample *sample, struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
...@@ -1760,6 +1772,18 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, ...@@ -1760,6 +1772,18 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
return bi; return bi;
} }
static void save_iterations(struct iterations *iter,
struct branch_entry *be, int nr)
{
int i;
iter->nr_loop_iter = nr;
iter->cycles = 0;
for (i = 0; i < nr; i++)
iter->cycles += be[i].flags.cycles;
}
#define CHASHSZ 127 #define CHASHSZ 127
#define CHASHBITS 7 #define CHASHBITS 7
#define NO_ENTRY 0xff #define NO_ENTRY 0xff
...@@ -1767,7 +1791,8 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, ...@@ -1767,7 +1791,8 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
#define PERF_MAX_BRANCH_DEPTH 127 #define PERF_MAX_BRANCH_DEPTH 127
/* Remove loops. */ /* Remove loops. */
static int remove_loops(struct branch_entry *l, int nr) static int remove_loops(struct branch_entry *l, int nr,
struct iterations *iter)
{ {
int i, j, off; int i, j, off;
unsigned char chash[CHASHSZ]; unsigned char chash[CHASHSZ];
...@@ -1792,8 +1817,18 @@ static int remove_loops(struct branch_entry *l, int nr) ...@@ -1792,8 +1817,18 @@ static int remove_loops(struct branch_entry *l, int nr)
break; break;
} }
if (is_loop) { if (is_loop) {
j = nr - (i + off);
if (j > 0) {
save_iterations(iter + i + off,
l + i, off);
memmove(iter + i, iter + i + off,
j * sizeof(*iter));
memmove(l + i, l + i + off, memmove(l + i, l + i + off,
(nr - (i + off)) * sizeof(*l)); j * sizeof(*l));
}
nr -= off; nr -= off;
} }
} }
...@@ -1883,7 +1918,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread, ...@@ -1883,7 +1918,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent, err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip, root_al, &cpumode, ip,
branch, flags, 0, 0, branch, flags, NULL,
branch_from); branch_from);
if (err) if (err)
return (err < 0) ? err : 0; return (err < 0) ? err : 0;
...@@ -1909,7 +1944,6 @@ static int thread__resolve_callchain_sample(struct thread *thread, ...@@ -1909,7 +1944,6 @@ static int thread__resolve_callchain_sample(struct thread *thread,
int i, j, err, nr_entries; int i, j, err, nr_entries;
int skip_idx = -1; int skip_idx = -1;
int first_call = 0; int first_call = 0;
int nr_loop_iter;
if (chain) if (chain)
chain_nr = chain->nr; chain_nr = chain->nr;
...@@ -1942,6 +1976,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, ...@@ -1942,6 +1976,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
if (branch && callchain_param.branch_callstack) { if (branch && callchain_param.branch_callstack) {
int nr = min(max_stack, (int)branch->nr); int nr = min(max_stack, (int)branch->nr);
struct branch_entry be[nr]; struct branch_entry be[nr];
struct iterations iter[nr];
if (branch->nr > PERF_MAX_BRANCH_DEPTH) { if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
pr_warning("corrupted branch chain. skipping...\n"); pr_warning("corrupted branch chain. skipping...\n");
...@@ -1972,38 +2007,21 @@ static int thread__resolve_callchain_sample(struct thread *thread, ...@@ -1972,38 +2007,21 @@ static int thread__resolve_callchain_sample(struct thread *thread,
be[i] = branch->entries[branch->nr - i - 1]; be[i] = branch->entries[branch->nr - i - 1];
} }
nr_loop_iter = nr; memset(iter, 0, sizeof(struct iterations) * nr);
nr = remove_loops(be, nr); nr = remove_loops(be, nr, iter);
/*
* Get the number of iterations.
* It's only approximation, but good enough in practice.
*/
if (nr_loop_iter > nr)
nr_loop_iter = nr_loop_iter - nr + 1;
else
nr_loop_iter = 0;
for (i = 0; i < nr; i++) { for (i = 0; i < nr; i++) {
if (i == nr - 1)
err = add_callchain_ip(thread, cursor, parent, err = add_callchain_ip(thread, cursor, parent,
root_al, root_al,
NULL, be[i].to, NULL, be[i].to,
true, &be[i].flags, true, &be[i].flags,
nr_loop_iter, 1, NULL, be[i].from);
be[i].from);
else
err = add_callchain_ip(thread, cursor, parent,
root_al,
NULL, be[i].to,
true, &be[i].flags,
0, 0, be[i].from);
if (!err) if (!err)
err = add_callchain_ip(thread, cursor, parent, root_al, err = add_callchain_ip(thread, cursor, parent, root_al,
NULL, be[i].from, NULL, be[i].from,
true, &be[i].flags, true, &be[i].flags,
0, 0, 0); &iter[i], 0);
if (err == -EINVAL) if (err == -EINVAL)
break; break;
if (err) if (err)
...@@ -2037,7 +2055,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, ...@@ -2037,7 +2055,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent, err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip, root_al, &cpumode, ip,
false, NULL, 0, 0, 0); false, NULL, NULL, 0);
if (err) if (err)
return (err < 0) ? err : 0; return (err < 0) ? err : 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment