Commit 1fc570ad authored by Ingo Molnar

perf stat: Add stalled cycles to the default output

The new default output looks like this:

 Performance counter stats for './loop_1b_instructions':

        236.010686 task-clock               #    0.996 CPUs utilized
                 0 context-switches         #    0.000 M/sec
                 0 CPU-migrations           #    0.000 M/sec
                99 page-faults              #    0.000 M/sec
       756,487,646 cycles                   #    3.205 GHz
       354,938,996 stalled-cycles           #   46.92% of all cycles are idle
     1,001,403,797 instructions             #    1.32  insns per cycle
                                            #    0.35  stalled cycles per insn
       100,279,773 branches                 #  424.895 M/sec
            12,646 branch-misses            #    0.013 % of all branches

        0.236902540  seconds time elapsed

We dropped cache-refs and cache-misses and added stalled-cycles - this is a
more generic "how well utilized is the CPU" metric.

If the stalled-cycles ratio is too high then more specific measurements can be
taken to figure out the source of the inefficiency.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-pbpl2l4mn797s69bclfpwkwn@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 481f988a
...@@ -65,11 +65,10 @@ static struct perf_event_attr default_attrs[] = { ...@@ -65,11 +65,10 @@ static struct perf_event_attr default_attrs[] = {
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
}; };
...@@ -468,7 +467,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) ...@@ -468,7 +467,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
if (total) if (total)
ratio = avg * 100 / total; ratio = avg * 100 / total;
fprintf(stderr, " # %8.3f %% of all branches", ratio); fprintf(stderr, " # %5.2f %% of all branches ", ratio);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
runtime_cacherefs_stats[cpu].n != 0) { runtime_cacherefs_stats[cpu].n != 0) {
......
...@@ -32,13 +32,13 @@ char debugfs_path[MAXPATHLEN]; ...@@ -32,13 +32,13 @@ char debugfs_path[MAXPATHLEN];
static struct event_symbol event_symbols[] = { static struct event_symbol event_symbols[] = {
{ CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, { CHW(CPU_CYCLES), "cpu-cycles", "cycles" },
{ CHW(STALLED_CYCLES), "stalled-cycles", "idle-cycles" },
{ CHW(INSTRUCTIONS), "instructions", "" }, { CHW(INSTRUCTIONS), "instructions", "" },
{ CHW(CACHE_REFERENCES), "cache-references", "" }, { CHW(CACHE_REFERENCES), "cache-references", "" },
{ CHW(CACHE_MISSES), "cache-misses", "" }, { CHW(CACHE_MISSES), "cache-misses", "" },
{ CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" },
{ CHW(BRANCH_MISSES), "branch-misses", "" }, { CHW(BRANCH_MISSES), "branch-misses", "" },
{ CHW(BUS_CYCLES), "bus-cycles", "" }, { CHW(BUS_CYCLES), "bus-cycles", "" },
{ CHW(STALLED_CYCLES), "stalled-cycles", "" },
{ CSW(CPU_CLOCK), "cpu-clock", "" }, { CSW(CPU_CLOCK), "cpu-clock", "" },
{ CSW(TASK_CLOCK), "task-clock", "" }, { CSW(TASK_CLOCK), "task-clock", "" },
...@@ -67,6 +67,7 @@ static const char *hw_event_names[] = { ...@@ -67,6 +67,7 @@ static const char *hw_event_names[] = {
"branches", "branches",
"branch-misses", "branch-misses",
"bus-cycles", "bus-cycles",
"stalled-cycles",
}; };
static const char *sw_event_names[] = { static const char *sw_event_names[] = {
...@@ -308,7 +309,7 @@ const char *__event_name(int type, u64 config) ...@@ -308,7 +309,7 @@ const char *__event_name(int type, u64 config)
switch (type) { switch (type) {
case PERF_TYPE_HARDWARE: case PERF_TYPE_HARDWARE:
if (config < PERF_COUNT_HW_MAX) if (config < PERF_COUNT_HW_MAX && hw_event_names[config])
return hw_event_names[config]; return hw_event_names[config];
return "unknown-hardware"; return "unknown-hardware";
...@@ -334,7 +335,7 @@ const char *__event_name(int type, u64 config) ...@@ -334,7 +335,7 @@ const char *__event_name(int type, u64 config)
} }
case PERF_TYPE_SOFTWARE: case PERF_TYPE_SOFTWARE:
if (config < PERF_COUNT_SW_MAX) if (config < PERF_COUNT_SW_MAX && sw_event_names[config])
return sw_event_names[config]; return sw_event_names[config];
return "unknown-software"; return "unknown-software";
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment