Commit 6db175c7 authored by Ingo Molnar, committed by Arnaldo Carvalho de Melo

perf bench: Remove the prefaulting complication from 'perf bench mem mem*'

So 'perf bench mem memcpy/memset' has elaborate code to measure
memcpy()/memset() performance both with freshly allocated buffers (which
includes initial page fault overhead) and with preallocated buffers.

But the thing is, the resulting bandwidth results are mostly
meaningless, because page faults dominate so much of the cost.

It might make sense to measure cache cold vs. cache hot performance, but
the code does not do this.

So remove this complication, and always prefault the ranges before using
them.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1445241870-24854-6-git-send-email-mingo@kernel.org
[ Remove --no-prefault, --only-prefault from docs, noticed by David Ahern ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 9b2fa7f3
...@@ -157,14 +157,6 @@ Repeat memcpy invocation this number of times. ...@@ -157,14 +157,6 @@ Repeat memcpy invocation this number of times.
--cycle:: --cycle::
Use perf's cpu-cycles event instead of gettimeofday syscall. Use perf's cpu-cycles event instead of gettimeofday syscall.
-o::
--only-prefault::
Show only the result with page faults before memcpy.
-n::
--no-prefault::
Show only the result without page faults before memcpy.
*memset*:: *memset*::
Suite for evaluating performance of simple memory set in various ways. Suite for evaluating performance of simple memory set in various ways.
...@@ -189,14 +181,6 @@ Repeat memset invocation this number of times. ...@@ -189,14 +181,6 @@ Repeat memset invocation this number of times.
--cycle:: --cycle::
Use perf's cpu-cycles event instead of gettimeofday syscall. Use perf's cpu-cycles event instead of gettimeofday syscall.
-o::
--only-prefault::
Show only the result with page faults before memset.
-n::
--no-prefault::
Show only the result without page faults before memset.
SUITES FOR 'numa' SUITES FOR 'numa'
~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~
*mem*:: *mem*::
......
...@@ -28,8 +28,6 @@ static const char *routine = "all"; ...@@ -28,8 +28,6 @@ static const char *routine = "all";
static int iterations = 1; static int iterations = 1;
static bool use_cycle; static bool use_cycle;
static int cycle_fd; static int cycle_fd;
static bool only_prefault;
static bool no_prefault;
static const struct option options[] = { static const struct option options[] = {
OPT_STRING('l', "length", &length_str, "1MB", OPT_STRING('l', "length", &length_str, "1MB",
...@@ -41,10 +39,6 @@ static const struct option options[] = { ...@@ -41,10 +39,6 @@ static const struct option options[] = {
"repeat memcpy() invocation this number of times"), "repeat memcpy() invocation this number of times"),
OPT_BOOLEAN('c', "cycle", &use_cycle, OPT_BOOLEAN('c', "cycle", &use_cycle,
"Use cycles event instead of gettimeofday() for measuring"), "Use cycles event instead of gettimeofday() for measuring"),
OPT_BOOLEAN('o', "only-prefault", &only_prefault,
"Show only the result with page faults before memcpy()"),
OPT_BOOLEAN('n', "no-prefault", &no_prefault,
"Show only the result without page faults before memcpy()"),
OPT_END() OPT_END()
}; };
...@@ -110,103 +104,60 @@ static double timeval2double(struct timeval *ts) ...@@ -110,103 +104,60 @@ static double timeval2double(struct timeval *ts)
return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000; return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000;
} }
#define print_bps(x) do { \ #define print_bps(x) do { \
if (x < K) \ if (x < K) \
printf(" %14lf B/Sec", x); \ printf(" %14lf B/Sec\n", x); \
else if (x < K * K) \ else if (x < K * K) \
printf(" %14lfd KB/Sec", x / K); \ printf(" %14lfd KB/Sec\n", x / K); \
else if (x < K * K * K) \ else if (x < K * K * K) \
printf(" %14lf MB/Sec", x / K / K); \ printf(" %14lf MB/Sec\n", x / K / K); \
else \ else \
printf(" %14lf GB/Sec", x / K / K / K); \ printf(" %14lf GB/Sec\n", x / K / K / K); \
} while (0) } while (0)
struct bench_mem_info { struct bench_mem_info {
const struct routine *routines; const struct routine *routines;
u64 (*do_cycle)(const struct routine *r, size_t len, bool prefault); u64 (*do_cycle)(const struct routine *r, size_t len);
double (*do_gettimeofday)(const struct routine *r, size_t len, bool prefault); double (*do_gettimeofday)(const struct routine *r, size_t len);
const char *const *usage; const char *const *usage;
}; };
static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen) static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen)
{ {
const struct routine *r = &info->routines[r_idx]; const struct routine *r = &info->routines[r_idx];
double result_bps[2]; double result_bps = 0.0;
u64 result_cycle[2]; u64 result_cycle = 0;
int prefault = no_prefault ? 0 : 1;
result_cycle[0] = result_cycle[1] = 0ULL;
result_bps[0] = result_bps[1] = 0.0;
printf("Routine %s (%s)\n", r->name, r->desc); printf("Routine %s (%s)\n", r->name, r->desc);
if (bench_format == BENCH_FORMAT_DEFAULT) if (bench_format == BENCH_FORMAT_DEFAULT)
printf("# Copying %s Bytes ...\n\n", length_str); printf("# Copying %s Bytes ...\n\n", length_str);
if (!only_prefault && prefault) { if (use_cycle) {
/* Show both results: */ result_cycle = info->do_cycle(r, len);
if (use_cycle) {
result_cycle[0] = info->do_cycle(r, len, false);
result_cycle[1] = info->do_cycle(r, len, true);
} else {
result_bps[0] = info->do_gettimeofday(r, len, false);
result_bps[1] = info->do_gettimeofday(r, len, true);
}
} else { } else {
if (use_cycle) result_bps = info->do_gettimeofday(r, len);
result_cycle[prefault] = info->do_cycle(r, len, only_prefault);
else
result_bps[prefault] = info->do_gettimeofday(r, len, only_prefault);
} }
switch (bench_format) { switch (bench_format) {
case BENCH_FORMAT_DEFAULT: case BENCH_FORMAT_DEFAULT:
if (!only_prefault && prefault) { if (use_cycle) {
if (use_cycle) { printf(" %14lf Cycle/Byte\n", (double)result_cycle/totallen);
printf(" %14lf Cycle/Byte\n",
(double)result_cycle[0]
/ totallen);
printf(" %14lf Cycle/Byte (with prefault)\n",
(double)result_cycle[1]
/ totallen);
} else {
print_bps(result_bps[0]);
printf("\n");
print_bps(result_bps[1]);
printf(" (with prefault)\n");
}
} else { } else {
if (use_cycle) { print_bps(result_bps);
printf(" %14lf Cycle/Byte",
(double)result_cycle[prefault]
/ totallen);
} else
print_bps(result_bps[prefault]);
printf("%s\n", only_prefault ? " (with prefault)" : "");
} }
break; break;
case BENCH_FORMAT_SIMPLE: case BENCH_FORMAT_SIMPLE:
if (!only_prefault && prefault) { if (use_cycle) {
if (use_cycle) { printf("%lf\n", (double)result_cycle/totallen);
printf("%lf %lf\n",
(double)result_cycle[0] / totallen,
(double)result_cycle[1] / totallen);
} else {
printf("%lf %lf\n",
result_bps[0], result_bps[1]);
}
} else { } else {
if (use_cycle) { printf("%lf\n", result_bps);
printf("%lf\n", (double)result_cycle[prefault]
/ totallen);
} else
printf("%lf\n", result_bps[prefault]);
} }
break; break;
default: default:
/* Reaching this means there's some disaster: */ BUG_ON(1);
die("unknown format: %d\n", bench_format);
break; break;
} }
} }
...@@ -219,11 +170,6 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * ...@@ -219,11 +170,6 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
argc = parse_options(argc, argv, options, info->usage, 0); argc = parse_options(argc, argv, options, info->usage, 0);
if (no_prefault && only_prefault) {
fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
return 1;
}
if (use_cycle) if (use_cycle)
init_cycle(); init_cycle();
...@@ -235,10 +181,6 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * ...@@ -235,10 +181,6 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
return 1; return 1;
} }
/* Same as without specifying either of prefault and no-prefault: */
if (only_prefault && no_prefault)
only_prefault = no_prefault = false;
if (!strncmp(routine, "all", 3)) { if (!strncmp(routine, "all", 3)) {
for (i = 0; info->routines[i].name; i++) for (i = 0; info->routines[i].name; i++)
__bench_mem_routine(info, i, len, totallen); __bench_mem_routine(info, i, len, totallen);
...@@ -278,7 +220,7 @@ static void memcpy_alloc_mem(void **dst, void **src, size_t length) ...@@ -278,7 +220,7 @@ static void memcpy_alloc_mem(void **dst, void **src, size_t length)
memset(*src, 0, length); memset(*src, 0, length);
} }
static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault) static u64 do_memcpy_cycle(const struct routine *r, size_t len)
{ {
u64 cycle_start = 0ULL, cycle_end = 0ULL; u64 cycle_start = 0ULL, cycle_end = 0ULL;
void *src = NULL, *dst = NULL; void *src = NULL, *dst = NULL;
...@@ -287,8 +229,11 @@ static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault) ...@@ -287,8 +229,11 @@ static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault)
memcpy_alloc_mem(&dst, &src, len); memcpy_alloc_mem(&dst, &src, len);
if (prefault) /*
fn(dst, src, len); * We prefault the freshly allocated memory range here,
* to not measure page fault overhead:
*/
fn(dst, src, len);
cycle_start = get_cycle(); cycle_start = get_cycle();
for (i = 0; i < iterations; ++i) for (i = 0; i < iterations; ++i)
...@@ -300,7 +245,7 @@ static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault) ...@@ -300,7 +245,7 @@ static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault)
return cycle_end - cycle_start; return cycle_end - cycle_start;
} }
static double do_memcpy_gettimeofday(const struct routine *r, size_t len, bool prefault) static double do_memcpy_gettimeofday(const struct routine *r, size_t len)
{ {
struct timeval tv_start, tv_end, tv_diff; struct timeval tv_start, tv_end, tv_diff;
memcpy_t fn = r->fn.memcpy; memcpy_t fn = r->fn.memcpy;
...@@ -309,8 +254,11 @@ static double do_memcpy_gettimeofday(const struct routine *r, size_t len, bool p ...@@ -309,8 +254,11 @@ static double do_memcpy_gettimeofday(const struct routine *r, size_t len, bool p
memcpy_alloc_mem(&dst, &src, len); memcpy_alloc_mem(&dst, &src, len);
if (prefault) /*
fn(dst, src, len); * We prefault the freshly allocated memory range here,
* to not measure page fault overhead:
*/
fn(dst, src, len);
BUG_ON(gettimeofday(&tv_start, NULL)); BUG_ON(gettimeofday(&tv_start, NULL));
for (i = 0; i < iterations; ++i) for (i = 0; i < iterations; ++i)
...@@ -321,6 +269,7 @@ static double do_memcpy_gettimeofday(const struct routine *r, size_t len, bool p ...@@ -321,6 +269,7 @@ static double do_memcpy_gettimeofday(const struct routine *r, size_t len, bool p
free(src); free(src);
free(dst); free(dst);
return (double)(((double)len * iterations) / timeval2double(&tv_diff)); return (double)(((double)len * iterations) / timeval2double(&tv_diff));
} }
...@@ -343,7 +292,7 @@ static void memset_alloc_mem(void **dst, size_t length) ...@@ -343,7 +292,7 @@ static void memset_alloc_mem(void **dst, size_t length)
die("memory allocation failed - maybe length is too large?\n"); die("memory allocation failed - maybe length is too large?\n");
} }
static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault) static u64 do_memset_cycle(const struct routine *r, size_t len)
{ {
u64 cycle_start = 0ULL, cycle_end = 0ULL; u64 cycle_start = 0ULL, cycle_end = 0ULL;
memset_t fn = r->fn.memset; memset_t fn = r->fn.memset;
...@@ -352,8 +301,11 @@ static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault) ...@@ -352,8 +301,11 @@ static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault)
memset_alloc_mem(&dst, len); memset_alloc_mem(&dst, len);
if (prefault) /*
fn(dst, -1, len); * We prefault the freshly allocated memory range here,
* to not measure page fault overhead:
*/
fn(dst, -1, len);
cycle_start = get_cycle(); cycle_start = get_cycle();
for (i = 0; i < iterations; ++i) for (i = 0; i < iterations; ++i)
...@@ -364,8 +316,7 @@ static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault) ...@@ -364,8 +316,7 @@ static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault)
return cycle_end - cycle_start; return cycle_end - cycle_start;
} }
static double do_memset_gettimeofday(const struct routine *r, size_t len, static double do_memset_gettimeofday(const struct routine *r, size_t len)
bool prefault)
{ {
struct timeval tv_start, tv_end, tv_diff; struct timeval tv_start, tv_end, tv_diff;
memset_t fn = r->fn.memset; memset_t fn = r->fn.memset;
...@@ -374,8 +325,11 @@ static double do_memset_gettimeofday(const struct routine *r, size_t len, ...@@ -374,8 +325,11 @@ static double do_memset_gettimeofday(const struct routine *r, size_t len,
memset_alloc_mem(&dst, len); memset_alloc_mem(&dst, len);
if (prefault) /*
fn(dst, -1, len); * We prefault the freshly allocated memory range here,
* to not measure page fault overhead:
*/
fn(dst, -1, len);
BUG_ON(gettimeofday(&tv_start, NULL)); BUG_ON(gettimeofday(&tv_start, NULL));
for (i = 0; i < iterations; ++i) for (i = 0; i < iterations; ++i)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment