Commit 64c349f4 authored by Kirill A. Shutemov's avatar Kirill A. Shutemov Committed by Linus Torvalds

mm: add infrastructure for get_user_pages_fast() benchmarking

Performance of get_user_pages_fast() is critical for some workloads, but
it's tricky to test it directly.

This patch provides /sys/kernel/debug/gup_benchmark that helps with
testing performance of it.

See tools/testing/selftests/vm/gup_benchmark.c for userspace
counterpart.

Link: http://lkml.kernel.org/r/20170908215603.9189-2-kirill.shutemov@linux.intel.comSigned-off-by: default avatarKirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thorsten Leemhuis <regressions@leemhuis.info>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Huang Ying <ying.huang@intel.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 15df03c8
...@@ -756,3 +756,12 @@ config PERCPU_STATS ...@@ -756,3 +756,12 @@ config PERCPU_STATS
This feature collects and exposes statistics via debugfs. The This feature collects and exposes statistics via debugfs. The
information includes global and per chunk statistics, which can information includes global and per chunk statistics, which can
be used to help understand percpu memory usage. be used to help understand percpu memory usage.
config GUP_BENCHMARK
bool "Enable infrastructure for get_user_pages_fast() benchmarking"
default n
help
Provides /sys/kernel/debug/gup_benchmark that helps with testing
performance of get_user_pages_fast().
See tools/testing/selftests/vm/gup_benchmark.c
...@@ -80,6 +80,7 @@ obj-$(CONFIG_PAGE_COUNTER) += page_counter.o ...@@ -80,6 +80,7 @@ obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
obj-$(CONFIG_MEMCG_SWAP) += swap_cgroup.o obj-$(CONFIG_MEMCG_SWAP) += swap_cgroup.o
obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o
obj-$(CONFIG_GUP_BENCHMARK) += gup_benchmark.o
obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
......
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ktime.h>
#include <linux/debugfs.h>
#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark)
struct gup_benchmark {
__u64 delta_usec;
__u64 addr;
__u64 size;
__u32 nr_pages_per_call;
__u32 flags;
};
static int __gup_benchmark_ioctl(unsigned int cmd,
struct gup_benchmark *gup)
{
ktime_t start_time, end_time;
unsigned long i, nr, nr_pages, addr, next;
struct page **pages;
nr_pages = gup->size / PAGE_SIZE;
pages = kvmalloc(sizeof(void *) * nr_pages, GFP_KERNEL);
if (!pages)
return -ENOMEM;
i = 0;
nr = gup->nr_pages_per_call;
start_time = ktime_get();
for (addr = gup->addr; addr < gup->addr + gup->size; addr = next) {
if (nr != gup->nr_pages_per_call)
break;
next = addr + nr * PAGE_SIZE;
if (next > gup->addr + gup->size) {
next = gup->addr + gup->size;
nr = (next - addr) / PAGE_SIZE;
}
nr = get_user_pages_fast(addr, nr, gup->flags & 1, pages + i);
i += nr;
}
end_time = ktime_get();
gup->delta_usec = ktime_us_delta(end_time, start_time);
gup->size = addr - gup->addr;
for (i = 0; i < nr_pages; i++) {
if (!pages[i])
break;
put_page(pages[i]);
}
kvfree(pages);
return 0;
}
static long gup_benchmark_ioctl(struct file *filep, unsigned int cmd,
unsigned long arg)
{
struct gup_benchmark gup;
int ret;
if (cmd != GUP_FAST_BENCHMARK)
return -EINVAL;
if (copy_from_user(&gup, (void __user *)arg, sizeof(gup)))
return -EFAULT;
ret = __gup_benchmark_ioctl(cmd, &gup);
if (ret)
return ret;
if (copy_to_user((void __user *)arg, &gup, sizeof(gup)))
return -EFAULT;
return 0;
}
static const struct file_operations gup_benchmark_fops = {
.open = nonseekable_open,
.unlocked_ioctl = gup_benchmark_ioctl,
};
static int gup_benchmark_init(void)
{
void *ret;
ret = debugfs_create_file_unsafe("gup_benchmark", 0600, NULL, NULL,
&gup_benchmark_fops);
if (!ret)
pr_warn("Failed to create gup_benchmark in debugfs");
return 0;
}
late_initcall(gup_benchmark_init);
...@@ -18,6 +18,7 @@ TEST_GEN_FILES += transhuge-stress ...@@ -18,6 +18,7 @@ TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += userfaultfd TEST_GEN_FILES += userfaultfd
TEST_GEN_FILES += mlock-random-test TEST_GEN_FILES += mlock-random-test
TEST_GEN_FILES += virtual_address_range TEST_GEN_FILES += virtual_address_range
TEST_GEN_FILES += gup_benchmark
TEST_PROGS := run_vmtests TEST_PROGS := run_vmtests
......
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <linux/types.h>
#define MB (1UL << 20)
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark)
struct gup_benchmark {
__u64 delta_usec;
__u64 addr;
__u64 size;
__u32 nr_pages_per_call;
__u32 flags;
};
int main(int argc, char **argv)
{
struct gup_benchmark gup;
unsigned long size = 128 * MB;
int i, fd, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
char *p;
while ((opt = getopt(argc, argv, "m:r:n:tT")) != -1) {
switch (opt) {
case 'm':
size = atoi(optarg) * MB;
break;
case 'r':
repeats = atoi(optarg);
break;
case 'n':
nr_pages = atoi(optarg);
break;
case 't':
thp = 1;
break;
case 'T':
thp = 0;
break;
case 'w':
write = 1;
default:
return -1;
}
}
gup.nr_pages_per_call = nr_pages;
gup.flags = write;
fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR);
if (fd == -1)
perror("open"), exit(1);
p = mmap(NULL, size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (p == MAP_FAILED)
perror("mmap"), exit(1);
gup.addr = (unsigned long)p;
if (thp == 1)
madvise(p, size, MADV_HUGEPAGE);
else if (thp == 0)
madvise(p, size, MADV_NOHUGEPAGE);
for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE)
p[0] = 0;
for (i = 0; i < repeats; i++) {
gup.size = size;
if (ioctl(fd, GUP_FAST_BENCHMARK, &gup))
perror("ioctl"), exit(1);
printf("Time: %lld us", gup.delta_usec);
if (gup.size != size)
printf(", truncated (size: %lld)", gup.size);
printf("\n");
}
return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment