Commit 3cfc2728 by Kirill Smelkov

virtmem: Benchmarks for pagefault handling

Benchmark the time it takes for virtmem to handle a pagefault with a noop
loadblk, for loadblk implemented both in C and in Python.

On my computer it is:

	name          µs/op
	PagefaultC    269 ± 0%
	pagefault_py  291 ± 0%

In other words, quite a long time.

It turned out to be mostly spent in fallocate'ing pages on tmpfs from
/dev/shm. Part of the above 269 µs/op is taken by freeing (reclaiming)
pages back when the benchmarking work size exceeds the /dev/shm size, and
part by allocating them.

If I limit the work size (via npage in benchmem.c) to be less than the whole
/dev/shm, it drops to ~ 170 µs/op, and additional tracing shows
something like this:

    	.. on_pagefault_start   0.954 µs
    	.. vma_on_pagefault_pre 0.954 µs
    	.. ramh_alloc_page_pre  0.954 µs
    	.. ramh_alloc_page      169.992 µs
    	.. vma_on_pagefault     172.853 µs
    	.. vma_on_pagefault_pre 172.853 µs
    	.. vma_on_pagefault     174.046 µs
    	.. on_pagefault_end     174.046 µs
    	.. whole:               171.900 µs

so almost all of the time is spent in ramh_alloc_page, which is doing the fallocate.

A simple benchmark[1] confirmed that fallocate(tmpfs) is indeed
relatively slow[2] (and that on recent kernels it has regressed somewhat
compared to Linux 3.16). The profile flamegraph for that benchmark[3] shows
where the time goes inside shmem_fallocate: for 1 hardware page it is not
that slow (e.g. <1µs), but when a request comes for a larger region it
internally processes it page by page and so accumulates that ~ 170µs for 2M.

I've tried to briefly rerun the benchmark with huge pages activated on /dev/shm via

	mount /dev/shm -o huge=always,remount

both as a regular user and as root, but it ran several times
slower. Probably something to investigate further later.

1 parent 51f252d4
......@@ -192,5 +192,10 @@ test.fault : $(FAULTS:%=%.tfault)
# -*- benchmarking -*-
bench : bigfile/
BENCHV.C:= $(patsubst %.c,%,$(wildcard bigfile/tests/bench_*.c))
bench : bench.t
bench.t : $(BENCHV.C:%=%.trun) bigfile/
......@@ -29,6 +29,7 @@ from io import FileIO
from wendelin.bigfile.file_file import BigFile_File
from wendelin.bigfile import WRITEOUT_STORE, WRITEOUT_MARKSTORED
from wendelin.lib.testing import Adler32, nulladler32_bysize, ffadler32_bysize
from wendelin.bigarray.tests.test_basic import BigFile_Zero
from wendelin.lib.mem import bzero, memset
from tempfile import NamedTemporaryFile
......@@ -69,6 +70,36 @@ def teardown_module():
# BigFile that reads as zeros and tracks last loadblk request
#
# The block number passed to the most recent loadblk call is stored in
# .last_load, so that a caller can check which block was requested last.
class BigFile_ZeroTrack(BigFile_Zero):
def loadblk(self, blk, buf):
#print('zload #%d' % blk)
# remember the requested block, then delegate the load itself to the parent class
self.last_load = blk
super(BigFile_ZeroTrack, self).loadblk(blk, buf)
# benchmark the time it takes for virtmem to handle pagefault with noop loadblk
# implemented in Python.
#
# b.N is used as the number of pages to go through.
def bench_pagefault_py(b):
npage = b.N
PS = blksize # XXX assumes blksize = pagesize
# create the tracking file, open it and mmap it via fh.mmap(0, npage)
f = BigFile_ZeroTrack(PS)
fh = f.fileh_open()
vma = fh.mmap(0, npage)
m = memoryview(vma)
# for every page, verify that the last loadblk request was for that page
# NOTE(review): the statement that actually touches page p (and so triggers
# loadblk) is not visible in this excerpt - confirm against the full file
for p in xrange(npage):
assert f.last_load == p
# drop the references in reverse order of their creation
del m
del vma # vma.close()
del fh # fh.close()
del f # f.close()
# compute hash via mmaping the file at OS-level
def _bench_file_mmapread(hasher, expect):
fd =, O_RDONLY)
/* Wendelin.bigfile | virtual memory benchmarks
* Copyright (C) 2017 Nexedi SA and Contributors.
* Kirill Smelkov <>
* This program is free software: you can Use, Study, Modify and Redistribute
* it under the terms of the GNU General Public License version 3, or (at your
* option) any later version, as published by the Free Software Foundation.
* You can also Link and Combine this program with other software covered by
* the terms of any of the Free Software licenses or any of the Open Source
* Initiative approved licenses and Convey the resulting work. Corresponding
* source of such a combination shall include the source code for all other
* software used.
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* See COPYING file for full licensing terms.
* See for rationale and options.
// XXX better link with it
#include "../virtmem.c"
#include "../pagemap.c"
#include "../ram.c"
#include "../ram_shmfs.c"
#include "../pagefault.c"
#include <ccan/tap/tap.h>
#include "../../t/t_utils.h"
#include "../../t/t_utils.c"
/* file that reads as zeros and tracks last loadblk request */
/* NOTE(review): the closing brace of the struct is not visible in this
 * excerpt, and upcast() from BigFile* suggests a base member that is also
 * not shown - confirm against the full file */
struct BigFile_ZeroTrack {
/* block number passed to the most recent zero_loadblk call */
blk_t last_load;
typedef struct BigFile_ZeroTrack BigFile_ZeroTrack;
/* loadblk implementation for BigFile_ZeroTrack.
 *
 * Records blk as the last loaded block; the visible code does not write to
 * buf. Always returns 0.
 */
int zero_loadblk(BigFile *file0, blk_t blk, void *buf)
BigFile_ZeroTrack *file = upcast(BigFile_ZeroTrack *, file0);
//diag("zload #%ld", blk);
// Nothing to do here - the memory buf obtained from OS comes pre-cleared
// XXX reenable once/if memory comes uninitialized here
/* remember the request so that the benchmark can verify which block was loaded */
file->last_load = blk;
return 0;
/* operations table for the zero-tracking file: only loadblk is provided,
 * storeblk and release are left NULL */
static const struct bigfile_ops filez_ops = {
.loadblk = zero_loadblk,
.storeblk = NULL, // XXX
.release = NULL, // XXX
/* benchmark the time it takes for virtmem to handle pagefault with noop loadblk
 *
 * Sets up a BigFile_ZeroTrack file with blksize = pagesize, maps it, then goes
 * through npage pages checking after each access that loadblk was requested
 * for exactly that page. The average time per page is printed in µs/op on a
 * "BenchmarkPagefaultC" line.
 *
 * NOTE(review): closing braces of the initializer, of the loop and of the
 * function are not visible in this excerpt.
 */
void bench_pagefault() {
RAM *ram;
BigFileH fh_struct, *fh = &fh_struct;
VMA vma_struct, *vma = &vma_struct;
pgoff_t p, npage = 10000;
size_t PS;
int err;
double Tstart, Tend;
ram = ram_new(NULL,NULL);
PS = ram->pagesize;
/* setup zero file */
BigFile_ZeroTrack f = {
.blksize = ram->pagesize, /* artificially blksize = pagesize */
.file_ops = &filez_ops,
/* setup f mapping */
/* NOTE(review): err is assigned below but no check of it is visible in this excerpt */
err = fileh_open(fh, &f, ram);
err = fileh_mmap(vma, fh, 0, npage);
/* timed section starts here */
Tstart = microtime();
// access first byte of every page
for (p = 0; p < npage; p++) {
b(vma, p * PS);
/* the access above is expected to have resulted in loadblk for page p */
if (f.last_load != p)
fail("accessed page #%ld but last loadblk was for block #%ld", p, f.last_load);
Tend = microtime();
/* elapsed time is scaled by 1E6 and averaged over npage to report µs per page */
printf("BenchmarkPagefaultC\t%ld\t%.3lf µs/op\n", npage, (Tend - Tstart) * 1E6 / npage);
/* benchmark entry point: loops nrun times and returns 0 */
int main()
int i, nrun=3;
tap_fail_callback = abort; // XXX to catch failure immediately
/* NOTE(review): the loop body is not visible in this excerpt - presumably it
 * calls bench_pagefault(); confirm against the full file */
for (i=0; i<nrun; i++)
return 0;
......@@ -21,6 +21,8 @@
#include <wendelin/utils.h>
#include <sys/time.h>
static const struct ram_ops ram_limited_ops;
static const struct ramh_ops ramh_limited_ops;
......@@ -155,3 +157,17 @@ static const struct ramh_ops ramh_limited_ops = {
.mmap_page = ramh_limited_mmap_page,
.close = ramh_limited_close,
/* current time as obtained from gettimeofday(), returned as a double:
 * whole seconds plus microseconds scaled by 1E-6 */
double microtime() {
int err;
struct timeval tv;
err = gettimeofday(&tv, NULL);
/* NOTE(review): the body of the error branch and its closing brace are not
 * visible in this excerpt */
if (err == -1) {
return tv.tv_sec + 1E-6 * tv.tv_usec;
......@@ -43,4 +43,7 @@ typedef struct RAMLimited RAMLimited;
RAMLimited *ram_limited_new(RAM *backend, size_t alloc_max);
/* current time as float */
double microtime();
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!