Commit b0f3b2e5 authored by Kirill Smelkov's avatar Kirill Smelkov

.

parent 18362227
...@@ -436,18 +436,23 @@ class tFile: ...@@ -436,18 +436,23 @@ class tFile:
# mmap access we have the guarantee from kernel that the page will # mmap access we have the guarantee from kernel that the page will
# stay in pagecache. # stay in pagecache.
# #
# 2. madvise memory with MADV_NORMAL and MADV_RANDOM in interleaved # 2. madvise memory with MADV_SEQUENTIAL and MADV_RANDOM in interleaved
# mode. This adjusts kernel readahead (which triggers for MADV_NORMAL # mode. This adjusts kernel readahead (which triggers for MADV_SEQUENTIAL
# vma) to not go over to next block and thus a read access to one # vma) to not go over to next block and thus a read access to one
# block won't trigger implicit read access to its neighbour block. # block won't trigger implicit read access to its neighbour block.
# #
# https://www.quora.com/What-heuristics-does-the-adaptive-readahead-implementation-in-the-Linux-kernel-use # https://www.quora.com/What-heuristics-does-the-adaptive-readahead-implementation-in-the-Linux-kernel-use
# https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/madvise.c?h=v5.2-rc4#n51 # https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/madvise.c?h=v5.2-rc4#n51
# #
# don't disable readahead universally, since enabled readahead helps # we don't use MADV_NORMAL instead of MADV_SEQUENTIAL, because for
# to test how wcfs handles simultaneous read triggered by async # MADV_NORMAL, there is not only read-ahead, but also read-around,
# kernel readahead vs wcfs uploading data for the same block into OS # which might result in accessing previous block.
# cache. Also, fully enabled readahead is how wcfs is actually used. #
# we don't disable readahead universally, since enabled readahead
# helps to test how wcfs handles simultaneous read triggered by
# async kernel readahead vs wcfs uploading data for the same block
# into OS cache. Also, fully enabled readahead is how wcfs is
# actually used.
assert t.blksize % mm.PAGE_SIZE == 0 assert t.blksize % mm.PAGE_SIZE == 0
t.fmmap = mm.map_ro(t.f.fileno(), 0, t._max_tracked_pages*t.blksize) t.fmmap = mm.map_ro(t.f.fileno(), 0, t._max_tracked_pages*t.blksize)
...@@ -456,12 +461,12 @@ class tFile: ...@@ -456,12 +461,12 @@ class tFile:
for blk in range(t._max_tracked_pages): for blk in range(t._max_tracked_pages):
blkmmap = t.fmmap[blk*t.blksize:(blk+1)*t.blksize] blkmmap = t.fmmap[blk*t.blksize:(blk+1)*t.blksize]
# NOTE the kernel does not start readahead from access to # NOTE the kernel does not start readahead from access to
# MADV_RANDOM vma, but for MADV_NORMAL vma it starts readahead which # MADV_RANDOM vma, but for a MADV_{NORMAL/SEQUENTIAL} vma it starts
# can go _beyond_ vma that was used to decide RA start. For this # readahead which can go _beyond_ vma that was used to decide RA
# reason - to prevent RA started at one block to overlap with the # start. For this reason - to prevent RA started at one block to
# next block, we put MADV_RANDOM vma at the end of every block # overlap with the next block, we put MADV_RANDOM vma at the end of
# covering last 1/4 of it. # every block covering last 1/8 of it.
# XXX implicit assumption that RA window is < 1/4·blksize # XXX implicit assumption that RA window is < 1/8·blksize
# #
# NOTE with a block completely covered by MADV_RANDOM the kernel # NOTE with a block completely covered by MADV_RANDOM the kernel
# issues 4K sized reads; wcfs starts uploading into cache almost # issues 4K sized reads; wcfs starts uploading into cache almost
...@@ -470,7 +475,9 @@ class tFile: ...@@ -470,7 +475,9 @@ class tFile:
# XXX -> investigate and maybe make read(while-uploading) wait for # XXX -> investigate and maybe make read(while-uploading) wait for
# uploading to complete and only then return? (maybe it will help # uploading to complete and only then return? (maybe it will help
# performance even in normal case) # performance even in normal case)
mm.advise(blkmmap[len(blkmmap)*3//4:], mm.MADV_RANDOM) _ = len(blkmmap)*7//8
mm.advise(blkmmap[:_], mm.MADV_SEQUENTIAL)
mm.advise(blkmmap[_:], mm.MADV_RANDOM)
tdb._files.add(t) tdb._files.add(t)
...@@ -549,8 +556,8 @@ class tFile: ...@@ -549,8 +556,8 @@ class tFile:
incore_before = t.cached() incore_before = t.cached()
def _(): def _():
incore_after = t.cached() incore_after = t.cached()
incore_before[blk] = '' incore_before[blk] = 'x'
incore_after [blk] = '' incore_after [blk] = 'x'
assert incore_before == incore_after assert incore_before == incore_after
defer(_) defer(_)
...@@ -1154,7 +1161,7 @@ def test_wcfs(): ...@@ -1154,7 +1161,7 @@ def test_wcfs():
# >>> XXX commit data to not yet accessed f part - nothing happens # >>> XXX commit data to not yet accessed f part - nothing happens
""" # """
# >>> invalidation protocol # >>> invalidation protocol
print('\n\n inv. protocol \n\n') print('\n\n inv. protocol \n\n')
...@@ -1202,7 +1209,7 @@ def test_wcfs(): ...@@ -1202,7 +1209,7 @@ def test_wcfs():
for at in revv[1:]: for at in revv[1:]:
wl.watch(zf, at) wl.watch(zf, at)
wl.close() wl.close()
""" # """
print('\n\n\n\nWATCH+COMMIT\n\n\n\n') print('\n\n\n\nWATCH+COMMIT\n\n\n\n')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment