Commit b0f3b2e5 authored by Kirill Smelkov

.

parent 18362227
@@ -436,18 +436,23 @@ class tFile:
# mmap access we have the guarantee from kernel that the page will
# stay in pagecache.
#
-# 2. madvise memory with MADV_NORMAL and MADV_RANDOM in interleaved
-# mode. This adjusts kernel readahead (which triggers for MADV_NORMAL
+# 2. madvise memory with MADV_SEQUENTIAL and MADV_RANDOM in interleaved
+# mode. This adjusts kernel readahead (which triggers for MADV_SEQUENTIAL
# vma) to not go over to next block and thus a read access to one
# block won't trigger implicit read access to its neighbour block.
#
# https://www.quora.com/What-heuristics-does-the-adaptive-readahead-implementation-in-the-Linux-kernel-use
# https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/madvise.c?h=v5.2-rc4#n51
#
-# don't disable readahead universally, since enabled readahead helps
-# to test how wcfs handles simultaneous read triggered by async
-# kernel readahead vs wcfs uploading data for the same block into OS
-# cache. Also, fully enabled readahead is how wcfs is actually used.
+# we use MADV_SEQUENTIAL rather than MADV_NORMAL, because for
+# MADV_NORMAL there is not only read-ahead, but also read-around,
+# which might result in accessing the previous block.
+#
+# we don't disable readahead universally, since enabled readahead
+# helps to test how wcfs handles a simultaneous read triggered by
+# async kernel readahead vs wcfs uploading data for the same block
+# into the OS cache. Also, fully enabled readahead is how wcfs is
+# actually used.
assert t.blksize % mm.PAGE_SIZE == 0
t.fmmap = mm.map_ro(t.f.fileno(), 0, t._max_tracked_pages*t.blksize)
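A minimal standalone sketch of this interleaving, using the stdlib mmap module (Python >= 3.8, Linux) instead of the mm wrapper used here; the file name, block size and block count are made-up placeholders, and the file is assumed to be at least nblk*blksize bytes long:

    import mmap

    PAGE    = mmap.PAGESIZE
    blksize = 512*PAGE      # hypothetical 2 MiB block (with 4 KiB pages)
    nblk    = 4             # hypothetical number of mapped blocks

    with open("data.bin", "rb") as f:           # hypothetical data file
        m = mmap.mmap(f.fileno(), nblk*blksize, prot=mmap.PROT_READ)

    for blk in range(nblk):
        start = blk*blksize
        cut   = start + blksize*7//8            # start of the last 1/8 of the block
        # readahead stays active over the head of the block ...
        m.madvise(mmap.MADV_SEQUENTIAL, start, cut - start)
        # ... while a MADV_RANDOM tail stops it before it can spill into the next block
        m.madvise(mmap.MADV_RANDOM, cut, (start + blksize) - cut)

With blksize = 512 pages, 7/8 of a block is 448 pages, so both madvise boundaries stay page-aligned as madvise requires.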
@@ -456,12 +461,12 @@ class tFile:
for blk in range(t._max_tracked_pages):
blkmmap = t.fmmap[blk*t.blksize:(blk+1)*t.blksize]
# NOTE the kernel does not start readahead from access to
-# MADV_RANDOM vma, but for MADV_NORMAL vma it starts readhead which
-# can go _beyond_ vma that was used to decide RA start. For this
-# reason - to prevent RA started at one block to overlap with the
-# next block, we put MADV_RANDOM vma at the end of every block
-# covering last 1/4 of it.
-# XXX implicit assumption that RA window is < 1/4·blksize
+# MADV_RANDOM vma, but for a MADV_{NORMAL/SEQUENTIAL} vma it starts
+# readahead which can go _beyond_ the vma that was used to decide RA
+# start. For this reason - to prevent RA started at one block from
+# overlapping with the next block - we put a MADV_RANDOM vma at the
+# end of every block, covering the last 1/8 of it.
+# XXX implicit assumption that RA window is < 1/8·blksize
#
# NOTE with a block completely covered by MADV_RANDOM the kernel
# issues 4K sized reads; wcfs starts uploading into cache almost
@@ -470,7 +475,9 @@
# XXX -> investigate and maybe make read(while-uploading) wait for
# uploading to complete and only then return? (maybe it will help
# performance even in normal case)
-mm.advise(blkmmap[len(blkmmap)*3//4:], mm.MADV_RANDOM)
+_ = len(blkmmap)*7//8
+mm.advise(blkmmap[:_], mm.MADV_SEQUENTIAL)
+mm.advise(blkmmap[_:], mm.MADV_RANDOM)
tdb._files.add(t)
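The "RA window < 1/8·blksize" assumption above can be spot-checked against the kernel's configured readahead window, which defaults to 128 KiB on most systems. A sketch under the assumption of a block-device sysfs path and device name; a FUSE mount like wcfs exposes the analogous knob via its backing-device info at /sys/class/bdi/<major:minor>/read_ahead_kb:

    def ra_window_bytes(dev="sda"):             # hypothetical device name
        # kernel readahead window for a block device, in bytes
        with open("/sys/block/%s/queue/read_ahead_kb" % dev) as f:
            return int(f.read()) * 1024

    blksize = 2*1024*1024                       # hypothetical 2 MiB block
    # if this fails, the MADV_RANDOM tail is too small to contain readahead
    assert ra_window_bytes() < blksize//8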
@@ -549,8 +556,8 @@ class tFile:
incore_before = t.cached()
def _():
incore_after = t.cached()
-incore_before[blk] = ''
-incore_after [blk] = ''
+incore_before[blk] = 'x'
+incore_after [blk] = 'x'
assert incore_before == incore_after
defer(_)
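For reference, the resident-pages check behind cached()/incore can be done on Linux with mincore(2). A self-contained ctypes sketch, assuming a per-page 'x'/' ' encoding in the spirit of the 'x' masking above (an assumption; cached() itself is not shown in this hunk). The writable mapping is only there because ctypes' from_buffer requires a writable buffer:

    import ctypes, ctypes.util, mmap, tempfile

    libc = ctypes.CDLL(ctypes.util.find_library("c"), use_errno=True)
    PAGE = mmap.PAGESIZE

    def incore(m):
        # 'x' for pages resident in the page cache, ' ' for absent ones
        addr = ctypes.addressof(ctypes.c_char.from_buffer(m))
        npages = (len(m) + PAGE - 1) // PAGE
        vec = (ctypes.c_ubyte * npages)()
        if libc.mincore(ctypes.c_void_p(addr), ctypes.c_size_t(len(m)), vec) != 0:
            raise OSError(ctypes.get_errno(), "mincore")
        return ''.join('x' if b & 1 else ' ' for b in vec)

    with tempfile.NamedTemporaryFile() as f:
        f.truncate(4*PAGE)
        m = mmap.mmap(f.fileno(), 4*PAGE)       # writable mapping
        m[0] = 0                                # touch page 0 -> it becomes resident
        print(incore(m))                        # e.g. 'x   '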
@@ -1154,7 +1161,7 @@ def test_wcfs():
# >>> XXX commit data to not yet accessed f part - nothing happens
"""
# """
# >>> invalidation protocol
print('\n\n inv. protocol \n\n')
@@ -1202,7 +1209,7 @@ def test_wcfs():
for at in revv[1:]:
wl.watch(zf, at)
wl.close()
"""
# """
print('\n\n\n\nWATCH+COMMIT\n\n\n\n')