Commit c5624fa9 authored by Kirill Smelkov's avatar Kirill Smelkov

wcfs: client: tests: Turn SIGSEGV in tMapping.assertBlk into exception

When WCFS-mmapped memory is accessed, it can get SIGBUS on IO error (and
automatically on WCFS crash), and SIGSEGV when accessed client mapping is closed.

tFile.assertBlk in wcfs_test.py already converts SIGSEGV into python
exception when accessing on-wcfs file's block. However
tMapping.assertBlk was not doing so, which, instead of providing proper
details, leads to test crashes if something goes wrong.

For example when wendelin.core tests are run wrt plain ZODB4 (contrary to
ZODB4-wc2, see nexedi/ZODB!1 and
nexedi/slapos@e256ed97), it first fails in
pinner and then gets SIGSEGV on data access, because, to mimic SIGBUS on
EIO, pinner shutdowns all mappings on its failure:

https://lab.nexedi.com/nexedi/wendelin.core/blob/49f826b1/wcfs/client/wcfs.cpp#L477-501
https://nexedijs.erp5.net/#/test_result_module/20211118-7C45220A/25

-> Fix it by wrapping test block access with appropriate read_exfault
variant.

Before this patch:

    .../wendelin.core$ WENDELIN_CORE_TEST_DB='<zeo>' WENDELIN_CORE_VIRTMEM='r:wcfs+w:uvmm' python -m pytest -vsx wcfs/ -k test_wcfs_client
    ...
    wcfs/client/client_test.py::test_wcfs_client
    -------------------- live log call ---------------------
    INFO     wcfs:__init__.py:293 starting for zeo://localhost:28866 ...
    I1122 19:17:14.376182  110032 wcfs.go:2384] start "/dev/shm/wcfs/ef87339c054c3e0e48d494fa584bb209518844b2" "zeo://localhost:28866"
    I1122 19:17:14.376291  110032 wcfs.go:2390] (built with go1.17.3)
    W1122 19:17:14.380882  110032 storage.go:152] zodb: FIXME: open zeo://localhost:28866: raw cache is not ready for invalidations -> NoCache forced
    INFO     wcfs:__init__.py:334 started pid110032 @ /dev/shm/wcfs/ef87339c054c3e0e48d494fa584bb209518844b2

    M: commit -> @at0 (03e452313dddbc00)

    M: commit -> @at1 (03e452313e0f3b99)
    M:      f<0000000000000002>     [2, 3]

    M: commit -> @at2 (03e452313e1adb55)
    M:      f<0000000000000002>     [2]

    M: commit -> @at3 (03e452313e3be500)
    M:      f<0000000000000002>     [3, 4]
    W1122 19:17:14.597654  110032 wcfs.go:2050] /@03e452313d343c88/bigfile: lookup "0000000000000002": bigfopen 0000000000000002 @03e452313d343c88: invalid argument: Get 0000000000000002: Get 03e452313d343c88:0000000000000002: zeo://localhost:28866: load 03e452313d343c88:0000000000000002: 0000000000000002: no such object
    E1122 19:17:14.597759  110032 wcfs.go:1220] /head/bigfile/0000000000000002: readblk #4: pin watchers: wlink1: f<0000000000000002>: pin #4 @03e452313d343c88: expect "ack"; got "nak: _remmapblk #4 @03e452313d343c88: open /dev/shm/wcfs/ef87339c054c3e0e48d494fa584bb209518844b2/@03e452313d343c88/bigfile/0000000000000002: Invalid argument"
    F1122 19:17:14.597803  110050 wcfs/client/wcfs.cpp:487] CRITICAL: pinner: pin f<0000000000000002> #4 @03e452313d343c88: _remmapblk #4 @03e452313d343c88: open /dev/shm/wcfs/ef87339c054c3e0e48d494fa584bb209518844b2/@03e452313d343c88/bigfile/0000000000000002: Invalid argument
    F1122 19:17:14.597835  110050 wcfs/client/wcfs.cpp:488] CRITICAL: wcfs server will likely kill us soon.
    CRITICAL: pinner: pin f<0000000000000002> #4 @03e452313d343c88: _remmapblk #4 @03e452313d343c88: open /dev/shm/wcfs/ef87339c054c3e0e48d494fa584bb209518844b2/@03e452313d343c88/bigfile/0000000000000002: Invalid argument
    CRITICAL: wcfs server will likely kill us soon.
    Segmentation fault: read @00007ff7b9534000
    /home/kirr/src/wendelin/wendelin.core/wcfs/client/./../../bigfile/liblibvirtmem.so(dump_traceback+0x34)[0x7ff7d6b5c279]
    /home/kirr/src/wendelin/wendelin.core/wcfs/client/./../../bigfile/liblibvirtmem.so(+0x27b0)[0x7ff7d6b577b0]
    /lib/x86_64-linux-gnu/libpthread.so.0(+0x14140)[0x7ff7da078140]
    python(PyString_FromStringAndSize+0x228)[0x5627feb96b58]
    python(PyEval_EvalFrameEx+0x603e)[0x5627febb7a4e]
    python(PyEval_EvalCodeEx+0x57c)[0x5627febb03cc]
    ...
    python(PyObject_Call+0x43)[0x5627feb9d903]
    python(+0x18a7e1)[0x5627fec5d7e1]
    python(Py_Main+0x3ad)[0x5627fec4b8ed]
    /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xea)[0x7ff7d9d59d0a]
    python(_start+0x2a)[0x5627fec4b46a]
    Ошибка сегментирования (стек памяти сброшен на диск)

After this patch:

    .../wendelin.core$ WENDELIN_CORE_TEST_DB='<zeo>' WENDELIN_CORE_VIRTMEM='r:wcfs+w:uvmm' python -m pytest -vsx wcfs/ -k test_wcfs_client
    ...
    wcfs/client/client_test.py::test_wcfs_client
    -------------------- live log call ---------------------
    INFO     wcfs:__init__.py:293 starting for zeo://localhost:22854 ...
    I1122 18:17:22.486445  102541 wcfs.go:2384] start "/dev/shm/wcfs/c818c147676f8d6f3b408b02f727aca5e3229e98" "zeo://localhost:22854"
    I1122 18:17:22.486525  102541 wcfs.go:2390] (built with go1.17.3)
    W1122 18:17:22.489908  102541 storage.go:152] zodb: FIXME: open zeo://localhost:22854: raw cache is not ready for invalidations -> NoCache forced
    INFO     wcfs:__init__.py:334 started pid102541 @ /dev/shm/wcfs/c818c147676f8d6f3b408b02f727aca5e3229e98

    M: commit -> @at0 (03e451f560834477)

    M: commit -> @at1 (03e451f560a2aa77)
    M:      f<0000000000000002>     [2, 3]

    M: commit -> @at2 (03e451f560adafcc)
    M:      f<0000000000000002>     [2]

    M: commit -> @at3 (03e451f560d02111)
    M:      f<0000000000000002>     [3, 4]
    W1122 18:17:22.703710  102541 wcfs.go:2050] /@03e451f55fcc4c77/bigfile: lookup "0000000000000002": bigfopen 0000000000000002 @03e451f55fcc4c77: invalid argument: Get 0000000000000002: Get 03e451f55fcc4c77:0000000000000002: zeo://localhost:22854: load 03e451f55fcc4c77:0000000000000002: 0000000000000002: no such object
    E1122 18:17:22.703840  102541 wcfs.go:1220] /head/bigfile/0000000000000002: readblk #4: pin watchers: wlink1: f<0000000000000002>: pin #4 @03e451f55fcc4c77: expect "ack"; got "nak: _remmapblk #4 @03e451f55fcc4c77: open /dev/shm/wcfs/c818c147676f8d6f3b408b02f727aca5e3229e98/@03e451f55fcc4c77/bigfile/0000000000000002: Invalid argument"
    F1122 18:17:22.704380  102558 wcfs/client/wcfs.cpp:487] CRITICAL: pinner: pin f<0000000000000002> #4 @03e451f55fcc4c77: _remmapblk #4 @03e451f55fcc4c77: open /dev/shm/wcfs/c818c147676f8d6f3b408b02f727aca5e3229e98/@03e451f55fcc4c77/bigfile/0000000000000002: Invalid argument
    F1122 18:17:22.704639  102558 wcfs/client/wcfs.cpp:488] CRITICAL: wcfs server will likely kill us soon.
    CRITICAL: pinner: pin f<0000000000000002> #4 @03e451f55fcc4c77: _remmapblk #4 @03e451f55fcc4c77: open /dev/shm/wcfs/c818c147676f8d6f3b408b02f727aca5e3229e98/@03e451f55fcc4c77/bigfile/0000000000000002: Invalid argument
    CRITICAL: wcfs server will likely kill us soon.
    >>> Change history by file:

    f<0000000000000002>:
                                    0 1 2 3 4 5 6 7
                                    a b c d e f g h
            @at0 (03e451f560834477)
            @at1 (03e451f560a2aa77)     2 3
            @at2 (03e451f560adafcc)     2
            @at3 (03e451f560d02111)       3 4

    INFO     wcfs:__init__.py:400 unmount/stop wcfs pid102541 @ /dev/shm/wcfs/c818c147676f8d6f3b408b02f727aca5e3229e98
    I1122 18:17:22.728452  102541 wcfs.go:2560] stop "/dev/shm/wcfs/c818c147676f8d6f3b408b02f727aca5e3229e98" "zeo://localhost:22854"
    FAILED

    ======================= FAILURES =======================
    ___________________ test_wcfs_client ___________________

        @func
        def test_wcfs_client():
            t = tDB(); zf = t.zfile; at0=t.at0
            defer(t.close)
            pinned = lambda fh: fhpinned(t, fh)

            at1 = t.commit(zf, {2:'c1', 3:'d1'})
            at2 = t.commit(zf, {2:'c2'})

            wconn = t.wc.connect(at1)
            defer(wconn.close)

            fh = wconn.open(zf._p_oid)
            defer(fh.close)

            # create mmap with 1 block beyond file size
            m1 = fh.mmap(2, 3)
            defer(m1.unmap)

            assert m1.blk_start == 2
            assert m1.blk_stop  == 5
            assert len(m1.mem)  == 3*zf.blksize

            tm1 = tMapping(t, m1)

            assert pinned(fh) == {}

            # verify initial data reads
            tm1.assertBlk(2, 'c1',  {2:at1})
            tm1.assertBlk(3, 'd1',  {2:at1})
            tm1.assertBlk(4, '',    {2:at1})

            # commit with growing file size -> verify data read as the same, #3 pinned.
            # (#4 is not yet pinned because it was not accessed)
            at3 = t.commit(zf, {3:'d3', 4:'e3'})
            assert pinned(fh) == {2:at1}
            tm1.assertBlk(2, 'c1',  {2:at1})
            tm1.assertBlk(3, 'd1',  {2:at1, 3:at1})
            tm1.assertBlk(4, '',    {2:at1, 3:at1})

            # resync at1 -> at2:    #2 must unpin to @head; #4 must stay as zero
            wconn.resync(at2)
            assert pinned(fh) == {3:at1}
            tm1.assertBlk(2, 'c2',  {       3:at1})
            tm1.assertBlk(3, 'd1',  {       3:at1})
    >       tm1.assertBlk(4, '',    {       3:at1,  4:at0})     # XXX at0->ø ?

    wcfs/client/client_test.py:158:
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
    wcfs/client/client_test.py:86: in assertBlk
        _ = read_exfault_withgil(blkview[0:1])
    wcfs/internal/wcfs_test.pyx:90: in wendelin.wcfs.internal.wcfs_test.read_exfault_withgil
        return _read_exfault(mem, withgil=True)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

    >   raise SegmentationFault()
    E   SegmentationFault

    wcfs/internal/wcfs_test.pyx:120: SegmentationFault
    ------------------ Captured log call -------------------
    INFO     wcfs:__init__.py:293 starting for zeo://localhost:22854 ...
    INFO     wcfs:__init__.py:334 started pid102541 @ /dev/shm/wcfs/c818c147676f8d6f3b408b02f727aca5e3229e98
    INFO     wcfs:__init__.py:400 unmount/stop wcfs pid102541 @ /dev/shm/wcfs/c818c147676f8d6f3b408b02f727aca5e3229e98
parent 81274eb7
...@@ -32,7 +32,7 @@ from wendelin.bigfile.file_zodb import ZBigFile ...@@ -32,7 +32,7 @@ from wendelin.bigfile.file_zodb import ZBigFile
from wendelin.wcfs.wcfs_test import tDB, tAt, timeout, eprint from wendelin.wcfs.wcfs_test import tDB, tAt, timeout, eprint
from wendelin.wcfs import _waitfor_ as waitfor_ from wendelin.wcfs import _waitfor_ as waitfor_
from wendelin.wcfs import wcfs_test from wendelin.wcfs import wcfs_test
from wendelin.wcfs.internal.wcfs_test import read_mustfault from wendelin.wcfs.internal.wcfs_test import read_mustfault, read_exfault_withgil
from wendelin.wcfs.internal import mm from wendelin.wcfs.internal import mm
from pytest import raises from pytest import raises
...@@ -83,7 +83,7 @@ class tMapping(object): ...@@ -83,7 +83,7 @@ class tMapping(object):
# - - - - - - # - - - - - -
# client process # client process
# #
_ = blkview[0] _ = read_exfault_withgil(blkview[0:1])
assert _ == dataok[0] assert _ == dataok[0]
assert blkview.tobytes() == dataok assert blkview.tobytes() == dataok
......
...@@ -81,12 +81,20 @@ cdef class _tWCFS: ...@@ -81,12 +81,20 @@ cdef class _tWCFS:
# read_exfault_nogil reads mem with GIL released and returns its content. # read_exfault_nogil reads mem with GIL released and returns its content.
# #
# If reading hits segmentation fault, it is converted to SegmentationFault exception. # If reading hits segmentation fault, it is converted to SegmentationFault exception.
def read_exfault_nogil(const unsigned char[::1] mem not None) -> bytes:
return _read_exfault(mem, withgil=False)
# read_exfault_withgil is similar to read_exfault_nogil, but does the reading
# while holding the GIL.
def read_exfault_withgil(const unsigned char[::1] mem not None) -> bytes:
return _read_exfault(mem, withgil=True)
class SegmentationFault(Exception): pass class SegmentationFault(Exception): pass
cdef sync.Mutex exfaultMu # one at a time as sigaction is per-process cdef sync.Mutex exfaultMu # one at a time as sigaction is per-process
cdef sigjmp_buf exfaultJmp cdef sigjmp_buf exfaultJmp
cdef cbool faulted cdef cbool faulted
def read_exfault_nogil(const unsigned char[::1] mem not None) -> bytes: def _read_exfault(const unsigned char[::1] mem not None, cbool withgil) -> bytes:
assert len(mem) == 1, "read_exfault_nogil: only [1] mem is supported for now" assert len(mem) == 1, "_read_exfault: only [1] mem is supported for now"
cdef unsigned char b cdef unsigned char b
global faulted global faulted
cdef cbool faulted_ cdef cbool faulted_
...@@ -97,8 +105,12 @@ def read_exfault_nogil(const unsigned char[::1] mem not None) -> bytes: ...@@ -97,8 +105,12 @@ def read_exfault_nogil(const unsigned char[::1] mem not None) -> bytes:
faulted = False faulted = False
try: try:
if withgil:
#with gil: (cython complains "Trying to acquire the GIL while it is already held.")
b = __read_exfault(&mem[0])
else:
with nogil: with nogil:
b = _read_exfault(&mem[0]) b = __read_exfault(&mem[0])
finally: finally:
faulted_ = faulted faulted_ = faulted
with nogil: with nogil:
...@@ -114,7 +126,7 @@ cdef void exfaultSighand(int sig) nogil: ...@@ -114,7 +126,7 @@ cdef void exfaultSighand(int sig) nogil:
faulted = True faulted = True
siglongjmp(exfaultJmp, 1) siglongjmp(exfaultJmp, 1)
cdef unsigned char _read_exfault(const unsigned char *p) nogil except +topyexc: cdef unsigned char __read_exfault(const unsigned char *p) nogil except +topyexc:
global faulted global faulted
cdef sigaction_t act, saveact cdef sigaction_t act, saveact
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment