wcfs: tests: Use bytestrings uniformly

WCFS tests follow the bytestring model from the beginning because they
operate with binary data and binary messages to/from the WCFS server. It
all works ok on py2, but running the tests on py3 yielded several problems.
For example, str and bytes were mixed in the innermost part of _assertBlk:

        def _(ctx, ev):
            assert t.cached()[blk] == cached
            ev.append('read pre')

            # access data with released GIL so that the thread that reads data from
            # head/watch can receive pin message. Be careful to handle cancellation,
            # so that on error in another worker we don't get stuck and the
            # error can be propagated to wait and reported.
            #
            # we handle cancellation by spawning read in another thread and
            # waiting for either ctx cancel, or read thread to complete. This
            # way on ctx cancel (e.g. assertion failure in another worker), the
            # read thread can remain running even after _assertBlk returns, and
            # in particular till the point where the whole test is marked as
            # failed and shut down. But on test shutdown .fmmap is unmapped for
            # all opened tFiles, and so read will hit SIGSEGV. Prepare to catch
            # that SIGSEGV here.

            have_read = chan(1)
            def _():
                try:
                    b = read_exfault_nogil(blkview[0:1])
                except SegmentationFault:
                    b = 'FAULT'
                t._blkaccess(blk)
                have_read.send(b)
            go(_)
            _, _rx = select(
                ctx.done().recv,    # 0
                have_read.recv,     # 1
            )
            if _ == 0:
                raise ctx.err()
            b = _rx

    >       ev.append('read ' + b)
    E       TypeError: can only concatenate str (not "bytes") to str

bytes input was split with a str delimiter:

        def _loadStats(t): # -> {}
            stats = {}
            for l in t.wc._read(".wcfs/stats").splitlines():
                # key : value
    >           k, v = l.split(':')
    E           TypeError: a bytes-like object is required, not 'str'

and a str object was rejected when assigning to C `char*`:

    Traceback (most recent call last):
      File "golang/_golang.pyx", line 156, in golang._golang.__goviac
      File "wcfs/internal/wcfs_test.pyx", line 58, in wendelin.wcfs.internal.wcfs_test._tWCFS._abort_ontimeout
    TypeError: expected bytes, str found

-> Fix all those oversights by consistently using bytestrings everywhere.
On py3 the implementation depends on nexedi/pygolang!21, but on py2 it
works both with and without the pygolang bstr patches.

Preliminary history: vnmabus/wendelin.core@af56bd31

but it takes the reverse approach and mixes in pytest.approx for the
timestamp assert, which is unrelated to the topic and is not correct, as
the default pytest.approx behaviour is to use 1e-6 relative precision, which
results in ~1700 seconds tolerance instead of the intended 1µs:

    In [1]: import pytest
    In [2]: import time
    In [3]: t = time.time()
    In [4]: t
    Out[4]: 1727271692.0636573
    In [5]: t == t
    Out[5]: True
    In [6]: t == pytest.approx(t+1000)
    Out[6]: True
    In [7]: t == pytest.approx(t+2000)
    Out[7]: False

So using pytest.approx made the tests accept faulty wcfs behaviour.
The timestamp assert will be handled properly in the next patch.

Co-authored-by: Carlos Ramos Carreño <carlos.ramos@nexedi.com>

wcfs: tests: Use bytestrings uniformly
WCFS tests follow the bytestring model from the beginning because they
operate with binary data and binary messages to/from the WCFS server. It
all works ok on py2, but running the tests on py3 yielded several problems.
For example, str and bytes were mixed in the innermost part of _assertBlk:

        def _(ctx, ev):
            assert t.cached()[blk] == cached
            ev.append('read pre')

            # access data with released GIL so that the thread that reads data from
            # head/watch can receive pin message. Be careful to handle cancellation,
            # so that on error in another worker we don't get stuck and the
            # error can be propagated to wait and reported.
            #
            # we handle cancellation by spawning read in another thread and
            # waiting for either ctx cancel, or read thread to complete. This
            # way on ctx cancel (e.g. assertion failure in another worker), the
            # read thread can remain running even after _assertBlk returns, and
            # in particular till the point where the whole test is marked as
            # failed and shut down. But on test shutdown .fmmap is unmapped for
            # all opened tFiles, and so read will hit SIGSEGV. Prepare to catch
            # that SIGSEGV here.

            have_read = chan(1)
            def _():
                try:
                    b = read_exfault_nogil(blkview[0:1])
                except SegmentationFault:
                    b = 'FAULT'
                t._blkaccess(blk)
                have_read.send(b)
            go(_)
            _, _rx = select(
                ctx.done().recv,    # 0
                have_read.recv,     # 1
            )
            if _ == 0:
                raise ctx.err()
            b = _rx

    >       ev.append('read ' + b)
    E       TypeError: can only concatenate str (not "bytes") to str

bytes input was split with a str delimiter:

        def _loadStats(t): # -> {}
            stats = {}
            for l in t.wc._read(".wcfs/stats").splitlines():
                # key : value
    >           k, v = l.split(':')
    E           TypeError: a bytes-like object is required, not 'str'

and a str object was rejected when assigning to C `char*`:

    Traceback (most recent call last):
      File "golang/_golang.pyx", line 156, in golang._golang.__goviac
      File "wcfs/internal/wcfs_test.pyx", line 58, in wendelin.wcfs.internal.wcfs_test._tWCFS._abort_ontimeout
    TypeError: expected bytes, str found

-> Fix all those oversights by consistently using bytestrings everywhere.
On py3 the implementation depends on nexedi/pygolang!21, but on py2 it
works both with and without the pygolang bstr patches.

Preliminary history: vnmabus/wendelin.core@af56bd31

but it takes the reverse approach and mixes in pytest.approx for the
timestamp assert, which is unrelated to the topic and is not correct, as
the default pytest.approx behaviour is to use 1e-6 relative precision, which
results in ~1700 seconds tolerance instead of the intended 1µs:

    In [1]: import pytest
    In [2]: import time
    In [3]: t = time.time()
    In [4]: t
    Out[4]: 1727271692.0636573
    In [5]: t == t
    Out[5]: True
    In [6]: t == pytest.approx(t+1000)
    Out[6]: True
    In [7]: t == pytest.approx(t+2000)
    Out[7]: False

So using pytest.approx made the tests accept faulty wcfs behaviour.
The timestamp assert will be handled properly in the next patch.

Co-authored-by: Carlos Ramos Carreño <carlos.ramos@nexedi.com>
e1c96b1d · Kirill Smelkov · 69aab23a · e1c96b1d · e1c96b1d
Commit e1c96b1d authored Sep 25, 2024 by Kirill Smelkov
Hide whitespace changes
Inline Side-by-side

Showing with 14 additions and 11 deletions

wcfs/internal/wcfs_test.pyx wcfs/internal/wcfs_test.pyx +1 -1

wcfs/wcfs_test.py wcfs/wcfs_test.py +13 -10

No files found.
--- a/wcfs/internal/wcfs_test.pyx
+++ b/wcfs/internal/wcfs_test.pyx
@@ -54,7 +54,7 @@ cdef class _tWCFS:
    # but, if _abort_ontimeout uses GIL, won't continue to run trying to lock
    # GIL -> deadlock.
    def _abort_ontimeout(_tWCFS t, int fdabort, double dt, pychan timeoutch not None, pychan nogilready not None):
-        emsg1 = "\nC: test timed out after %.1fs\n" % (dt / time.second)
+        emsg1 = b"\nC: test timed out after %.1fs\n" % (dt / time.second)
        cdef char *_emsg1 = emsg1
        with nogil:
            # tell main thread that we entered nogil world

--- a/wcfs/wcfs_test.py
+++ b/wcfs/wcfs_test.py
@@ -58,6 +58,8 @@ from wendelin.wcfs.internal.wcfs_test import _tWCFS, read_exfault_nogil, Segment
 from wendelin.wcfs.client._wcfs import _tpywlinkwrite as _twlinkwrite
 from wendelin.wcfs import _is_mountpoint as is_mountpoint, _procwait as procwait, _waitfor as waitfor, _ready as ready, _rmdir_ifexists as rmdir_ifexists

+bstr = type(b(''))  # TODO import directly after https://lab.nexedi.com/nexedi/pygolang/-/merge_requests/21 is merged
+

 # setup:
 # - create test database, compute zurl and mountpoint for wcfs
@@ -466,14 +468,14 @@ class tWCFS(_tWCFS):
            assert kv == kvok, "stats did not stay at expected state"

    # _loadStats loads content of .wcfs/stats .
-    def _loadStats(t): # -> {}
+    def _loadStats(t): # -> {} bstr -> int
        stats = {}
        for l in t.wc._read(".wcfs/stats").splitlines():
            # key : value
-            k, v = l.split(':')
+            k, v = l.split(b':')
            k = k.strip()
            v = v.strip()
-            stats[k] = int(v)
+            stats[b(k)] = int(v)

        # verify that keys remains the same and that cumulative counters do not decrease
        if t._stats_prev is not None:
@@ -824,6 +826,7 @@ class tFile:

    @func
    def _assertBlk(t, blk, dataok, pinokByWLink=None, pinfunc=None, timeout=None):
+        assert isinstance(dataok, bstr)
        assert len(dataok) <= t.blksize
        dataok += b'\0'*(t.blksize - len(dataok))   # tailing zeros
        assert blk < t._sizeinblk()
@@ -897,11 +900,11 @@ class tFile:
            have_read = chan(1)
            def _():
                try:
-                    b = read_exfault_nogil(blkview[0:1])
+                    got = read_exfault_nogil(blkview[0:1])
                except SegmentationFault:
-                    b = 'FAULT'
+                    got = 'FAULT'
                t._blkaccess(blk)
-                have_read.send(b)
+                have_read.send(got)
            go(_)
            _, _rx = select(
                ctx.done().recv,    # 0
@@ -909,9 +912,9 @@ class tFile:
            )
            if _ == 0:
                raise ctx.err()
-            b = _rx
+            got = _rx

-            ev.append('read ' + b)
+            ev.append('read ' + b(got))
        ev = doCheckingPin(ctx, _, pinokByWLink, pinfunc)

        # XXX hack - wlinks are notified and emit events simultaneously - we
@@ -1212,7 +1215,7 @@ def doCheckingPin(ctx, f, pinokByWLink, pinfunc=None): # -> []event(str)
 def _expectPin(twlink, ctx, zf, expect): # -> []SrvReq
    expected = set()    # of expected pin messages
    for blk, at in expect.items():
-        hat = h(at) if at is not None else 'head'
+        hat = h(at) if at is not None else b'head'
        msg = b"pin %s #%d @%s" % (h(zf._p_oid), blk, hat)
        assert msg not in expected
        expected.add(msg)
@@ -1806,7 +1809,7 @@ def test_wcfs_remmap_on_pin():
        assert at    == at1
        mm.map_into_ro(f._blk(blk), f1.f.fileno(), blk*f.blksize)

-    f._assertBlk(2, 'hello', {wl: {2:at1}}, pinfunc=_)     # NOTE not world
+    f._assertBlk(2, b('hello'), {wl: {2:at1}}, pinfunc=_)  # NOTE not world


 # verify that pin message is not sent for the same blk@at twice.