Commit 869e597d authored by Kirill Smelkov, committed by Levin Zimmermann

wcfs: tests: Add context to tWCFS

This testing helper limits the total test time in order to detect
FUSE-related deadlocks by aborting the FUSE connection on timeout. It has
been working well so far. But soon we will need pinkill-related tests,
where the timeout will need to be detected independently of the FUSE
connection. Expose tWCFS.ctx so that tests can use this context to do
things limited in time. Adjust FUSE aborting to correlate exactly with
the cancellation of this context.

/reviewed-by @levin.zimmermann
/reviewed-on nexedi/wendelin.core!18
parent ab38f971
# Copyright (C) 2019-2021 Nexedi SA and Contributors. # Copyright (C) 2019-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -33,7 +33,7 @@ from posix.types cimport off_t ...@@ -33,7 +33,7 @@ from posix.types cimport off_t
from cpython.exc cimport PyErr_SetFromErrno from cpython.exc cimport PyErr_SetFromErrno
from golang cimport chan, pychan, select, panic, topyexc, cbool from golang cimport chan, pychan, select, panic, topyexc, cbool, structZ
from golang cimport sync, time from golang cimport sync, time
# _tWCFS is pyx part of tWCFS. # _tWCFS is pyx part of tWCFS.
...@@ -53,16 +53,15 @@ cdef class _tWCFS: ...@@ -53,16 +53,15 @@ cdef class _tWCFS:
# but pin handler is failing one way or another - select will wake-up # but pin handler is failing one way or another - select will wake-up
# but, if _abort_ontimeout uses GIL, won't continue to run trying to lock # but, if _abort_ontimeout uses GIL, won't continue to run trying to lock
# GIL -> deadlock. # GIL -> deadlock.
def _abort_ontimeout(_tWCFS t, int fdabort, double dt, pychan nogilready not None): def _abort_ontimeout(_tWCFS t, int fdabort, double dt, pychan timeoutch not None, pychan nogilready not None):
cdef chan[double] timeoutch = time.after(dt)
emsg1 = "\nC: test timed out after %.1fs\n" % (dt / time.second) emsg1 = "\nC: test timed out after %.1fs\n" % (dt / time.second)
cdef char *_emsg1 = emsg1 cdef char *_emsg1 = emsg1
with nogil: with nogil:
# tell main thread that we entered nogil world # tell main thread that we entered nogil world
nogilready.chan_structZ().close() nogilready.chan_structZ().close()
t.__abort_ontimeout(dt, timeoutch, fdabort, _emsg1) t.__abort_ontimeout(timeoutch.chan_structZ(), fdabort, _emsg1)
cdef void __abort_ontimeout(_tWCFS t, double dt, chan[double] timeoutch, cdef void __abort_ontimeout(_tWCFS t, chan[structZ] timeoutch,
int fdabort, const char *emsg1) nogil except +topyexc: int fdabort, const char *emsg1) nogil except +topyexc:
_ = select([ _ = select([
timeoutch.recvs(), # 0 timeoutch.recvs(), # 0
......
...@@ -358,6 +358,13 @@ class tWCFS(_tWCFS): ...@@ -358,6 +358,13 @@ class tWCFS(_tWCFS):
assert is_mountpoint(wc.mountpoint) assert is_mountpoint(wc.mountpoint)
t.wc = wc t.wc = wc
# the whole test is limited in time to detect deadlocks
# NOTE with_timeout must be << timeout
# NOTE wcfs_pin_timeout must be >> timeout
timeout = 10*time.second
t.ctx, t._ctx_cancel = context.with_timeout(context.background(), timeout)
# make sure any stuck FUSE request is aborted. To do so
# force-unmount wcfs on timeout to unstuck current test and let it fail. # force-unmount wcfs on timeout to unstuck current test and let it fail.
# Force-unmount can be done reliably only by writing into # Force-unmount can be done reliably only by writing into
# /sys/fs/fuse/connections/<X>/abort. For everything else there are # /sys/fs/fuse/connections/<X>/abort. For everything else there are
...@@ -366,7 +373,7 @@ class tWCFS(_tWCFS): ...@@ -366,7 +373,7 @@ class tWCFS(_tWCFS):
# still wait for request completion even after fatal signal ) # still wait for request completion even after fatal signal )
nogilready = chan(dtype='C.structZ') nogilready = chan(dtype='C.structZ')
t._wcfuseabort = os.dup(wc._wcsrv._fuseabort.fileno()) t._wcfuseabort = os.dup(wc._wcsrv._fuseabort.fileno())
go(t._abort_ontimeout, t._wcfuseabort, 10*time.second, nogilready) # NOTE must be: with_timeout << · << wcfs_pin_timeout go(t._abort_ontimeout, t._wcfuseabort, timeout, t.ctx.done(), nogilready)
nogilready.recv() # wait till _abort_ontimeout enters nogil nogilready.recv() # wait till _abort_ontimeout enters nogil
t._stats_prev = None t._stats_prev = None
...@@ -380,6 +387,7 @@ class tWCFS(_tWCFS): ...@@ -380,6 +387,7 @@ class tWCFS(_tWCFS):
# that wcfs server exits. # that wcfs server exits.
@func @func
def close(t): def close(t):
defer(t._ctx_cancel)
def _(): def _():
os.close(t._wcfuseabort) os.close(t._wcfuseabort)
defer(_) defer(_)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment