Commit 5bfa8cf8 authored by Kirill Smelkov's avatar Kirill Smelkov

X wcfs: Add start to spawn a Server that can be later stopped (draft)

Implement Server.stop using most of the functionality that was
previously living in tWCFS test part.
parent 4188ea9e
......@@ -46,8 +46,18 @@ def transaction_reset():
# nothing to run after test
# Before pytest exits, teardown WCFS(s) that we automatically spawned during
# test runs in bigfile/bigarray/...
# enable log_cli on no-capture
# (output during a test is a mixture of print and log)
def pytest_configure(config):
if config.option.capture == "no":
config.inicfg['log_cli'] = "true"
assert config.getini("log_cli") is True
if config.option.verbose:
config.inicfg['log_cli_level'] = "INFO"
# Before pytest exits, teardown WCFS server(s) that we automatically spawned
# during test runs in bigfile/bigarray/...
#
# If we do not do this, spawned wcfs servers are left running _and_ connected
# by stdout to nxdtest input - which makes nxdtest to wait for them to exit.
......@@ -58,18 +68,13 @@ def pytest_unconfigure(config):
gc.collect()
from wendelin import wcfs
for wc in wcfs._wcstarted:
if wc._proc.poll() is not None:
continue # this wcfs server already exited
for wc in wcfs._wcautostarted:
# NOTE: defer instead of direct call - to call all wc.close if there
# was multiple wc spawned, and proceeding till the end even if any
# particular call raises exception.
defer(partial(_wcclose, wc))
defer(partial(_wcclose_and_stop, wc))
def _wcclose(wc):
from wendelin.wcfs.wcfs_test import tWCFS
print("# unmount/stop wcfs pid%d @ %s" % (wc._proc.pid, wc.mountpoint))
twc = tWCFS(wc=wc)
twc.close()
@func
def _wcclose_and_stop(wc):
defer(wc._wcsrv.stop)
defer(wc.close)
This diff is collapsed.
......@@ -29,7 +29,8 @@ from __future__ import print_function, absolute_import
from golang import func, defer, error, b
from wendelin.bigfile.file_zodb import ZBigFile
from wendelin.wcfs.wcfs_test import tDB, tAt, timeout, waitfor_, eprint
from wendelin.wcfs.wcfs_test import tDB, tAt, timeout, eprint
from wendelin.wcfs import _waitfor_ as waitfor_
from wendelin.wcfs import wcfs_test
from wendelin.wcfs.internal.wcfs_test import read_mustfault
from wendelin.wcfs.internal import mm
......
......@@ -53,9 +53,8 @@ cdef class _tWCFS:
# but pin handler is failing one way or another - select will wake-up
# but, if _abort_ontimeout uses GIL, won't continue to run trying to lock
# GIL -> deadlock.
def _abort_ontimeout(_tWCFS t, double dt, pychan nogilready not None):
def _abort_ontimeout(_tWCFS t, int fdabort, double dt, pychan nogilready not None):
cdef chan[double] timeoutch = time.after(dt)
cdef int fdabort = t._wcfuseabort.fileno()
emsg1 = "\nC: test timed out after %.1fs\n" % (dt / time.second)
cdef char *_emsg1 = emsg1
with nogil:
......
......@@ -43,9 +43,7 @@ from ZODB.utils import z64, u64, p64
import sys, os, os.path, subprocess
from thread import get_ident as gettid
from time import gmtime
from errno import EINVAL, ENOENT, ENOTCONN
from stat import S_ISDIR
from signal import SIGQUIT, SIGKILL
from errno import EINVAL, ENOTCONN
from resource import setrlimit, getrlimit, RLIMIT_MEMLOCK
from golang import go, chan, select, func, defer, default, error, b
from golang import context, errors, sync, time
......@@ -55,6 +53,7 @@ from pytest import raises, fail
from wendelin.wcfs.internal import io, mm
from wendelin.wcfs.internal.wcfs_test import _tWCFS, read_nogil, install_sigbus_trap, fadvise_dontneed
from wendelin.wcfs.client._wcfs import _tpywlinkwrite as _twlinkwrite
from wendelin.wcfs import _is_mountpoint as is_mountpoint, _procwait as procwait, _ready as ready
# setup:
......@@ -113,26 +112,9 @@ def teardown_function(f):
procmounts_lookup_wcfs(testzurl)
# fuse_unmount unmounts FUSE filesystem mounted @ mntpt.
@func
def fuse_unmount(mntpt):
assert is_mountpoint(mntpt)
try:
wcfs._fuse_unmount(mntpt)
except subprocess.CalledProcessError:
# unmount failed, usually due to "device is busy".
# Print which files are still opened and reraise
def _():
print("# lsof %s" % mntpt)
# -w to avoid lots of
# lsof: WARNING: can't stat() fuse.wcfs file system /tmp/wcfs/X
# Output information may be incomplete.
# if there are other uncleaned wcfs mountpoints.
# (lsof stats all filesystems on startup)
# XXX -> better use `fuser -m <mntpt>` (it says it will limit search to files only under mntpt) ?
subprocess.check_call(["lsof", "-w", "+D", mntpt])
defer(_)
raise
# ---- test join/autostart/serve ----
......@@ -149,13 +131,18 @@ def test_join():
assert wcfs._wcregistry == {}
defer(_)
wc = wcfs._start(zurl)
wcsrv = wcfs.start(zurl)
defer(wcsrv.stop)
assert wcsrv.mountpoint == testmntpt
assert readfile(wcsrv.mountpoint + "/.wcfs/zurl") == zurl
assert os.path.isdir(wcsrv.mountpoint + "/head")
assert os.path.isdir(wcsrv.mountpoint + "/head/bigfile")
wc = wcfs.join(zurl, autostart=False)
defer(wc.close)
assert wc.mountpoint == testmntpt
assert wc.mountpoint == wcsrv.mountpoint
assert wc._njoin == 1
assert readfile(wc.mountpoint + "/.wcfs/zurl") == zurl
assert os.path.isdir(wc.mountpoint + "/head")
assert os.path.isdir(wc.mountpoint + "/head/bigfile")
assert wc._wcsrv is None
wc2 = wcfs.join(zurl, autostart=False)
defer(wc2.close)
......@@ -239,6 +226,9 @@ def test_serve_after_crash():
assert procmounts_lookup_wcfs(zurl) == mntpt
# XXX test_start_after_crash?
# start_and_crash_wcfs starts wcfs and then kills it.
# it returns closed WCFS connection that was connected to the killed WCFS server.
def start_and_crash_wcfs(zurl, mntpt): # -> WCFS
......@@ -247,7 +237,11 @@ def start_and_crash_wcfs(zurl, mntpt): # -> WCFS
procmounts_lookup_wcfs(zurl)
# start the server with attached client
wc = wcfs._start(zurl)
wcsrv = wcfs.start(zurl)
assert wcsrv.mountpoint == mntpt
assert mntpt not in wcfs._wcregistry
wc = wcfs.join(zurl, autostart=False)
assert wcfs._wcregistry[mntpt] is wc
assert wc.mountpoint == mntpt
assert readfile(mntpt + "/.wcfs/zurl") == zurl
......@@ -257,8 +251,8 @@ def start_and_crash_wcfs(zurl, mntpt): # -> WCFS
# kill the server
wc._proc.kill() # sends SIGKILL
assert wc._proc.wait() != 0
wcsrv._proc.kill() # sends SIGKILL
assert wcsrv._proc.wait() != 0
# access to filesystem should raise "Transport endpoint not connected"
with raises(IOError) as exc:
......@@ -335,9 +329,7 @@ class DFile:
# XXX print -> t.trace/debug() + t.verbose depending on py.test -v -v ?
class tWCFS(_tWCFS):
@func
def __init__(t, wc=None):
# `wc != None` is used to create tWCFS on existing WCFS connection
if wc is None:
def __init__(t):
assert not os.path.exists(testmntpt)
wc = wcfs.join(testzurl, autostart=True)
assert wc.mountpoint == testmntpt
......@@ -351,60 +343,35 @@ class tWCFS(_tWCFS):
# cases, when wcfs, even after receiving `kill -9`, will be stuck in kernel.
# ( git.kernel.org/linus/a131de0a482a makes in-kernel FUSE client to
# still wait for request completion even after fatal signal )
t._wcfuseabort = open("/sys/fs/fuse/connections/%d/abort" % os.minor(os.stat(t.wc.mountpoint).st_dev), "w")
nogilready = chan(dtype='C.structZ')
go(t._abort_ontimeout, 10*time.second, nogilready) # NOTE must be: with_timeout << · << wcfs_pin_timeout
t._wcfuseabort = os.dup(wc._wcsrv._fuseabort.fileno())
go(t._abort_ontimeout, t._wcfuseabort, 10*time.second, nogilready) # NOTE must be: with_timeout << · << wcfs_pin_timeout
nogilready.recv() # wait till _abort_ontimeout enters nogil
# close closes connection to wcfs, unmounts the filesystem and makes sure
# that wcfs server exits.
@func
def close(t):
defer(t._wcfuseabort.close)
def _():
os.close(t._wcfuseabort)
defer(t._closed.close)
# unmount and wait for wcfs to exit
def _():
# run `fusermount -u` the second time after if wcfs was killed to
# cleanup /proc/mounts.
if is_mountpoint(t.wc.mountpoint):
fuse_unmount(t.wc.mountpoint)
assert not is_mountpoint(t.wc.mountpoint)
os.rmdir(t.wc.mountpoint)
defer(_)
@func
def _():
# kill wcfs.go in case it is deadlocked and does not exit by itself
if procwait_(timeout(), t.wc._proc):
return
# run `fusermount -u` the second time after we kill wcfs to cleanup
# /proc/mounts and avoid `assert not is_mountpoint` above.
def _():
if is_mountpoint(t.wc.mountpoint):
fuse_unmount(t.wc.mountpoint)
defer(_)
eprint("\nC: wcfs.go does not exit")
eprint("-> kill -QUIT wcfs.go ...\n")
os.kill(t.wc._proc.pid, SIGQUIT)
if procwait_(timeout(), t.wc._proc):
return
eprint("\nC: wcfs.go does not exit (after SIGQUIT)")
eprint("-> kill -KILL wcfs.go ...\n")
os.kill(t.wc._proc.pid, SIGKILL)
if procwait_(timeout(), t.wc._proc):
return
eprint("\nC: wcfs.go does not exit (after SIGKILL; probably it is stuck in kernel)")
eprint("-> nothing we can do...\n") # XXX dump /proc/pid/task/*/stack instead (ignore EPERM)
def onstuck():
fail("wcfs.go does not exit even after SIGKILL")
t.wc._wcsrv._stop(timeout(), _onstuck=onstuck)
defer(_)
def _():
#if not ready(t._wcfuseaborted): XXX kill _wcfuseaborted ?
# assert 0 == subprocess.call(["mountpoint", "-q", t.wc.mountpoint])
defer(t.wc.close)
assert is_mountpoint(t.wc.mountpoint)
fuse_unmount(t.wc.mountpoint)
defer(_)
t.wc.close()
class tDB(tWCFS):
......@@ -1971,72 +1938,6 @@ def dump_history(t):
print()
# ready reports whether chan ch is ready.
def ready(ch):
_, _rx = select(
default, # 0
ch.recv, # 1
)
return bool(_)
# procwait waits for a process (subprocess.Popen) to terminate.
def procwait(ctx, proc):
waitfor(ctx, lambda: proc.poll() is not None)
# procwait_, similarly to procwait, waits for a process (subprocess.Popen) to terminate.
#
# it returns bool whether process terminated or not - e.g. due to context being canceled.
def procwait_(ctx, proc): # -> ok
return waitfor_(ctx, lambda: proc.poll() is not None)
# waitfor waits for condf() to become true.
def waitfor(ctx, condf):
wg = sync.WorkGroup(ctx)
def _(ctx):
while 1:
if ready(ctx.done()):
raise ctx.err()
if condf():
return
tdelay()
wg.go(_)
wg.wait()
# waitfor_, similarly to waitfor, waits for condf() to become true.
#
# it returns bool whether target condition was reached or not - e.g. due to
# context being canceled.
def waitfor_(ctx, proc): # -> ok
try:
waitfor(ctx, proc)
except Exception as e:
if errors.Is(e, context.canceled) or errors.Is(e, context.deadlineExceeded):
return False
raise
return True
# is_mountpoint returns whether path is a mountpoint
def is_mountpoint(path): # -> bool
# NOTE we don't call mountpoint directly on path, because if FUSE
# fileserver failed, the mountpoint will also fail and print ENOTCONN
try:
_ = os.lstat(path)
except OSError as e:
if e.errno == ENOENT:
return False
# "Transport endpoint is not connected" -> it is a failed FUSE server
# (XXX we can also grep /proc/mounts)
if e.errno == ENOTCONN:
return True
raise
if not S_ISDIR(_.st_mode):
return False
mounted = (0 == subprocess.call(["mountpoint", "-q", path]))
return mounted
# procmounts_lookup_wcfs returns /proc/mount entry for wcfs mounted to serve zurl.
def procmounts_lookup_wcfs(zurl): # -> mountpoint | KeyError
for line in readfile('/proc/mounts').splitlines():
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment