Commit 153c002a authored by Kirill Smelkov

X wcfs: _fuse_unmount: Try first `kill -TERM` before `kill -QUIT` wcfs

Just aborting the FUSE connection does not make WCFS exit. This abort
is needed to avoid deadlocks, but we should also signal WCFS that it should
stop. Not doing so leads to a timeout in the wait after the FUSE connection
abort, after which `kill -QUIT` is triggered, which makes WCFS crash and
print lots of tracebacks to stderr before exiting.
parent 15389db0
...@@ -71,7 +71,7 @@ import logging as log ...@@ -71,7 +71,7 @@ import logging as log
from os.path import dirname from os.path import dirname
from stat import S_ISDIR from stat import S_ISDIR
from errno import ENOENT, ENOTCONN, EEXIST from errno import ENOENT, ENOTCONN, EEXIST
from signal import SIGQUIT, SIGKILL from signal import SIGTERM, SIGQUIT, SIGKILL
from golang import chan, select, default, func, defer from golang import chan, select, default, func, defer
from golang import context, errors, sync, time from golang import context, errors, sync, time
...@@ -423,41 +423,43 @@ def __stop(wcsrv, ctx, _onstuck): ...@@ -423,41 +423,43 @@ def __stop(wcsrv, ctx, _onstuck):
if _procwait_(timeoutFrac(0.5), wcsrv._proc): if _procwait_(timeoutFrac(0.5), wcsrv._proc):
return return
log.warn("\nC: wcfs.go does not exit") log.warn("wcfs.go does not exit (after SIGTERM)")
log.warn("-> kill -QUIT wcfs.go ...\n") log.warn("-> kill -QUIT wcfs.go ...")
os.kill(wcsrv._proc.pid, SIGQUIT) os.kill(wcsrv._proc.pid, SIGQUIT)
if _procwait_(timeoutFrac(0.25), wcsrv._proc): if _procwait_(timeoutFrac(0.25), wcsrv._proc):
return return
log.warn("\nC: wcfs.go does not exit (after SIGQUIT)") log.warn("wcfs.go does not exit (after SIGQUIT)")
log.warn("-> kill -KILL wcfs.go ...\n") log.warn("-> kill -KILL wcfs.go ...")
os.kill(wcsrv._proc.pid, SIGKILL) os.kill(wcsrv._proc.pid, SIGKILL)
if _procwait_(timeoutFrac(0.25), wcsrv._proc): if _procwait_(timeoutFrac(0.25), wcsrv._proc):
return return
log.warn("\nC: wcfs.go does not exit (after SIGKILL; probably it is stuck in kernel)") log.warn("wcfs.go does not exit (after SIGKILL; probably it is stuck in kernel)")
log.warn("-> nothing we can do...\n") # XXX dump /proc/pid/task/*/stack instead (ignore EPERM) log.warn("-> nothing we can do...") # XXX dump /proc/pid/task/*/stack instead (ignore EPERM)
if _onstuck is not None: if _onstuck is not None:
_onstuck() _onstuck()
else: else:
_procwait(context.background(), wcsrv._proc) _procwait(context.background(), wcsrv._proc)
defer(_) defer(_)
@func try:
def _(): if _is_mountpoint(wcsrv.mountpoint): # could be unmounted from outside
try: _fuse_unmount(wcsrv.mountpoint)
if _is_mountpoint(wcsrv.mountpoint): # could be unmounted from outside except:
_fuse_unmount(wcsrv.mountpoint) # if clean unmount failed -> kill -TERM wcfs and force abort of fuse connection.
except: #
# if clean unmount failed -> force abort of fuse connection # aborting fuse connection is needed in case wcfs/kernel will be stuck
def _(): # in a deadlock even after being `kill -9`. See comments in tWCFS for details.
if wcsrv._fuseabort is not None: def _():
log.warn("aborting FUSE connection ...") log.warn("-> kill -TERM wcfs.go ...")
wcsrv._fuseabort.write(b"1\n") os.kill(wcsrv._proc.pid, SIGTERM)
wcsrv._fuseabort.flush() if wcsrv._fuseabort is not None:
defer(_) log.warn("-> aborting FUSE connection ...")
raise wcsrv._fuseabort.write(b"1\n")
defer(_) wcsrv._fuseabort.flush()
defer(_)
raise
# ---- misc ---- # ---- misc ----
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment