Commit ae2d0f53 authored by Kirill Smelkov

Y wcfs: Make .wcfs/zhead a debug-only feature and move it to .wcfs/debug/zhead

Currently it is easy to deadlock wcfs by opening .wcfs/zhead and then not
reading from the opened zhead handle, due to the nature of what this zhead
file is designed to do. For this reason we want .wcfs/zhead not to be
present on production deployments and to be available only where it is
needed: during test runs.

-> Introduce a -debug flag to "enable debugging features", make .wcfs/zhead
   such a debug-only feature, and move it to .wcfs/debug/zhead for clarity.
parent d4dd89c9
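The deadlock the message describes is visible from the consumer side: whoever opens zhead must keep draining it, because wcfs writes into the handle synchronously. Below is a minimal sketch of such a consumer in Go; the mountpoint path is an assumption for illustration, and the actual tests do the equivalent from Python.

```go
// Sketch: a zhead consumer. Each line wcfs emits is a tid that zhead.at
// passed through; the reader must keep reading, because wcfs blocks on
// the write side while processing invalidations.
package main

import (
	"bufio"
	"fmt"
	"os"
)

func main() {
	// "/mnt/wcfs" is an assumed mountpoint, not something this commit defines.
	f, err := os.Open("/mnt/wcfs/.wcfs/debug/zhead")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	zhead := bufio.NewScanner(f)
	for zhead.Scan() {
		fmt.Println("zhead.at ->", zhead.Text()) // one tid per line
	}
}
```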
@@ -55,6 +55,7 @@ def prepend_env(var, prefix):
     os.environ[var] = v
 
+# enable debug features during testing
 # enable log_cli on no-capture
 # (output during a test is a mixture of print and log)
 def pytest_configure(config):
@@ -62,7 +63,7 @@ def pytest_configure(config):
     # this way we don't leak those files and include relevant information in test output
     #
     # TODO put WCFS logs into dedicated dir without -v?
-    prepend_env('WENDELIN_CORE_WCFS_OPTIONS', '-logtostderr')
+    prepend_env('WENDELIN_CORE_WCFS_OPTIONS', '-debug -logtostderr')
     if config.option.capture == "no":
         config.inicfg['log_cli'] = "true"
@@ -1038,9 +1038,9 @@ retry:
 		log.Infof("\t <<< resync zhead to δZ.tid")
 	}
 
-	// notify .wcfs/zhead
+	// notify .wcfs/debug/zhead
 	if log.V(2) {
-		log.Infof("\t >>> notify .wcfs/zhead")
+		log.Infof("\t >>> notify .wcfs/debug/zhead")
 	}
 	gdebug.zheadSockTabMu.Lock()
 	for sk := range gdebug.zheadSockTab {
@@ -1053,7 +1053,7 @@ retry:
 	}
 	gdebug.zheadSockTabMu.Unlock()
 	if log.V(2) {
-		log.Infof("\t <<< notify .wcfs/zhead")
+		log.Infof("\t <<< notify .wcfs/debug/zhead")
 	}
 
 	// shrink δFtail not to grow indefinitely.
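The loop above pushes the new zhead tid into every socket registered in gdebug.zheadSockTab and blocks until each opener consumes it; that synchronous send is exactly why an opener that never reads can deadlock wcfs. A minimal sketch of the pattern with plain channels standing in for wcfs's FileSock (all names here are illustrative):

```go
package main

import (
	"fmt"
	"sync"
)

// subTab mirrors gdebug.zheadSockTab in spirit: a mutex-protected set
// of subscribers that are notified synchronously.
type subTab struct {
	mu   sync.Mutex
	subs map[chan string]struct{}
}

func (t *subTab) notify(tid string) {
	t.mu.Lock()
	defer t.mu.Unlock()
	for ch := range t.subs {
		// Like writing into a FileSock: blocks until the opener reads.
		// A subscriber that opened the file but never reads stalls this
		// loop, and with it the whole invalidation processing.
		ch <- tid
	}
}

func main() {
	t := &subTab{subs: make(map[chan string]struct{})}
	ch := make(chan string)
	t.subs[ch] = struct{}{}

	done := make(chan struct{})
	go func() { fmt.Println("got", <-ch); close(done) }() // an active reader
	t.notify("0285cbac258bf266")                          // returns only after the read
	<-done
}
```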
@@ -2765,7 +2765,7 @@ var gmntpt string
 
 // debugging
 var gdebug = struct {
-	// .wcfs/zhead opens
+	// .wcfs/debug/zhead opens
 	zheadSockTabMu sync.Mutex
 	zheadSockTab   map[*FileSock]struct{}
 }{}
@@ -2774,22 +2774,22 @@ func init() {
 	gdebug.zheadSockTab = make(map[*FileSock]struct{})
 }
 
-// _wcfs_Zhead serves .wcfs/zhead .
-type _wcfs_Zhead struct {
+// _wcfs_debug_Zhead serves .wcfs/debug/zhead .
+type _wcfs_debug_Zhead struct {
 	fsNode
 }
 
-// _wcfs_ZheadH serves .wcfs/zhead opens.
-type _wcfs_ZheadH struct {
+// _wcfs_debug_ZheadH serves .wcfs/debug/zhead opens.
+type _wcfs_debug_ZheadH struct {
 	nodefs.File // = .sk.file
 	sk          *FileSock
 }
 
-func (*_wcfs_Zhead) Open(flags uint32, fctx *fuse.Context) (nodefs.File, fuse.Status) {
+func (*_wcfs_debug_Zhead) Open(flags uint32, fctx *fuse.Context) (nodefs.File, fuse.Status) {
 	// TODO(?) check flags
 	sk := NewFileSock()
 	sk.CloseRead()
-	zh := &_wcfs_ZheadH{
+	zh := &_wcfs_debug_ZheadH{
 		File: sk.file,
 		sk:   sk,
 	}
@@ -2801,7 +2801,7 @@ func (*_wcfs_Zhead) Open(flags uint32, fctx *fuse.Context) (nodefs.File, fuse.St
 	return WithOpenStreamFlags(zh), fuse.OK
 }
 
-func (zh *_wcfs_ZheadH) Release() {
+func (zh *_wcfs_debug_ZheadH) Release() {
 	gdebug.zheadSockTabMu.Lock()
 	delete(gdebug.zheadSockTab, zh.sk)
 	gdebug.zheadSockTabMu.Unlock()
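Open and Release bracket a subscriber's lifetime: Open registers the new handle's socket in gdebug.zheadSockTab so the zwatcher can notify it, and Release unregisters it so the notify loop never blocks on a dead handle. Sketched with the same channel-registry stand-in as above (hypothetical types, not the actual FUSE node API):

```go
// Sketch of the Open/Release lifecycle: each open adds a subscriber and
// each release removes it, so notify() only ever blocks on live handles.
package main

import (
	"fmt"
	"sync"
)

type subTab struct {
	mu   sync.Mutex
	subs map[chan string]struct{}
}

type handle struct {
	ch  chan string
	tab *subTab
}

func (t *subTab) open() *handle {
	h := &handle{ch: make(chan string), tab: t}
	t.mu.Lock()
	t.subs[h.ch] = struct{}{} // register: notifications start from "now"
	t.mu.Unlock()
	return h
}

func (h *handle) release() {
	h.tab.mu.Lock()
	delete(h.tab.subs, h.ch) // unregister: the notify loop will skip us from now on
	h.tab.mu.Unlock()
}

func main() {
	t := &subTab{subs: make(map[chan string]struct{})}
	h := t.open()
	fmt.Println("open subscribers:", len(t.subs)) // 1
	h.release()
	fmt.Println("after release:", len(t.subs)) // 0
}
```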
@@ -2865,7 +2865,7 @@ func _wcfs_Stats(fctx *fuse.Context) ([]byte, error) {
 	num("BigFile",   lenFileTab)      // # of head/BigFile
 	num("RevHead",   lenRevTab)       // # of @revX/ directories
-	num("ZHeadLink", lenZHeadSockTab) // # of open .wcfs/zhead handles
+	num("ZHeadLink", lenZHeadSockTab) // # of open .wcfs/debug/zhead handles
 	num("WatchLink", lenWLinkTab)     // # of open watchlinks
 	num("Watch",     ΣWatch)          // # of setup watches
 	num("PinnedBlk", ΣPinnedBlk)      // # of currently on-client pinned blocks
@@ -2892,6 +2892,7 @@ func main() {
 }
 
 func _main() (err error) {
+	debug := flag.Bool("debug", false, "enable debugging features")
 	tracefuse := flag.Bool("trace.fuse", false, "trace FUSE exchange")
 	autoexit := flag.Bool("autoexit", false, "automatically stop service when there is no client activity")
 	pintimeout := flag.Duration("pintimeout", 30*time.Second, "clients are killed if they do not handle pin notification in pintimeout time")
@@ -2909,7 +2910,7 @@ func _main() (err error) {
 	}
 
 	// debug -> precise t, no dates (TODO(?) -> always precise t?)
-	if *tracefuse {
+	if *debug || *tracefuse {
 		stdlog.SetFlags(stdlog.Lmicroseconds)
 	}
@@ -3038,25 +3039,32 @@ func _main() (err error) {
 	mkfile(head, "at", NewSmallFile(head.readAt)) // TODO mtime(at) = tidtime(at)
 	mkfile(head, "watch", wnode)
 
-	// for debugging/testing
+	// information about wcfs itself
 	_wcfs := newFSNode(fSticky)
 	mkdir(root, ".wcfs", &_wcfs)
 	mkfile(&_wcfs, "zurl", NewStaticFile([]byte(zurl)))
 	mkfile(&_wcfs, "pintimeout", NewStaticFile([]byte(fmt.Sprintf("%.1f", float64(root.pinTimeout) / float64(time.Second)))))
+	mkfile(&_wcfs, "stats", NewSmallFile(_wcfs_Stats)) // collected statistics
+
+	// for debugging/testing
+	if *debug {
+		_wcfs_debug := newFSNode(fSticky)
+		mkdir(&_wcfs, "debug", &_wcfs_debug)
 
-	// .wcfs/zhead - special file channel that sends zhead.at.
+	// .wcfs/debug/zhead - special file channel that sends zhead.at.
 	//
 	// If a user opens it, it will start to get tids of through which
-	// zhead.at was, starting from the time when .wcfs/zhead was opened.
+	// zhead.at was, starting from the time when .wcfs/debug/zhead was opened.
 	// There can be multiple openers. Once opened, the file must be read,
 	// as wcfs blocks waiting for data to be read when processing
 	// invalidations.
-	mkfile(&_wcfs, "zhead", &_wcfs_Zhead{
+	//
+	// zhead is debug-only since it is easy to deadlock wcfs by not
+	// reading from opened zhead handle.
+	mkfile(&_wcfs_debug, "zhead", &_wcfs_debug_Zhead{
 		fsNode: newFSNode(fSticky),
 	})
+	}
 
-	// .wcfs/stats - special file with collected statistics.
-	mkfile(&_wcfs, "stats", NewSmallFile(_wcfs_Stats))
 
 	// TODO handle autoexit
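With the pieces above, the -debug flag simply decides whether the .wcfs/debug subtree is materialized at mount time: a production mount without the flag has no zhead to open and thus no way to trigger the deadlock. A toy model of that gating (the tree helpers here are illustrative stand-ins for wcfs's mkdir/mkfile):

```go
// Sketch of the -debug gating: debug-only nodes live under .wcfs/debug
// and are created only when the flag is set.
package main

import (
	"flag"
	"fmt"
)

type dir map[string]interface{} // name -> sub-dir or file placeholder

func main() {
	debug := flag.Bool("debug", false, "enable debugging features")
	flag.Parse()

	wcfs := dir{
		"zurl":       "file",
		"pintimeout": "file",
		"stats":      "file",
	}
	if *debug {
		wcfs["debug"] = dir{"zhead": "file"} // debug-only subtree
	}
	fmt.Println(dir{".wcfs": wcfs})
}
```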
@@ -525,8 +525,8 @@ class tDB(tWCFS):
         # start wcfs after testdb is created and initial data is committed
         super(tDB, t).__init__(**kw)
 
-        # fh(.wcfs/zhead) + history of zhead read from there
-        t._wc_zheadfh = open(t.wc.mountpoint + "/.wcfs/zhead")
+        # fh(.wcfs/debug/zhead) + history of zhead read from there
+        t._wc_debug_zheadfh = open(t.wc.mountpoint + "/.wcfs/debug/zhead")
 
         # whether head/ ZBigFile(s) blocks were ever accessed via wcfs.
         # this is updated only explicitly via ._blkheadaccess() .
@@ -557,7 +557,7 @@ class tDB(tWCFS):
             tw.close()
         assert len(t._files) == 0
         assert len(t._wlinks) == 0
-        t._wc_zheadfh.close()
+        t._wc_debug_zheadfh.close()
 
         zstats = {'WatchLink': 0, 'Watch': 0, 'PinnedBlk': 0, 'ZHeadLink': 0}
         if not t.multiproc:
@@ -596,7 +596,7 @@ class tDB(tWCFS):
         head = t._commit(zf, changeDelta)
 
         # make sure wcfs is synchronized to committed transaction
-        l = t._wc_zheadfh.readline()
+        l = t._wc_debug_zheadfh.readline()
         #print('> zhead read: %r' % l)
         l = l.rstrip('\n')
         wchead = tAt(t, fromhex(l))
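Here the test uses the zhead channel as a commit barrier: after each commit it reads exactly one line and checks that it names the committed transaction, which proves wcfs has processed the corresponding invalidation before the test proceeds. The same handshake rendered in Go (illustrative names; the test does it with readline in Python):

```go
// Sketch: wait until wcfs reports that zhead reached the committed tid.
package main

import (
	"bufio"
	"fmt"
	"strings"
)

func waitZheadSync(zhead *bufio.Reader, tid string) error {
	l, err := zhead.ReadString('\n') // blocks until wcfs processed the commit
	if err != nil {
		return err
	}
	if got := strings.TrimRight(l, "\n"); got != tid {
		return fmt.Errorf("wcfs out of sync: committed %s, zhead reports %s", tid, got)
	}
	return nil
}

func main() {
	// Usage after a commit: waitZheadSync(zheadReader, committedTid).
	r := bufio.NewReader(strings.NewReader("0285cbac258bf266\n"))
	if err := waitZheadSync(r, "0285cbac258bf266"); err != nil {
		panic(err)
	}
	fmt.Println("wcfs synchronized")
}
```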
@@ -1896,7 +1896,7 @@ def test_wcfs_watch_2files():
 @func
 def test_wcfs_eio_after_zwatcher_fail(capfd):
     # we will use low-level tWCFS instead of tDB for precise control of access
-    # to the filesystem. For example tDB keeps open connection to .wcfs/zhead
+    # to the filesystem. For example tDB keeps open connection to .wcfs/debug/zhead
     # and inspects it during commit which can break in various ways on switch
     # to EIO mode. Do all needed actions by hand to avoid unneeded uncertainty.