Commit f82bfed3 authored by Kirill Smelkov's avatar Kirill Smelkov

.

parent eca92b6d
......@@ -66,7 +66,7 @@ digraph {
nowcfs [label="!wcfs mode"]
wcfsInvProcess [label="process\nZODB invalidations"]
zconnCacheGet [label="zconn.Cache.Get"]
zconnCacheGet [label="zconn.Cache.Get", style=filled fillcolor=grey95]
zobj2file [label="Z* → file/[]#blk"]
zblk2file [label="ZBlk* → file/[]#blk"]
zbtree2file [label="BTree/Bucket → file/[]#blk"]
......
......@@ -138,7 +138,7 @@
<!-- zconnCacheGet -->
<g id="node8" class="node">
<title>zconnCacheGet</title>
<ellipse fill="none" stroke="#000000" cx="369.8112" cy="-188.6102" rx="71.4873" ry="18"/>
<ellipse fill="#f2f2f2" stroke="#000000" cx="369.8112" cy="-188.6102" rx="71.4873" ry="18"/>
<text text-anchor="middle" x="369.8112" y="-184.9102" font-family="Times,serif" font-size="14.00" fill="#000000">zconn.Cache.Get</text>
</g>
<!-- wcfsInvProcess&#45;&gt;zconnCacheGet -->
......
......@@ -227,15 +227,15 @@ package main
// non-wcfs wendelin.core do for a ZBigFile. Contrary to non-wcfs wendelin.core
// it keeps bigfile data in shared cache efficiently. It is organized as follows:
//
// 1) 1 ZODB connection for "latest data" for whole filesystem (zconn).
// 2) head/data of all bigfiles represent state as of zconn.At .
// 1) 1 ZODB connection for "latest data" for whole filesystem (zhead).
// 2) head/data of all bigfiles represent state as of zhead.At .
// 3) for */head/data the following invariant is maintained:
//
// #blk ∈ file cache => ZBlk(#blk) + all BTree/Bucket that lead to it ∈ zconn cache
// #blk ∈ file cache => ZBlk(#blk) + all BTree/Bucket that lead to it ∈ zhead cache
// (ZBlk* in ghost state)
//
// The invariant helps on invalidation: if we see a changed oid, and
// zconn.cache.lookup(oid) = ø -> we know we don't have to invalidate OS
// zhead.cache.lookup(oid) = ø -> we know we don't have to invalidate OS
// cache for any part of any file (even if oid relates to a file block - that
// block is not cached and will trigger ZODB load on file read).
//
......@@ -244,12 +244,12 @@ package main
// so ZBlk* also stay in cache in ghost form). In the future we may want to
// try to synchronize to kernel freeing its pagecache pages.
//
// 4) when we receive an invalidation message from zstor - we process it and
// 4) when we receive an invalidation message from ZODB - we process it and
// propagate invalidations to OS file cache of */head/data:
//
// invalidation message: (tid↑, []oid)
//
// 4.1) zconn.cache.lookup(oid)
// 4.1) zhead.cache.lookup(oid)
// 4.2) ø: nothing to do - see invariant ^^^.
// 4.3) obj found:
//
......@@ -268,7 +268,7 @@ package main
// - if retrieved successfully -> store retrieved data back into OS file
// cache for file/@<rev>/data[blk], where
//
// rev = max(δFtail.by(#blk)) || min(rev ∈ δFtail) || zconn.at ; see below about δFtail
// rev = max(δFtail.by(#blk)) || min(rev ∈ δFtail) || zhead.at ; see below about δFtail
//
// - invalidate file/head/data[blk] in OS file cache.
//
......@@ -289,7 +289,7 @@ package main
//
// 6) when we receive a FUSE read(#blk) request to a file/head/data we process it as follows:
//
// 6.1) load blkdata for head/data[blk] @zconn.at .
// 6.1) load blkdata for head/data[blk] @zhead.at .
//
// while loading this also gives upper bound estimate of when the block
// was last changed:
......@@ -477,7 +477,8 @@ type Root struct {
zdb *zodb.DB
// ZODB connection for head/
zconn *ZConn
zheadMu sync.RWMutex // protects access to zhead & live _objects_ associated with it
zhead *ZConn // zwatcher resyncs zhead; others only read zhead objects.
// ZODB connections for @<rev>/
zrevMu sync.Mutex
......@@ -513,7 +514,7 @@ type BigFileRev struct {
// /bigfile/<bigfileX>/(head|<rev>)/* - internally served by BigFile.
type BigFile struct {
// this BigFile view ZODB via zconn
// this BigFile views ZODB via zconn
zconn *ZConn
// ZBigFile top-level object. Kept activated during lifetime of current transaction.
......@@ -559,40 +560,72 @@ type blkLoadState struct {
// ----------------------------------------
/*
// XXX invalidation watcher
func (sb *Root) zwatch(ctx context.Context) error {
// XXX err ctx
// zodbCacheControl implements zodb.LiveCacheControl to tune ZODB to never evict
// LOBTree/LOBucket from live cache. We want to keep LOBTree/LOBucket always alive
// becuse it is essentially the index where to find ZBigFile data.
//
// For the data itself - we put it to kernel pagecache and always deactivate
// from ZODB right after that.
//
// See "3) for */head/data the following invariant is maintained..."
type zodbCacheControl struct {}
func (cc *zodbCacheControl) WantEvict(obj zodb.IPersistent) bool {
switch obj.(type) {
default:
return true
case *btree.LOBTree:
case *btree.LOBucket:
// ZBlk* are kept referenced by a LOBucket, so they don't go away from Connection.cache.objtab
// we also keep ZBigFile alive because we want to make sure .blksize
// and (p. ref) .blktab do not change.
// XXX do we really need to keep ZBigFile alive for that?
//case *ZBigFile:
}
return false
}
// zwatcher watches for ZODB changes.
// see "4) when we receive an invalidation message from ZODB ..."
func (r *Root) zwatcher(ctx context.Context) (err error) {
defer xerr.Contextf(&err, "zwatch") // XXX more in context?
// XXX unmount on error? -> always EIO?
znotify := sb.zconn.Notifier()
zwatch := r.zstor.Watch() // XXX -> create before zwatcher is started
for {
zevent, err := znotify.Read(ctx)
zevent, err := zwatch.Read(ctx)
if err != nil {
return err
}
// XXX locking
sb.zhandle1(zevent)
r.zhandle1(zevent)
}
}
// zhandle1 handles 1 event from ZODB notification.
// XXX something is locked
func (sb *Root) zhandle1(zevent *zodb.NotifyEvent) {
// (called with .zheadMu wlocked)
func (r *Root) zhandle1(zevent zodb.WatchEvent) {
// XXX locking correct? XXX too coarse? -> lock only around "resync .zhead ..." ?
r.zheadMu.Lock()
defer r.zheadMu.Unlock()
toinvalidate = ... // [] of file/[]#blk
// zevent = (tid^, []oid)
for _, oid := zevent.Oidv {
obj := zconn.Cache().Get(oid)
for _, oid := range zevent.Oidv {
obj := zhead.Cache().Get(oid)
if obj == nil {
continue // nothing to do - see invariant
}
switch obj := obj.(type) {
default:
continue // object not related to a bigfile
continue // object not related to any bigfile
case *LOBTree:
// XXX -> δBTree
......@@ -600,40 +633,56 @@ func (sb *Root) zhandle1(zevent *zodb.NotifyEvent) {
case *LOBucket:
// XXX -> δBTree
case *ZBlkXXXAny:
case *ZBlkXXXAny: // ZBlk0, ZBlk1
fileinv := XXX(obj.file)
fileinv.blkv += obj.blk // XXX or better obj.blkv ?
case *ZBigFile:
// XXX check that .blksize and .blktab (it is only
// persistent reference) do not change.
// XXX shutdown fs with ^^^ message.
}
}
wg = ...
//wg = ...
for _, fileinv := range toinvalidate {
for _, blk := range fileinv.blkv {
wg.go fileinv.file.invalidateBlk(blk)
go fileinv.file.invalidateBlk(blk) // XXX -> wg.Go
}
// XXX resync .zhead to zevent.tid
}
// invalidateBlk invalidates 1 file block. XXX
// XXX see "4. for all file/blk to invalidate we do"
// XXX see "4.4) for all file/blk to invalidate we do"
func (f *file) invalidateBlk(ctx context.Context, blk int64) error {
fsconn := f.root().fsconn
off := blk*blksize
// try retrieve cache of current head/data[blk]
//
// if less than blksize was cached - probably the kernel had to evict
// some data from its cache already. In such case we don't try to
// preserve the rest and drop what was read, to avoid keeping the
// system overloaded.
//
// XXX st != OK -> warn?
blkdata, st := fsconn.FileRetrieveCache(f.Inode(), off, blksize)
// XXX st != ok, len(blkdata) < blksize
// store retrieved data back to OS cache for file @<rev>/data[blk]
frev = ... // XXX
st = fsconn.FileNotifyStoreCache(frev.Inode(), off, blkdata)
// XXX st != ok
if len(blkdata) == blksize {
// XXX -> go
// store retrieved data back to OS cache for file @<rev>/data[blk]
frev, _ = file.δFtail.LastRevOf(blk, at)
st = fsconn.FileNotifyStoreCache(frev.Inode(), off, blkdata)
if st != fuse.OK {
// XXX log - dup wrt readBlk -> common func.
}
}
// invalidate file/head/data[blk] in OS file cache.
st = fsconn.FileNotify(f.Inode(), off, blksize)
// XXX st != ok (fatal here)
}
*/
// ----------------------------------------
......@@ -664,7 +713,7 @@ func (bfroot *BigFileRoot) mkdir(name string, fctx *fuse.Context) (_ *nodefs.Ino
}
// not there - without bfroot lock proceed to open BigFile from ZODB
bf, err := bigopen(asctx(fctx), groot.zconn, oid)
bf, err := bigopen(asctx(fctx), groot.zhead, oid)
if err != nil {
return nil, err
}
......@@ -1064,34 +1113,6 @@ func (bf *BigFile) readAt() []byte {
// zodbCacheControl implements zodb.LiveCacheControl to tune ZODB to never evict
// LOBTree/LOBucket from live cache. We want to keep LOBTree/LOBucket always alive
// because it is essentially the index where to find ZBigFile data.
//
// For the data itself - we put it to kernel pagecache and always deactivate
// from ZODB right after that.
//
// See "*/head/data invariant" in "wcfs organization" overview.
//
// TODO set it to Connection.CacheControl
type zodbCacheControl struct {}
func (cc *zodbCacheControl) WantEvict(obj zodb.IPersistent) bool {
switch obj.(type) {
default:
return true
case *btree.LOBTree:
case *btree.LOBucket:
// ZBlk* are kept referenced by a LOBucket, so they don't go away from Connection.objtab
// XXX + ZBigFile ?
}
return false
}
// FIXME groot/gfsconn is tmp workaround for lack of way to retrieve FileSystemConnector from nodefs.Inode
// TODO:
// - Inode += .Mount() -> nodefs.Mount
......@@ -1131,17 +1152,18 @@ func main() {
defer zstor.Close()
zdb := zodb.NewDB(zstor)
zconn, err := zopen(ctx, zdb, &zodb.ConnOptions{})
zhead, err := zopen(ctx, zdb, &zodb.ConnOptions{})
if err != nil {
log.Fatal(err)
}
zhead.Cache().SetControl(&zodbCacheControl{}) // XXX +locking?
// mount root
root := &Root{
Node: nodefs.NewDefaultNode(),
zstor: zstor,
zdb: zdb,
zconn: zconn,
zhead: zhead,
zrevTab: make(map[zodb.Tid]*ZConn),
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment