Commit f82bfed3 authored by Kirill Smelkov

.

parent eca92b6d
...@@ -66,7 +66,7 @@ digraph { ...@@ -66,7 +66,7 @@ digraph {
nowcfs [label="!wcfs mode"] nowcfs [label="!wcfs mode"]
wcfsInvProcess [label="process\nZODB invalidations"] wcfsInvProcess [label="process\nZODB invalidations"]
zconnCacheGet [label="zconn.Cache.Get"] zconnCacheGet [label="zconn.Cache.Get", style=filled fillcolor=grey95]
zobj2file [label="Z* → file/[]#blk"] zobj2file [label="Z* → file/[]#blk"]
zblk2file [label="ZBlk* → file/[]#blk"] zblk2file [label="ZBlk* → file/[]#blk"]
zbtree2file [label="BTree/Bucket → file/[]#blk"] zbtree2file [label="BTree/Bucket → file/[]#blk"]
......
...@@ -138,7 +138,7 @@ ...@@ -138,7 +138,7 @@
<!-- zconnCacheGet --> <!-- zconnCacheGet -->
<g id="node8" class="node"> <g id="node8" class="node">
<title>zconnCacheGet</title> <title>zconnCacheGet</title>
<ellipse fill="none" stroke="#000000" cx="369.8112" cy="-188.6102" rx="71.4873" ry="18"/> <ellipse fill="#f2f2f2" stroke="#000000" cx="369.8112" cy="-188.6102" rx="71.4873" ry="18"/>
<text text-anchor="middle" x="369.8112" y="-184.9102" font-family="Times,serif" font-size="14.00" fill="#000000">zconn.Cache.Get</text> <text text-anchor="middle" x="369.8112" y="-184.9102" font-family="Times,serif" font-size="14.00" fill="#000000">zconn.Cache.Get</text>
</g> </g>
<!-- wcfsInvProcess&#45;&gt;zconnCacheGet --> <!-- wcfsInvProcess&#45;&gt;zconnCacheGet -->
......
...@@ -227,15 +227,15 @@ package main ...@@ -227,15 +227,15 @@ package main
// non-wcfs wendelin.core do for a ZBigFile. Contrary to non-wcfs wendelin.core // non-wcfs wendelin.core do for a ZBigFile. Contrary to non-wcfs wendelin.core
// it keeps bigfile data in shared cache efficiently. It is organized as follows: // it keeps bigfile data in shared cache efficiently. It is organized as follows:
// //
// 1) 1 ZODB connection for "latest data" for whole filesystem (zconn). // 1) 1 ZODB connection for "latest data" for whole filesystem (zhead).
// 2) head/data of all bigfiles represent state as of zconn.At . // 2) head/data of all bigfiles represent state as of zhead.At .
// 3) for */head/data the following invariant is maintained: // 3) for */head/data the following invariant is maintained:
// //
// #blk ∈ file cache => ZBlk(#blk) + all BTree/Bucket that lead to it ∈ zconn cache // #blk ∈ file cache => ZBlk(#blk) + all BTree/Bucket that lead to it ∈ zhead cache
// (ZBlk* in ghost state) // (ZBlk* in ghost state)
// //
// The invariant helps on invalidation: if we see a changed oid, and // The invariant helps on invalidation: if we see a changed oid, and
// zconn.cache.lookup(oid) = ø -> we know we don't have to invalidate OS // zhead.cache.lookup(oid) = ø -> we know we don't have to invalidate OS
// cache for any part of any file (even if oid relates to a file block - that // cache for any part of any file (even if oid relates to a file block - that
// block is not cached and will trigger ZODB load on file read). // block is not cached and will trigger ZODB load on file read).
// //
...@@ -244,12 +244,12 @@ package main ...@@ -244,12 +244,12 @@ package main
// so ZBlk* also stay in cache in ghost form). In the future we may want to // so ZBlk* also stay in cache in ghost form). In the future we may want to
// try to synchronize to kernel freeing its pagecache pages. // try to synchronize to kernel freeing its pagecache pages.
// //
// 4) when we receive an invalidation message from zstor - we process it and // 4) when we receive an invalidation message from ZODB - we process it and
// propagate invalidations to OS file cache of */head/data: // propagate invalidations to OS file cache of */head/data:
// //
// invalidation message: (tid↑, []oid) // invalidation message: (tid↑, []oid)
// //
// 4.1) zconn.cache.lookup(oid) // 4.1) zhead.cache.lookup(oid)
// 4.2) ø: nothing to do - see invariant ^^^. // 4.2) ø: nothing to do - see invariant ^^^.
// 4.3) obj found: // 4.3) obj found:
// //
...@@ -268,7 +268,7 @@ package main ...@@ -268,7 +268,7 @@ package main
// - if retrieved successfully -> store retrieved data back into OS file // - if retrieved successfully -> store retrieved data back into OS file
// cache for file/@<rev>/data[blk], where // cache for file/@<rev>/data[blk], where
// //
// rev = max(δFtail.by(#blk)) || min(rev ∈ δFtail) || zconn.at ; see below about δFtail // rev = max(δFtail.by(#blk)) || min(rev ∈ δFtail) || zhead.at ; see below about δFtail
// //
// - invalidate file/head/data[blk] in OS file cache. // - invalidate file/head/data[blk] in OS file cache.
// //
...@@ -289,7 +289,7 @@ package main ...@@ -289,7 +289,7 @@ package main
// //
// 6) when we receive a FUSE read(#blk) request to a file/head/data we process it as follows: // 6) when we receive a FUSE read(#blk) request to a file/head/data we process it as follows:
// //
// 6.1) load blkdata for head/data[blk] @zconn.at . // 6.1) load blkdata for head/data[blk] @zhead.at .
// //
// while loading this also gives upper bound estimate of when the block // while loading this also gives upper bound estimate of when the block
// was last changed: // was last changed:
...@@ -477,7 +477,8 @@ type Root struct { ...@@ -477,7 +477,8 @@ type Root struct {
zdb *zodb.DB zdb *zodb.DB
// ZODB connection for head/ // ZODB connection for head/
zconn *ZConn zheadMu sync.RWMutex // protects access to zhead & live _objects_ associated with it
zhead *ZConn // zwatcher resyncs zhead; others only read zhead objects.
// ZODB connections for @<rev>/ // ZODB connections for @<rev>/
zrevMu sync.Mutex zrevMu sync.Mutex
...@@ -513,7 +514,7 @@ type BigFileRev struct { ...@@ -513,7 +514,7 @@ type BigFileRev struct {
// /bigfile/<bigfileX>/(head|<rev>)/* - internally served by BigFile. // /bigfile/<bigfileX>/(head|<rev>)/* - internally served by BigFile.
type BigFile struct { type BigFile struct {
// this BigFile view ZODB via zconn // this BigFile views ZODB via zconn
zconn *ZConn zconn *ZConn
// ZBigFile top-level object. Kept activated during lifetime of current transaction. // ZBigFile top-level object. Kept activated during lifetime of current transaction.
...@@ -559,40 +560,72 @@ type blkLoadState struct { ...@@ -559,40 +560,72 @@ type blkLoadState struct {
// ---------------------------------------- // ----------------------------------------
/* // zodbCacheControl implements zodb.LiveCacheControl to tune ZODB to never evict
// XXX invalidation watcher // LOBTree/LOBucket from live cache. We want to keep LOBTree/LOBucket always alive
func (sb *Root) zwatch(ctx context.Context) error { // because it is essentially the index where to find ZBigFile data.
// XXX err ctx //
// For the data itself - we put it to kernel pagecache and always deactivate
// from ZODB right after that.
//
// See "3) for */head/data the following invariant is maintained..."
type zodbCacheControl struct {}
func (cc *zodbCacheControl) WantEvict(obj zodb.IPersistent) bool {
switch obj.(type) {
default:
return true
case *btree.LOBTree:
case *btree.LOBucket:
// ZBlk* are kept referenced by a LOBucket, so they don't go away from Connection.cache.objtab
// we also keep ZBigFile alive because we want to make sure .blksize
// and (p. ref) .blktab do not change.
// XXX do we really need to keep ZBigFile alive for that?
//case *ZBigFile:
}
return false
}
// zwatcher watches for ZODB changes.
// see "4) when we receive an invalidation message from ZODB ..."
func (r *Root) zwatcher(ctx context.Context) (err error) {
defer xerr.Contextf(&err, "zwatch") // XXX more in context?
// XXX unmount on error? -> always EIO? // XXX unmount on error? -> always EIO?
znotify := sb.zconn.Notifier() zwatch := r.zstor.Watch() // XXX -> create before zwatcher is started
for { for {
zevent, err := znotify.Read(ctx) zevent, err := zwatch.Read(ctx)
if err != nil { if err != nil {
return err return err
} }
// XXX locking r.zhandle1(zevent)
sb.zhandle1(zevent)
} }
} }
// zhandle1 handles 1 event from ZODB notification. // zhandle1 handles 1 event from ZODB notification.
// XXX something is locked // (called with .zheadMu wlocked)
func (sb *Root) zhandle1(zevent *zodb.NotifyEvent) { func (r *Root) zhandle1(zevent zodb.WatchEvent) {
// XXX locking correct? XXX too coarse? -> lock only around "resync .zhead ..." ?
r.zheadMu.Lock()
defer r.zheadMu.Unlock()
toinvalidate = ... // [] of file/[]#blk toinvalidate = ... // [] of file/[]#blk
// zevent = (tid^, []oid) // zevent = (tid^, []oid)
for _, oid := zevent.Oidv { for _, oid := range zevent.Oidv {
obj := zconn.Cache().Get(oid) obj := zhead.Cache().Get(oid)
if obj == nil { if obj == nil {
continue // nothing to do - see invariant continue // nothing to do - see invariant
} }
switch obj := obj.(type) { switch obj := obj.(type) {
default: default:
continue // object not related to a bigfile continue // object not related to any bigfile
case *LOBTree: case *LOBTree:
// XXX -> δBTree // XXX -> δBTree
...@@ -600,40 +633,56 @@ func (sb *Root) zhandle1(zevent *zodb.NotifyEvent) { ...@@ -600,40 +633,56 @@ func (sb *Root) zhandle1(zevent *zodb.NotifyEvent) {
case *LOBucket: case *LOBucket:
// XXX -> δBTree // XXX -> δBTree
case *ZBlkXXXAny: case *ZBlkXXXAny: // ZBlk0, ZBlk1
fileinv := XXX(obj.file) fileinv := XXX(obj.file)
fileinv.blkv += obj.blk // XXX or better obj.blkv ? fileinv.blkv += obj.blk // XXX or better obj.blkv ?
case *ZBigFile:
// XXX check that .blksize and .blktab (it is only
// persistent reference) do not change.
// XXX shutdown fs with ^^^ message.
} }
} }
wg = ... //wg = ...
for _, fileinv := range toinvalidate { for _, fileinv := range toinvalidate {
for _, blk := range fileinv.blkv { for _, blk := range fileinv.blkv {
wg.go fileinv.file.invalidateBlk(blk) go fileinv.file.invalidateBlk(blk) // XXX -> wg.Go
} }
// XXX resync .zhead to zevent.tid
} }
// invalidateBlk invalidates 1 file block. XXX // invalidateBlk invalidates 1 file block. XXX
// XXX see "4. for all file/blk to invalidate we do" // XXX see "4.4) for all file/blk to invalidate we do"
func (f *file) invalidateBlk(ctx context.Context, blk int64) error { func (f *file) invalidateBlk(ctx context.Context, blk int64) error {
fsconn := f.root().fsconn fsconn := f.root().fsconn
off := blk*blksize off := blk*blksize
// try retrieve cache of current head/data[blk] // try retrieve cache of current head/data[blk]
//
// if less than blksize was cached - probably the kernel had to evict
// some data from its cache already. In such case we don't try to
// preserve the rest and drop what was read, to avoid keeping the
// system overloaded.
//
// XXX st != OK -> warn?
blkdata, st := fsconn.FileRetrieveCache(f.Inode(), off, blksize) blkdata, st := fsconn.FileRetrieveCache(f.Inode(), off, blksize)
// XXX st != ok, len(blkdata) < blksize if len(blkdata) == blksize {
// XXX -> go
// store retrieved data back to OS cache for file @<rev>/data[blk] // store retrieved data back to OS cache for file @<rev>/data[blk]
frev = ... // XXX frev, _ = file.δFtail.LastRevOf(blk, at)
st = fsconn.FileNotifyStoreCache(frev.Inode(), off, blkdata) st = fsconn.FileNotifyStoreCache(frev.Inode(), off, blkdata)
if st != fuse.OK {
// XXX log - dup wrt readBlk -> common func.
}
}
// XXX st != ok // invalidate file/head/data[blk] in OS file cache.
st = fsconn.FileNotify(f.Inode(), off, blksize) st = fsconn.FileNotify(f.Inode(), off, blksize)
// XXX st != ok (fatal here) // XXX st != ok (fatal here)
} }
*/
// ---------------------------------------- // ----------------------------------------
...@@ -664,7 +713,7 @@ func (bfroot *BigFileRoot) mkdir(name string, fctx *fuse.Context) (_ *nodefs.Ino ...@@ -664,7 +713,7 @@ func (bfroot *BigFileRoot) mkdir(name string, fctx *fuse.Context) (_ *nodefs.Ino
} }
// not there - without bfroot lock proceed to open BigFile from ZODB // not there - without bfroot lock proceed to open BigFile from ZODB
bf, err := bigopen(asctx(fctx), groot.zconn, oid) bf, err := bigopen(asctx(fctx), groot.zhead, oid)
if err != nil { if err != nil {
return nil, err return nil, err
} }
...@@ -1064,34 +1113,6 @@ func (bf *BigFile) readAt() []byte { ...@@ -1064,34 +1113,6 @@ func (bf *BigFile) readAt() []byte {
// zodbCacheControl implements zodb.LiveCacheControl to tune ZODB to never evict
// LOBTree/LOBucket from live cache. We want to keep LOBTree/LOBucket always alive
// because it is essentially the index where to find ZBigFile data.
//
// For the data itself - we put it to kernel pagecache and always deactivate
// from ZODB right after that.
//
// See "*/head/data invariant" in "wcfs organization" overview.
//
// TODO set it to Connection.CacheControl
type zodbCacheControl struct {}
func (cc *zodbCacheControl) WantEvict(obj zodb.IPersistent) bool {
switch obj.(type) {
default:
return true
case *btree.LOBTree:
case *btree.LOBucket:
// ZBlk* are kept referenced by a LOBucket, so they don't go away from Connection.objtab
// XXX + ZBigFile ?
}
return false
}
// FIXME groot/gfsconn is tmp workaround for lack of way to retrieve FileSystemConnector from nodefs.Inode // FIXME groot/gfsconn is tmp workaround for lack of way to retrieve FileSystemConnector from nodefs.Inode
// TODO: // TODO:
// - Inode += .Mount() -> nodefs.Mount // - Inode += .Mount() -> nodefs.Mount
...@@ -1131,17 +1152,18 @@ func main() { ...@@ -1131,17 +1152,18 @@ func main() {
defer zstor.Close() defer zstor.Close()
zdb := zodb.NewDB(zstor) zdb := zodb.NewDB(zstor)
zconn, err := zopen(ctx, zdb, &zodb.ConnOptions{}) zhead, err := zopen(ctx, zdb, &zodb.ConnOptions{})
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
zhead.Cache().SetControl(&zodbCacheControl{}) // XXX +locking?
// mount root // mount root
root := &Root{ root := &Root{
Node: nodefs.NewDefaultNode(), Node: nodefs.NewDefaultNode(),
zstor: zstor, zstor: zstor,
zdb: zdb, zdb: zdb,
zconn: zconn, zhead: zhead,
zrevTab: make(map[zodb.Tid]*ZConn), zrevTab: make(map[zodb.Tid]*ZConn),
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment