Commit f82bfed3 authored Dec 21, 2018 by Kirill Smelkov
.
parent eca92b6d
Showing 3 changed files with 90 additions and 68 deletions
wcfs/todo.dot   +1  -1
wcfs/todo.svg   +1  -1
wcfs/wcfs.go    +88 -66
wcfs/todo.dot
@@ -66,7 +66,7 @@ digraph {
 	nowcfs		[label="!wcfs mode"]
 	wcfsInvProcess	[label="process\nZODB invalidations"]
-	zconnCacheGet	[label="zconn.Cache.Get"]
+	zconnCacheGet	[label="zconn.Cache.Get", style=filled fillcolor=grey95]
 	zobj2file	[label="Z* → file/[]#blk"]
 	zblk2file	[label="ZBlk* → file/[]#blk"]
 	zbtree2file	[label="BTree/Bucket → file/[]#blk"]
wcfs/todo.svg
@@ -138,7 +138,7 @@
 <!-- zconnCacheGet -->
 <g id="node8" class="node">
 <title>zconnCacheGet</title>
-<ellipse fill="none" stroke="#000000" cx="369.8112" cy="-188.6102" rx="71.4873" ry="18"/>
+<ellipse fill="#f2f2f2" stroke="#000000" cx="369.8112" cy="-188.6102" rx="71.4873" ry="18"/>
 <text text-anchor="middle" x="369.8112" y="-184.9102" font-family="Times,serif" font-size="14.00" fill="#000000">zconn.Cache.Get</text>
 </g>
 <!-- wcfsInvProcess->zconnCacheGet -->
wcfs/wcfs.go
@@ -227,15 +227,15 @@ package main
 // non-wcfs wendelin.core do for a ZBigFile. Contrary to non-wcfs wendelin.core
 // it keeps bigfile data in shared cache efficiently. It is organized as follows:
 //
-// 1) 1 ZODB connection for "latest data" for whole filesystem (zconn).
-// 2) head/data of all bigfiles represent state as of zconn.At .
+// 1) 1 ZODB connection for "latest data" for whole filesystem (zhead).
+// 2) head/data of all bigfiles represent state as of zhead.At .
 // 3) for */head/data the following invariant is maintained:
 //
-//	#blk ∈ file cache  =>  ZBlk(#blk) + all BTree/Bucket that lead to it ∈ zconn cache
+//	#blk ∈ file cache  =>  ZBlk(#blk) + all BTree/Bucket that lead to it ∈ zhead cache
 //	(ZBlk* in ghost state)
 //
 // The invariant helps on invalidation: if we see a changed oid, and
-// zconn.cache.lookup(oid) = ø -> we know we don't have to invalidate OS
+// zhead.cache.lookup(oid) = ø -> we know we don't have to invalidate OS
 // cache for any part of any file (even if oid relates to a file block - that
 // block is not cached and will trigger ZODB load on file read).
 //
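The invariant in 3) is what makes invalidation cheap: for a changed oid, a miss in the zhead live cache already proves no OS-cached block depends on it. A minimal sketch of that check, assuming a ZConn exposing its live cache via Cache() as in the draft code further below (the needInvalidate helper itself is hypothetical):

// needInvalidate reports whether a changed oid may affect OS-cached file data.
// By the */head/data invariant, a zhead live-cache miss means no cached block
// can depend on oid, so such oids are skipped entirely.
func needInvalidate(zhead *ZConn, oid zodb.Oid) bool {
	return zhead.Cache().Get(oid) != nil
}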
@@ -244,12 +244,12 @@ package main
 // so ZBlk* also stay in cache in ghost form). In the future we may want to
 // try to synchronize to kernel freeing its pagecache pages.
 //
-// 4) when we receive an invalidation message from zstor - we process it and
+// 4) when we receive an invalidation message from ZODB - we process it and
 // propagate invalidations to OS file cache of */head/data:
 //
 //	invalidation message: (tid↑, []oid)
 //
-// 4.1) zconn.cache.lookup(oid)
+// 4.1) zhead.cache.lookup(oid)
 // 4.2) ø: nothing to do - see invariant ^^^.
 // 4.3) obj found:
 //
@@ -268,7 +268,7 @@ package main
 // - if retrieved successfully -> store retrieved data back into OS file
 //   cache for file/@<rev>/data[blk], where
 //
-//	rev = max(δFtail.by(#blk)) || min(rev ∈ δFtail) || zconn.at ; see below about δFtail
+//	rev = max(δFtail.by(#blk)) || min(rev ∈ δFtail) || zhead.at ; see below about δFtail
 //
 // - invalidate file/head/data[blk] in OS file cache.
 //
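The rev fallback chain reads almost directly as Go. A sketch, assuming δFtail is kept as a revision-ordered list of (rev, changed-blocks) entries; both the δFentry type and the revOf helper are hypothetical, since the draft below has not pinned down a δFtail type yet:

// δFentry is one tracked change: the blocks modified by the transaction at rev.
type δFentry struct {
	rev  zodb.Tid
	blkv []int64
}

// revOf resolves rev = max(δFtail.by(#blk)) || min(rev ∈ δFtail) || zhead.at .
func revOf(δFtail []δFentry, blk int64, zheadAt zodb.Tid) zodb.Tid {
	// max(δFtail.by(#blk)): the latest tracked revision that changed blk.
	for i := len(δFtail) - 1; i >= 0; i-- {
		for _, b := range δFtail[i].blkv {
			if b == blk {
				return δFtail[i].rev
			}
		}
	}
	// min(rev ∈ δFtail): blk did not change anywhere in the tracked tail.
	if len(δFtail) > 0 {
		return δFtail[0].rev
	}
	// empty tail - fall back to the current zhead position.
	return zheadAt
}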
@@ -289,7 +289,7 @@ package main
 //
 // 6) when we receive a FUSE read(#blk) request to a file/head/data we process it as follows:
 //
-// 6.1) load blkdata for head/data[blk] @zconn.at .
+// 6.1) load blkdata for head/data[blk] @zhead.at .
 //
 //	while loading this also gives upper bound estimate of when the block
 //	was last changed:
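The "upper bound" mentioned in the truncated context comes from object serials: every persistent object carries the tid of the transaction that last wrote it. A hedged illustration, assuming zodb/go's IPersistent exposes that tid as PSerial() (blkRevMax is a hypothetical helper, not part of this commit):

// blkRevMax bounds when #blk could have last changed: the ZBlk data and each
// BTree/Bucket on the path to it were last written at their serials, so the
// block cannot have changed after the maximum of those tids.
func blkRevMax(path []zodb.IPersistent) zodb.Tid {
	rev := zodb.Tid(0)
	for _, obj := range path {
		if s := obj.PSerial(); s > rev {
			rev = s
		}
	}
	return rev
}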
@@ -477,7 +477,8 @@ type Root struct {
 	zdb *zodb.DB

 	// ZODB connection for head/
-	zconn *ZConn
+	zheadMu sync.RWMutex // protects access to zhead & live _objects_ associated with it
+	zhead   *ZConn       // zwatcher resyncs zhead; others only read zhead objects.

 	// ZODB connections for @<rev>/
 	zrevMu sync.Mutex
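This hunk replaces the single zconn field with zheadMu + zhead and fixes the locking discipline: zwatcher write-locks zheadMu while resyncing zhead to a new transaction; everyone else holds the read lock while touching zhead or objects loaded through it. A hypothetical helper showing the intended reader side:

// withZhead runs f with read access to zhead, excluding a concurrent resync
// by zwatcher (which takes zheadMu.Lock to advance zhead to the next tid).
func (r *Root) withZhead(f func(zhead *ZConn)) {
	r.zheadMu.RLock()
	defer r.zheadMu.RUnlock()
	f(r.zhead)
}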
@@ -513,7 +514,7 @@ type BigFileRev struct {
 // /bigfile/<bigfileX>/(head|<rev>)/* - internally served by BigFile.
 type BigFile struct {
-	// this BigFile view ZODB via zconn
+	// this BigFile views ZODB via zconn
 	zconn *ZConn

 	// ZBigFile top-level object. Kept activated during lifetime of current transaction.
@@ -559,40 +560,72 @@ type blkLoadState struct {

 // ----------------------------------------

 /*
-// XXX invalidation watcher
-func (sb *Root) zwatch(ctx context.Context) error {
-	// XXX err ctx
+// zodbCacheControl implements zodb.LiveCacheControl to tune ZODB to never evict
+// LOBTree/LOBucket from live cache. We want to keep LOBTree/LOBucket always alive
+// because it is essentially the index where to find ZBigFile data.
+//
+// For the data itself - we put it to kernel pagecache and always deactivate
+// from ZODB right after that.
+//
+// See "3) for */head/data the following invariant is maintained..."
+type zodbCacheControl struct {}
+
+func (cc *zodbCacheControl) WantEvict(obj zodb.IPersistent) bool {
+	switch obj.(type) {
+	default:
+		return true
+
+	case *btree.LOBTree:
+	case *btree.LOBucket:
+		// ZBlk* are kept referenced by a LOBucket, so they don't go away from Connection.cache.objtab
+
+	// we also keep ZBigFile alive because we want to make sure .blksize
+	// and (p. ref) .blktab do not change.
+	// XXX do we really need to keep ZBigFile alive for that?
+	//case *ZBigFile:
+	}
+
+	return false
+}
+
+// zwatcher watches for ZODB changes.
+// see "4) when we receive an invalidation message from ZODB ..."
+func (r *Root) zwatcher(ctx context.Context) (err error) {
+	defer xerr.Contextf(&err, "zwatch")	// XXX more in context?

 	// XXX unmount on error? -> always EIO?

-	znotify := sb.zconn.Notifier()
+	zwatch := r.zstor.Watch()	// XXX -> create before zwatcher is started

 	for {
-		zevent, err := znotify.Read(ctx)
+		zevent, err := zwatch.Read(ctx)
 		if err != nil {
 			return err
 		}

-		// XXX locking
-		sb.zhandle1(zevent)
+		r.zhandle1(zevent)
 	}
 }

 // zhandle1 handles 1 event from ZODB notification.
-// XXX something is locked
-func (sb *Root) zhandle1(zevent *zodb.NotifyEvent) {
+// (called with .zheadMu wlocked)
+func (r *Root) zhandle1(zevent zodb.WatchEvent) {
+	// XXX locking correct? XXX too coarse? -> lock only around "resync .zhead ..." ?
+	r.zheadMu.Lock()
+	defer r.zheadMu.Unlock()
+
 	toinvalidate = ...	// [] of file/[]#blk

 	// zevent = (tid^, []oid)
-	for _, oid := zevent.Oidv {
-		obj := zconn.Cache().Get(oid)
+	for _, oid := range zevent.Oidv {
+		obj := zhead.Cache().Get(oid)
 		if obj == nil {
 			continue // nothing to do - see invariant
 		}

 		switch obj := obj.(type) {
 		default:
-			continue // object not related to a bigfile
+			continue // object not related to any bigfile

 		case *LOBTree:
 			// XXX -> δBTree
@@ -600,40 +633,56 @@ func (sb *Root) zhandle1(zevent *zodb.NotifyEvent) {
 		case *LOBucket:
 			// XXX -> δBTree

-		case *ZBlkXXXAny:
+		case *ZBlkXXXAny:	// ZBlk0, ZBlk1
 			fileinv := XXX(obj.file)
 			fileinv.blkv += obj.blk	// XXX or better obj.blkv ?
+
+		case *ZBigFile:
+			// XXX check that .blksize and .blktab (it is only
+			// persistent reference) do not change.
+
+			// XXX shutdown fs with ^^^ message.
 		}
 	}

-	wg = ...
+	//wg = ...
 	for _, fileinv := range toinvalidate {
 		for _, blk := range fileinv.blkv {
-			wg.go fileinv.file.invalidateBlk(blk)
+			go fileinv.file.invalidateBlk(blk)	// XXX -> wg.Go
 		}
 	}
+
+	// XXX resync .zhead to zevent.tid
 }

 // invalidateBlk invalidates 1 file block. XXX
-// XXX see "4. for all file/blk to in invalidate we do"
+// XXX see "4.4) for all file/blk to in invalidate we do"
 func (f *file) invalidateBlk(ctx context.Context, blk int64) error {
 	fsconn := f.root().fsconn
 	off := blk*blksize

 	// try retrieve cache of current head/data[blk]
+	//
+	// if less than blksize was cached - probably the kernel had to evict
+	// some data from its cache already. In such case we don't try to
+	// preserve the rest and drop what was read, to avoid keeping the
+	// system overloaded.
+	//
+	// XXX st != OK -> warn?
 	blkdata, st := fsconn.FileRetrieveCache(f.Inode(), off, blksize)
-	// XXX st != ok, len(blkdata) < blksize

-	// store retrieved data back to OS cache for file @<rev>/data[blk]
-	frev = ... // XXX
-	st = fsconn.FileNotifyStoreCache(frev.Inode(), off, blkdata)
-	// XXX st != ok
+	if len(blkdata) == blksize {
+		// XXX -> go
+		// store retrieved data back to OS cache for file @<rev>/data[blk]
+		frev, _ = file.δFtail.LastRevOf(blk, at)
+		st = fsconn.FileNotifyStoreCache(frev.Inode(), off, blkdata)
+		if st != fuse.OK {
+			// XXX log - dup wrt readBlk -> common func.
+		}
+	}

 	// invalidate file/head/data[blk] in OS file cache.
 	st = fsconn.FileNotify(f.Inode(), off, blksize)
 	// XXX st != ok (fatal here)
 }
 */

 // ----------------------------------------
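Note the ordering inside invalidateBlk: the block's current head content is first copied from the OS cache into the cache of the historical @<rev>/data file, and only then punched out of head/data, so a fully-cached page can still serve clients pinned at an older view. A condensed sketch of just that ordering, written against a hypothetical pageCache interface standing in for the go-fuse connector calls used in the draft:

// pageCache abstracts the three OS-pagecache operations the draft relies on.
type pageCache interface {
	Retrieve(path string, off, size int64) []byte // nil or short if not fully cached
	Store(path string, off int64, data []byte)
	Invalidate(path string, off, size int64)
}

// invalidateBlk1 mirrors the draft's flow: preserve head's cached copy under
// the block's last revision, then drop it from head/data.
func invalidateBlk1(c pageCache, head, rev string, blk, blksize int64) {
	off := blk * blksize
	if data := c.Retrieve(head, off, blksize); int64(len(data)) == blksize {
		c.Store(rev, off, data) // keep as @<rev>/data[blk]
	}
	c.Invalidate(head, off, blksize) // head/data[blk] may no longer be valid
}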
@@ -664,7 +713,7 @@ func (bfroot *BigFileRoot) mkdir(name string, fctx *fuse.Context) (_ *nodefs.Ino
 	}

 	// not there - without bfroot lock proceed to open BigFile from ZODB
-	bf, err := bigopen(asctx(fctx), groot.zconn, oid)
+	bf, err := bigopen(asctx(fctx), groot.zhead, oid)
 	if err != nil {
 		return nil, err
 	}
@@ -1064,34 +1113,6 @@ func (bf *BigFile) readAt() []byte {
-// zodbCacheControl implements zodb.LiveCacheControl to tune ZODB to never evict
-// LOBTree/LOBucket from live cache. We want to keep LOBTree/LOBucket always alive
-// becuse it is essentially the index where to find ZBigFile data.
-//
-// For the data itself - we put it to kernel pagecache and always deactivate
-// from ZODB right after that.
-//
-// See "*/head/data invariant" in "wcfs organization" overview.
-//
-// TODO set it to Connection.CacheControl
-type zodbCacheControl struct {}
-
-func (cc *zodbCacheControl) WantEvict(obj zodb.IPersistent) bool {
-	switch obj.(type) {
-	default:
-		return true
-
-	case *btree.LOBTree:
-	case *btree.LOBucket:
-		// ZBlk* are kept referenced by a LOBucket, so they don't go away from Connection.objtab
-		// XXX + ZBigFile ?
-	}
-
-	return false
-}
-
 // FIXME groot/gfsconn is tmp workaround for lack of way to retrieve FileSystemConnector from nodefs.Inode
 // TODO:
 //	- Inode += .Mount() -> nodefs.Mount
@@ -1131,17 +1152,18 @@ func main() {
 	defer zstor.Close()

 	zdb := zodb.NewDB(zstor)
-	zconn, err := zopen(ctx, zdb, &zodb.ConnOptions{})
+	zhead, err := zopen(ctx, zdb, &zodb.ConnOptions{})
 	if err != nil {
 		log.Fatal(err)
 	}
+	zhead.Cache().SetControl(&zodbCacheControl{})	// XXX +locking?

 	// mount root
 	root := &Root{
 		Node:    nodefs.NewDefaultNode(),
 		zstor:   zstor,
 		zdb:     zdb,
-		zconn:   zconn,
+		zhead:   zhead,
 		zrevTab: make(map[zodb.Tid]*ZConn),
 	}