Commit 22f5f096 authored by Kirill Smelkov's avatar Kirill Smelkov

X Rework ΔFtail so that BlkRevAt works with ZBigFile checkout from any at ∈ (tail, head]

This is necessary because previously both ΔFtail.BlkRevAt and
ΔBtail.GetAt required zfile/root checkouts from @head. However during
update process in wcfs it is ΔFtail that is updated first, and only
after some invalidations zconn is resynced to new @head. And if we
move zconn.resync to happen before ΔFtail.Update, that would break
the invariant that data in OS cache correspond to zconn.At.

The rework also prompted clarifying what ΔBtail.GetAt responsibility and
functionality should be. Now it computes the query result based only on
ΔBtail data, and returns valueExact=false or revExact=false if that data
is insufficient. Previously it additionally tried to load data from the
database, which conflicts with

	// ΔBtail semantically consists of
	//
	//	[]δB			; rev ∈ (tail, head]

Database access is now moved to the GetAt user - to ΔFtail.BlkRevAt.

WCFS tests - that were broken after ΔFtail rework - now finally pass again.

* t2+qoldhead:
  .
  .
  .
  .
  .
  .
  .
  .
parents 03de8208 95c1dac0
......@@ -43,18 +43,12 @@ const debugΔBtail = false
// It semantically consists of
//
// []δB ; rev ∈ (tail, head]
// atTail XXX no need (see vvv)
//
// where δB represents a change in BTrees space
//
// δB:
// .rev↑
// {} root -> {}(key, δvalue) XXX was value
//
// and atTail keeps set of k/v @tail for keys changed in (tail, head]
//
// atTail: XXX no need for atTail as we have δvalue.Old
// {} root -> {}(key, value)
// {} root -> {}(key, δvalue)
//
// It covers only changes to keys from tracked subset of BTrees parts.
// In particular a key that was not explicitly requested to be tracked, even if
......@@ -62,7 +56,7 @@ const debugΔBtail = false
//
// ΔBtail provides the following operations:
//
// .Track(path) - start tracking tree nodes and keys; root=path[0], keys=path[-1].keys XXX keys not correct - e.g. track missing key
// .Track(path) - start tracking tree nodes and keys; root=path[0], keys=path[-1].(lo,hi]
//
// .Update(δZ) -> δB - update BTree δ tail given raw ZODB changes
// .ForgetPast(revCut) - forget changes past revCut
......@@ -80,7 +74,7 @@ const debugΔBtail = false
//
// XXX incremental; not full coverage
//
// ΔBtail is not safe for concurrent access.
// ΔBtail is not safe for concurrent access. XXX rework
// XXX -> multiple readers / single writer?
//
// See also zodb.ΔTail
......@@ -766,44 +760,61 @@ func (δTtail *ΔTtail) forgetPast(revCut zodb.Tid) {
}
// Get returns root[key] as of @at database state plus revision that changed it.
// GetAt tries to retrieve root[key]@at from δBtail data.
//
// If δBtail has δB entry that covers root[key]@at, corresponding value
// (VDEL means deletion) and valueExact=true are returned. If δBtail data
// allows to determine revision of root[key]@at value, corresponding revision
// and revExact=true are returned. If revision of root[key]@at cannot be
// determined (rev=δBtail.Tail, revExact=false) are returned.
//
// If δBtail has no δB entry that covers root[key]@at, return is
//
// (value=VDEL, valueExact=false, rev=δBtail.Tail, revExact=false)
//
// .rev and exact=true are returned:
//
// (δB[root/key].δvalue.New, δB.rev, exact=true)
//
// if revExact=False - rev is upper estimate for the revision.
// If δBtail has no δB entry for root[key] with .rev ≤ @at, return is
//
// (VDEL, δBtail.Tail, exact=false)
//
// key must be tracked
// at must ∈ (tail, head]
//
// XXX root -> Oid ?
func (δBtail *ΔBtail) GetAt(ctx context.Context, root *Tree, key Key, at zodb.Tid) (value Value, ok bool, rev zodb.Tid, revExact bool, err error) {
defer xerr.Contextf(&err, "δBtail: root<%s>: get %d @%s", root.POid(), key, at)
func (δBtail *ΔBtail) GetAt(root zodb.Oid, key Key, at zodb.Tid) (value Value, rev zodb.Tid, valueExact, revExact bool, err error) {
defer xerr.Contextf(&err, "δBtail: root<%s>: get %d @%s", root, key, at)
// XXX key not tracked -> panic
// XXX at not ∈ (tail, head] -> panic
tail := δBtail.Tail()
head := δBtail.Head()
if !(tail < at && at <= head) {
panicf("at out of bounds: at: @%s, (tail, head] = (@%s, @%s]", at, tail, head)
}
// XXX locking
rootAt := root.PJar().At()
if rootAt != δBtail.Head() {
panicf("δBtail: root.at (@%s) != head (@%s)", rootAt, δBtail.Head())
}
value = VDEL
valueExact = false
rev = tail
revExact = false
err = δBtail.rebuild1IfNeeded(root.POid())
err = δBtail.rebuild1IfNeeded(root)
if err != nil {
return
return value, rev, valueExact, revExact, err
}
δTtail := δBtail.vδTbyRoot[root.POid()]
δTtail := δBtail.vδTbyRoot[root]
if δTtail == nil {
panicf("δBtail: root<%s> not tracked", root.POid())
panicf("δBtail: root<%s> not tracked", root)
}
// XXX -> index lastXXXOf(key) | linear scan ↓ looking for change <= at
rev = δBtail.Tail()
revExact = false
for i := len(δTtail.vδT)-1; i >= 0; i-- {
δT := δTtail.vδT[i]
δvalue, ok_ := δT.ΔKV[key]
if ok_ {
ok = true
valueExact = true
if δT.Rev > at {
value = δvalue.Old
} else {
......@@ -815,33 +826,7 @@ func (δBtail *ΔBtail) GetAt(ctx context.Context, root *Tree, key Key, at zodb.
}
}
// key was found in δT ∈ δTtail
if ok {
if value == VDEL {
ok = false
}
return
}
// key not in history tail at all.
// use @head[key]
// @tail[key] is not present - key was not changing in (tail, head].
// since at ∈ (tail, head] we can use @head[key] as the result
xvalue, ok, err := root.Get(ctx, key)
if !ok {
value = VDEL
}
if err != nil || !ok {
return
}
value, err = vOid(xvalue)
if err != nil {
ok = false
return
}
return
return value, rev, valueExact, revExact, nil
}
// XXX don't need
......
......@@ -390,9 +390,9 @@ func (bf *zBigFileState) PySetState(pystate interface{}) (err error) {
return fmt.Errorf("blksize: must be > 0; got %d", blksize)
}
blktab, ok := t[1].(*btree.LOBTree)
if !ok {
return fmt.Errorf("blktab: expect LOBTree; got %s", xzodb.TypeOf(t[1]))
blktab, err := vBlktab(t[1])
if err != nil {
return err
}
bf.blksize = blksize
......@@ -437,9 +437,9 @@ func (bf *ZBigFile) LoadBlk(ctx context.Context, blk int64) (_ []byte, treePath
return make([]byte, bf.blksize), treePath, nil, blkRevMax, nil
}
zblk, ok = xzblk.(ZBlk)
if !ok {
return nil, nil, nil, 0, fmt.Errorf("expect ZBlk*; got %s", xzodb.TypeOf(xzblk))
zblk, err = vZBlk(xzblk)
if err != nil {
return nil, nil, nil, 0, err
}
blkdata, zblkrev, err := zblk.LoadBlkData(ctx)
......@@ -493,6 +493,23 @@ func (bf *ZBigFile) Size(ctx context.Context) (_ int64, treePath []btree.LONode,
return size, treePath, nil
}
// vZBlk verifies that xzblk is a ZBlk and returns it typed as such.
//
// If xzblk is not a ZBlk, nil is returned together with an error that
// reports xzodb.TypeOf(xzblk).
func vZBlk(xzblk interface{}) (ZBlk, error) {
	if zblk, isZBlk := xzblk.(ZBlk); isZBlk {
		return zblk, nil
	}
	return nil, fmt.Errorf("expect ZBlk*; got %s", xzodb.TypeOf(xzblk))
}
// vBlktab verifies that xblktab is a *btree.LOBTree and returns it typed as such.
//
// For any other dynamic type, nil is returned together with an error that
// reports xzodb.TypeOf(xblktab).
func vBlktab(xblktab interface{}) (*btree.LOBTree, error) {
	switch tab := xblktab.(type) {
	case *btree.LOBTree:
		return tab, nil
	default:
		return nil, fmt.Errorf("blktab: expect LOBTree; got %s", xzodb.TypeOf(xblktab))
	}
}
// ----------------------------------------
......
......@@ -67,7 +67,7 @@ type setOid = set.Oid
// .ForgetPast(revCut) - forget changes past revCut
// .SliceByRev(lo, hi) -> []δF - query for all files changes with rev ∈ (lo, hi]
// .SliceByFileRev(file, lo, hi) -> []δfile - query for changes of file with rev ∈ (lo, hi]
// .LastBlkRev(file, #blk, at) - query for what is last revision that changed
// .BlkRevAt(file, #blk, at) - query for what is last revision that changed
// file[#blk] as of @at database state.
//
// where δfile represents a change to one file
......@@ -708,27 +708,41 @@ func (δFtail *ΔFtail) SliceByFileRev(zfile *ZBigFile, lo, hi zodb.Tid) /*reado
return vδf
}
// XXX rename -> BlkRevAt
// LastBlkRev returns last revision that changed file[blk] as of @at database state.
// BlkRevAt returns last revision that changed file[blk] as of @at database state.
//
// if exact=False - what is returned is only an upper bound for last block revision.
//
// zf must be from @head
// zf must be any checkout from (tail, head]
// at must ∈ (tail, head]
// blk must be tracked
//
// XXX +ctx, error rebuild []δF here
func (δFtail *ΔFtail) LastBlkRev(ctx context.Context, zf *ZBigFile, blk int64, at zodb.Tid) (_ zodb.Tid, exact bool) {
rev, exact, err := δFtail._LastBlkRev(ctx, zf, blk, at)
func (δFtail *ΔFtail) BlkRevAt(ctx context.Context, zf *ZBigFile, blk int64, at zodb.Tid) (_ zodb.Tid, exact bool) {
rev, exact, err := δFtail._BlkRevAt(ctx, zf, blk, at)
if err != nil {
panic(err) // XXX
}
return rev, exact
}
func (δFtail *ΔFtail) _LastBlkRev(ctx context.Context, zf *ZBigFile, blk int64, at zodb.Tid) (_ zodb.Tid, exact bool, err error) {
func (δFtail *ΔFtail) _BlkRevAt(ctx context.Context, zf *ZBigFile, blk int64, at zodb.Tid) (_ zodb.Tid, exact bool, err error) {
defer xerr.Contextf(&err, "blkrev f<%s> #%d @%s", zf.POid(), blk, at)
//fmt.Printf("\nblkrev #%d @%s\n", blk, at)
// assert at ∈ (tail, head]
tail := δFtail.Tail()
head := δFtail.Head()
if !(tail < at && at <= head) {
panicf("at out of bounds: at: @%s, (tail, head] = (@%s, @%s]", at, tail, head)
}
// assert zf.at ∈ (tail, head]
zconn := zf.PJar()
zconnAt := zconn.At()
if !(tail < zconnAt && zconnAt <= head) {
panicf("zconn.at out of bounds: zconn.at: @%s, (tail, head] = (@%s, @%s]", zconnAt, tail, head)
}
// XXX locking
δftail, err := δFtail.rebuildIfNeeded(zf.POid())
......@@ -748,21 +762,8 @@ func (δFtail *ΔFtail) _LastBlkRev(ctx context.Context, zf *ZBigFile, blk int64
// root
var root zodb.Oid
var rootObj *btree.LOBTree
if i == l {
err := zf.PActivate(ctx)
if err != nil {
// file deleted
if xzodb.IsErrNoData(err) {
root = xbtree.VDEL
} else {
return zodb.InvalidTid, false, err
}
} else {
rootObj = zf.blktab
root = rootObj.POid()
zf.PDeactivate()
}
root = δftail.root
} else {
root = vδE[i].oldRoot
}
......@@ -779,47 +780,58 @@ func (δFtail *ΔFtail) _LastBlkRev(ctx context.Context, zf *ZBigFile, blk int64
//fmt.Printf(" epoch: @%s root: %s\n", epoch, root)
// get to rootObj (NOTE @head, because it is ΔBtail.GetAt requirement)
if rootObj == nil && root != xbtree.VDEL {
zconn := zf.PJar()
xrootObj, err := zconn.Get(ctx, root)
if err != nil {
return zodb.InvalidTid, false, err
}
var ok bool
rootObj, ok = xrootObj.(*btree.LOBTree)
if !ok {
return zodb.InvalidTid, false, fmt.Errorf("blktab: expect LOBTree; got %s", xzodb.TypeOf(xrootObj))
}
if root == xbtree.VDEL {
return epoch, true, nil
}
// XXX take epochs into account
zblk, tabRev, zblkExact, tabRevExact, err := δFtail.δBtail.GetAt(root, blk, at)
var zblkOid zodb.Oid
var tabRev zodb.Tid
var tabRevExact, ok bool
if rootObj != nil {
zblkOid, ok, tabRev, tabRevExact, err = δFtail.δBtail.GetAt(ctx, rootObj, blk, at)
//fmt.Printf(" GetAt #%d @%s -> %s, %v, @%s, %v\n", blk, at, zblkOid, ok, tabRev, tabRevExact)
//fmt.Printf(" GetAt #%d @%s -> %s(%v), @%s(%v)\n", blk, at, zblk, zblkExact, tabRev, tabRevExact)
if err != nil {
return zodb.InvalidTid, false, err
}
}
if tabRev < epoch {
tabRev = epoch
tabRevExact = true
}
// block was removed
// if δBtail does not have entry that covers root[blk] - get it
// through zconn that has any .at ∈ (tail, head].
if !zblkExact {
xblktab, err := zconn.Get(ctx, root)
if err != nil {
return zodb.InvalidTid, false, err
}
blktab, err := vBlktab(xblktab)
if err != nil {
return zodb.InvalidTid, false, err
}
xzblkObj, ok, err := blktab.Get(ctx, blk)
if err != nil {
return zodb.InvalidTid, false, err
}
if !ok {
zblk = xbtree.VDEL
} else {
zblkObj, err := vZBlk(xzblkObj)
if err != nil {
return zodb.InvalidTid, false, fmt.Errorf("blktab<%s>[#%d]: %s", root, blk, err)
}
zblk = zblkObj.POid()
}
}
// block was removed
if zblk == xbtree.VDEL {
return tabRev, tabRevExact, nil
}
// blktab[blk] was changed to point to a zblk @rev.
// blktab[blk] was changed to point to a zblk @tabRev.
// blk revision is max rev and when zblk changed last in (rev, at] range.
zblkRev, zblkRevExact := δFtail.δBtail.ΔZtail().LastRevOf(zblkOid, at)
//fmt.Printf(" ZRevOf %s @%s -> @%s, %v\n", zblkOid, at, zblkRev, zblkRevExact)
zblkRev, zblkRevExact := δFtail.δBtail.ΔZtail().LastRevOf(zblk, at)
//fmt.Printf(" ZRevOf %s @%s -> @%s, %v\n", zblk, at, zblkRev, zblkRevExact)
if zblkRev > tabRev {
return zblkRev, zblkRevExact, nil
} else {
......
......@@ -543,7 +543,7 @@ func testΔFtail(t_ *testing.T, testq chan ΔFTestEntry) {
}
// LastBlkRev
// BlkRevAt
blkv := []int64{} // all blocks
if l := len(vδf); l > 0 {
......@@ -560,7 +560,7 @@ func testΔFtail(t_ *testing.T, testq chan ΔFTestEntry) {
at := vδf[j].Rev
blkRev := blkRevAt[at]
for _, blk := range blkv {
rev, exact := δFtail.LastBlkRev(ctx, zfile, blk, at)
rev, exact := δFtail.BlkRevAt(ctx, zfile, blk, at)
revOK, ok := blkRev[blk]
if !ok {
k := len(epochv) - 1
......
......@@ -368,7 +368,7 @@ package main
// rev(blk) ≤ rev'(blk) rev'(blk) = min(^^^)
//
//
// XXX we delay recomputing δFtail.LastBlkRev(file, #blk, head) because
// XXX we delay recomputing δFtail.BlkRevAt(file, #blk, head) because
// using just cheap revmax estimate can frequently result in all watches
// being skipped.
//
......@@ -961,6 +961,7 @@ retry:
file := bfdir.fileTab[foid] // must be present
zfile := file.zfile
// XXX need to do only if δfile.Size changed
size, sizePath, err := zfile.Size(ctx)
if err != nil {
return err
......@@ -1086,7 +1087,7 @@ func (f *BigFile) invalidateBlk(ctx context.Context, blk int64) (err error) {
func() {
// store retrieved data back to OS cache for file @<rev>/file[blk]
δFtail := f.head.bfdir.δFtail
blkrev, _ := δFtail.LastBlkRev(ctx, f.zfile, blk, f.head.zconn.At())
blkrev, _ := δFtail.BlkRevAt(ctx, f.zfile, blk, f.head.zconn.At())
frev, funlock, err := groot.lockRevFile(blkrev, f.zfile.POid())
if err != nil {
log.Errorf("BUG: %s: invalidate blk #%d: %s (ignoring, but reading @revX/bigfile will be slow)", f.path(), blk, err)
......@@ -1564,7 +1565,7 @@ func (f *BigFile) readPinWatchers(ctx context.Context, blk int64, treepath []btr
// we'll relock atMu again and recheck blkrev vs w.at after.
w.atMu.RUnlock()
blkrev, _ = δFtail.LastBlkRev(ctx, f.zfile, blk, f.head.zconn.At())
blkrev, _ = δFtail.BlkRevAt(ctx, f.zfile, blk, f.head.zconn.At())
blkrevRough = false
w.atMu.RLock()
......@@ -1580,7 +1581,7 @@ func (f *BigFile) readPinWatchers(ctx context.Context, blk int64, treepath []btr
// and most of them would be on different w.at - cache of the file will
// be lost. Via pinning to particular block revision, we make sure the
// revision to pin is the same on all clients, and so file cache is shared.
pinrev, _ := δFtail.LastBlkRev(ctx, w.file.zfile, blk, w.at) // XXX move into go?
pinrev, _ := δFtail.BlkRevAt(ctx, w.file.zfile, blk, w.at) // XXX move into go?
// XXX ^^^ w.file vs f ?
//fmt.Printf("S: read #%d: watch @%s: pin -> @%s\n", blk, w.at, pinrev)
......@@ -1762,7 +1763,7 @@ func (wlink *WatchLink) setupWatch(ctx context.Context, foid zodb.Oid, at zodb.T
continue
}
toPin[blk], _ = δFtail.LastBlkRev(ctx, f.zfile, blk, at) // XXX err
toPin[blk], _ = δFtail.BlkRevAt(ctx, f.zfile, blk, at) // XXX err
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment