Commit 8b5d8523 authored by Kirill Smelkov

X Move tracking of which blocks were accessed from wcfs to ΔFtail

This set has to be protected by a lock. In ΔFtail it can be naturally
treated as "the set of blocks that were explicitly requested to be
tracked" and protected by the general ΔFtail locking. If it stayed in
wcfs, it would have to be protected by a separate lock and explicitly
kept in sync with ΔFtail.

Change ΔFtail.SliceByFileRev to accept an option indicating whether we
are interested only in changes that cover the explicitly tracked set.
Maybe in the future we might want to make this the default and remove
the option.
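
For example, a caller like wcfs can then query only explicitly tracked
blocks roughly as follows (illustrative sketch only; zfile, at and head
are placeholders assumed to be set up by the caller):

    vδf, err := δFtail.SliceByFileRevEx(zfile, at, head, zdata.QueryOptions{
            OnlyExplicitlyTracked: true, // skip blocks that are tracked only implicitly
    })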
parent aaca62ff
@@ -194,6 +194,8 @@ type _ΔFileTail struct {
     vδE        []_ΔFileEpoch // epochs (changes to ZBigFile object itself) ; nil if not yet rebuilt
     rebuildJob *_RebuildJob  // !nil if vδE rebuild is currently in-progress
+
+    btrackReqSet setI64       // set of blocks explicitly requested to be tracked in this file
 }

 // _ΔFileEpoch represent a change to ZBigFile object.
@@ -260,7 +262,8 @@ func (δFtail *ΔFtail) Tail() zodb.Tid { return δFtail.δBtail.Tail() }
 // One root can be associated with several files (each provided on different Track calls).
 //
 // zblk can be nil, which represents a hole.
-// if zblk is nil -> blk is ignored and can be arbitrary.
+// blk can be < 0, which requests not to establish file[blk] -> zblk
+// association. zblk must be nil in this case.
 //
 // Objects in path and zblk must be with .PJar().At() == .head
 func (δFtail *ΔFtail) Track(file *ZBigFile, blk int64, path []btree.LONode, blkcov btree.LKeyRange, zblk ZBlk) {
@@ -299,7 +302,11 @@ func (δFtail *ΔFtail) Track(file *ZBigFile, blk int64, path []btree.LONode, bl
     δftail, ok := δFtail.byFile[foid]
     if !ok {
-        δftail = &_ΔFileTail{root: root, vδE: nil /*will need to be rebuilt to past till tail*/}
+        δftail = &_ΔFileTail{
+            root:         root,
+            vδE:          nil /*will need to be rebuilt to past till tail*/,
+            btrackReqSet: setI64{},
+        }
         δFtail.byFile[foid] = δftail
         δFtail.ftrackNew.Add(foid)
     }
@@ -307,10 +314,16 @@ func (δFtail *ΔFtail) Track(file *ZBigFile, blk int64, path []btree.LONode, bl
         // .root can change during epochs, but in between them it must be stable
         panicf("BUG: zfile<%s> root mutated from %s -> %s", foid, δftail.root, root)
     }
+    if blk >= 0 {
+        δftail.btrackReqSet.Add(blk)
+    }

     // associate zblk with root, if it was not hole
     if zblk != nil {
+        if blk < 0 {
+            panicf("BUG: zfile<%s>: blk=%d, but zblk != nil", foid, blk)
+        }
         zoid := zblk.POid()
         inroot, ok := δFtail.ztrackInRoot[zoid]
@@ -333,15 +346,18 @@ func (δFtail *ΔFtail) Track(file *ZBigFile, blk int64, path []btree.LONode, bl
 //
 // It builds vδE for that file if there is such need.
 // The only case when vδE actually needs to be built is when the file just started to be tracked.
-func (δFtail *ΔFtail) vδEForFile(foid zodb.Oid) (vδE []_ΔFileEpoch, headRoot zodb.Oid, err error) {
+//
+// It also returns δftail for convenience.
+// NOTE access to returned δftail must be protected via δFtail.mu.
+func (δFtail *ΔFtail) vδEForFile(foid zodb.Oid) (vδE []_ΔFileEpoch, headRoot zodb.Oid, δftail *_ΔFileTail, err error) {
     δFtail.mu.Lock() // TODO verify that there is no in-progress writers
     defer δFtail.mu.Unlock()

-    δftail := δFtail.byFile[foid]
+    δftail = δFtail.byFile[foid]
     root := δftail.root
     vδE = δftail.vδE
     if vδE != nil {
-        return vδE, root, nil
+        return vδE, root, δftail, nil
     }

     // vδE needs to be built
@@ -355,7 +371,7 @@ func (δFtail *ΔFtail) vδEForFile(foid zodb.Oid) (vδE []_ΔFileEpoch, headRoo
             δFtail.mu.Lock()
             vδE = δftail.vδE
         }
-        return vδE, root, job.err
+        return vδE, root, δftail, job.err
     }

     // we become responsible to build vδE
@@ -379,7 +395,7 @@ func (δFtail *ΔFtail) vδEForFile(foid zodb.Oid) (vδE []_ΔFileEpoch, headRoo
     job.err = err
     close(job.ready)

-    return vδE, root, err
+    return vδE, root, δftail, err
 }

 // _rebuildAll rebuilds vδE for all files from ftrackNew requests.
@@ -473,6 +489,7 @@ func (δFtail *ΔFtail) Update(δZ *zodb.EventCommit) (_ ΔF, err error) {
             // NOTE no need to clone vδE: we are writer, vδE is never returned to
             // outside, append does not invalidate previous vδE retrievals.
             δftail.vδE = append(δftail.vδE, δE)
+            δftail.btrackReqSet = setI64{}
         }
     }
@@ -674,16 +691,32 @@ type _ZinblkOverlay struct {
 //
 // Note: contrary to regular go slicing, low is exclusive while high is inclusive.
 func (δFtail *ΔFtail) SliceByFileRev(zfile *ZBigFile, lo, hi zodb.Tid) (/*readonly*/[]*ΔFile, error) {
+    return δFtail.SliceByFileRevEx(zfile, lo, hi, QueryOptions{})
+}
+
+// SliceByFileRevEx is extended version of SliceByFileRev with options.
+func (δFtail *ΔFtail) SliceByFileRevEx(zfile *ZBigFile, lo, hi zodb.Tid, opt QueryOptions) (/*readonly*/[]*ΔFile, error) {
     foid := zfile.POid()
     //fmt.Printf("\nslice f<%s> (@%s,@%s]\n", foid, lo, hi)
-    vδf, err := δFtail._SliceByFileRev(foid, lo, hi)
+    vδf, err := δFtail._SliceByFileRev(foid, lo, hi, opt)
     if err != nil {
         err = fmt.Errorf("slice f<%s> (@%s,@%s]: %e", foid, lo, hi, err)
     }
     return vδf, err
 }

-func (δFtail *ΔFtail) _SliceByFileRev(foid zodb.Oid, lo, hi zodb.Tid) (/*readonly*/[]*ΔFile, error) {
+// QueryOptions represents options for SliceBy* queries.
+type QueryOptions struct {
+    // OnlyExplicitlyTracked requests that only blocks, that were
+    // explicitly tracked, are included into result.
+    //
+    // By default SliceBy* return information about both blocks that
+    // were explicitly tracked, and blocks that became tracked due to being
+    // adjacent to a tracked block in BTree bucket.
+    OnlyExplicitlyTracked bool
+}
+
+func (δFtail *ΔFtail) _SliceByFileRev(foid zodb.Oid, lo, hi zodb.Tid, opt QueryOptions) (/*readonly*/[]*ΔFile, error) {
     xtail.AssertSlice(δFtail, lo, hi)

     // query .δBtail.SliceByRootRev(file.blktab, lo, hi) +
@@ -703,7 +736,7 @@ func (δFtail *ΔFtail) _SliceByFileRev(foid zodb.Oid, lo, hi zodb.Tid) (/*reado
     // δFile ────────o───────o──────x─────x────────────────────────

-    vδE, headRoot, err := δFtail.vδEForFile(foid)
+    vδE, headRoot, δftail, err := δFtail.vδEForFile(foid)
     if err != nil {
         return nil, err
     }
@@ -926,6 +959,40 @@ func (δFtail *ΔFtail) _SliceByFileRev(foid zodb.Oid, lo, hi zodb.Tid) (/*reado
         vδf[i], vδf[j] = vδf[j], vδf[i]
     }

+    // take opt.OnlyExplicitlyTracked into account
+    // XXX epochs not handled (currently ok as epochs are rejected by wcfs)
+    if opt.OnlyExplicitlyTracked {
+        δblk := setI64{}
+        for _, δf := range vδf {
+            δblk.Update(δf.Blocks)
+        }
+
+        δFtail.mu.Lock()
+        for blk := range δblk {
+            if !δftail.btrackReqSet.Has(blk) {
+                δblk.Del(blk)
+            }
+        }
+        δFtail.mu.Unlock()
+
+        for i := len(vδf)-1; i >= 0; i-- {
+            δf := vδf[i]
+            if δf.Epoch {
+                continue
+            }
+            for blk := range δf.Blocks {
+                if !δblk.Has(blk) {
+                    δf.Blocks.Del(blk)
+                }
+            }
+            if len(δf.Blocks) == 0 {
+                // delete @i
+                copy(vδf[i:], vδf[i+1:])
+                vδf = vδf[:len(vδf)-1]
+            }
+        }
+    }
+
     return vδf, nil
 }
@@ -1017,7 +1084,7 @@ func (δFtail *ΔFtail) BlkRevAt(ctx context.Context, zfile *ZBigFile, blk int64
         panicf("zconn.at out of bounds: zconn.at: @%s, (tail, head] = (@%s, @%s]", zconnAt, tail, head)
     }

-    vδE, headRoot, err := δFtail.vδEForFile(foid)
+    vδE, headRoot, _, err := δFtail.vδEForFile(foid)
     if err != nil {
         return zodb.InvalidTid, false, err
     }
...
@@ -609,6 +609,8 @@ func testΔFtail(t_ *testing.T, testq chan ΔFTestEntry) {
 // SliceByFileRev returns all changes to that untracked block. In other words
 // we verify that no change to untracked block is missed, if any change to that
 // block is ever present in returned slice.
+//
+// This test also verifies handling of OnlyExplicitlyTracked query option.
 func TestΔFtailSliceUntrackedUniform(t_ *testing.T) {
     t := newT(t_)
     X := exc.Raiseif
@@ -651,37 +653,48 @@ func TestΔFtailSliceUntrackedUniform(t_ *testing.T) {
     // blktab[2] remains unnoticed because it is not changed past at1.
     xtrackBlk(0)

+    // assertSliceByFileRev verifies result of SliceByFileRev and SliceByFileRevEx(OnlyExplicitlyTracked=y).
+    assertSliceByFileRev := func(lo, hi zodb.Tid, vδf_ok, vδfT_ok []*ΔFile) {
+        t.Helper()
+
+        Tonly := QueryOptions{OnlyExplicitlyTracked: true}
+        vδf,  err := δFtail.SliceByFileRev  (zfile, lo, hi);        X(err)
+        vδfT, err := δFtail.SliceByFileRevEx(zfile, lo, hi, Tonly); X(err)
+
+        if !reflect.DeepEqual(vδf, vδf_ok) {
+            t.Errorf("slice (@%s,@%s]:\nhave: %v\nwant: %v", t.AtSymb(lo), t.AtSymb(hi), t.vδfstr(vδf), t.vδfstr(vδf_ok))
+        }
+        if !reflect.DeepEqual(vδfT, vδfT_ok) {
+            t.Errorf("sliceT (@%s,@%s]:\nhave: %v\nwant: %v", t.AtSymb(lo), t.AtSymb(hi), t.vδfstr(vδfT), t.vδfstr(vδfT_ok))
+        }
+    }
+
     // (at1, at4] -> changes to both 0 and 1, because they both are changed in the same bucket @at2
-    lo := t1.At
-    hi := t4.At
-    vδf, err := δFtail.SliceByFileRev(zfile, lo, hi); X(err)
-    vδf_ok := []*ΔFile{
-        &ΔFile{Rev: t2.At, Blocks: b(0,1), Size: true},
-        &ΔFile{Rev: t3.At, Blocks: b(0,1), Size: false},
-        &ΔFile{Rev: t4.At, Blocks: b(  1), Size: false},
-    }
-    if !reflect.DeepEqual(vδf, vδf_ok) {
-        t.Errorf("slice (@%s,@%s]:\nhave: %v\nwant: %v", t.AtSymb(lo), t.AtSymb(hi), t.vδfstr(vδf), t.vδfstr(vδf_ok))
-    }
+    assertSliceByFileRev(t1.At, t4.At,
+        /*vδf*/ []*ΔFile{
+            &ΔFile{Rev: t2.At, Blocks: b(0,1), Size: true},
+            &ΔFile{Rev: t3.At, Blocks: b(0,1), Size: false},
+            &ΔFile{Rev: t4.At, Blocks: b(  1), Size: false},
+        },
+        /*vδfT*/ []*ΔFile{
+            &ΔFile{Rev: t2.At, Blocks: b(0  ), Size: true},
+            &ΔFile{Rev: t3.At, Blocks: b(0  ), Size: false},
+            // no change @at4
+        })

     // (at2, at4] -> changes to only 0, because there is no change to 2 via blktab
-    lo = t2.At
-    vδf, err = δFtail.SliceByFileRev(zfile, lo, hi); X(err)
-    vδf_ok = []*ΔFile{
-        &ΔFile{Rev: t3.At, Blocks: b(0), Size: false},
-    }
-    if !reflect.DeepEqual(vδf, vδf_ok) {
-        t.Errorf("slice (@%s,@%s]:\nhave: %v\nwant: %v", t.AtSymb(lo), t.AtSymb(hi), t.vδfstr(vδf), t.vδfstr(vδf_ok))
-    }
+    assertSliceByFileRev(t2.At, t4.At,
+        /*vδf*/ []*ΔFile{
+            &ΔFile{Rev: t3.At, Blocks: b(0), Size: false},
+        },
+        /*vδfT*/ []*ΔFile{
+            &ΔFile{Rev: t3.At, Blocks: b(0), Size: false},
+        })

     // (at3, at4] -> changes to only 0, ----/----
-    lo = t3.At
-    vδf, err = δFtail.SliceByFileRev(zfile, lo, hi); X(err)
-    vδf_ok = []*ΔFile(nil)
-    if !reflect.DeepEqual(vδf, vδf_ok) {
-        t.Errorf("slice (@%s,@%s]:\nhave: %v\nwant: %v", t.AtSymb(lo), t.AtSymb(hi), t.vδfstr(vδf), t.vδfstr(vδf_ok))
-    }
+    assertSliceByFileRev(t3.At, t4.At,
+        /*vδf*/  []*ΔFile(nil),
+        /*vδfT*/ []*ΔFile(nil))
 }
...
@@ -627,11 +627,6 @@ type BigFile struct {
     // while populating δFtail lazily
     // XXX or then it is not "constant during lifetime of current txn"

-    // blocks that were ever read-accessed (head/ only)  XXX locking by bfdir.δFmu ?
-    // XXX = δFtail.Tracked(f) ?
-    // XXX goes away if δFtail query returns only tracked blocks  (XXX that's not the case)
-    accessed setI64
-
     // inflight loadings of ZBigFile from ZODB.
     // successful load results are kept here until blkdata is put into OS pagecache.
     //
@@ -1536,7 +1531,6 @@ func (f *BigFile) readPinWatchers(ctx context.Context, blk int64, treepath []btr
     δFtail := bfdir.δFtail
     // bfdir.δFmu.Lock()  // XXX locking correct? XXX -> better push down?
     δFtail.Track(f.zfile, blk, treepath, blkcov, zblk) // XXX pass in zblk.rev here?
-    f.accessed.Add(blk)
     // bfdir.δFmu.Unlock()

     // make sure that file[blk] on clients side stays as of @w.at state.
@@ -1739,7 +1733,16 @@ func (wlink *WatchLink) setupWatch(ctx context.Context, foid zodb.Oid, at zodb.T
     toPin := map[int64]zodb.Tid{} // blk -> @rev

     δFtail := bfdir.δFtail
-    vδf, err := δFtail.SliceByFileRev(f.zfile, at, headAt) // XXX locking δFtail
+    vδf, err := δFtail.SliceByFileRevEx(f.zfile, at, headAt, zdata.QueryOptions{
+        // blk might be in δFtail because it is adjacent in
+        // ZBigFile.blktab to another blk that was explicitly tracked.
+        // We do not want to get those to avoid unnecessarily pinning
+        // potentially more blocks than needed.
+        //
+        // wcfs tests also verify that only blocks that were previously
+        // explicitly accessed are included into watch setup pins.
+        OnlyExplicitlyTracked: true,
+    }) // XXX locking δFtail
     if err != nil {
         panic(err) // XXX
     }
@@ -1764,20 +1767,6 @@ func (wlink *WatchLink) setupWatch(ctx context.Context, foid zodb.Oid, at zodb.T
         if already {
             continue
         }

-        // blk might be in δFtail because it is adjacent in
-        // ZBigFile.blktab to another blk that was explicitly
-        // tracked. However wcfs tests expect that only blocks
-        // that were previously explicitly accessed are
-        // included into watch setup pins.
-        //
-        // XXX adjust wcfs tests to not require only accessed
-        // blocks to be in setup pins? But that would mean that
-        // potentially more blocks would be potentially
-        // _unnecessarily_ pinned if they are not going to be
-        // accessed at all.
-        if !f.accessed.Has(blk) {
-            continue
-        }
         toPin[blk], _, err = δFtail.BlkRevAt(ctx, f.zfile, blk, at)
         if err != nil {
@@ -2269,10 +2258,6 @@ func (head *Head) bigopen(ctx context.Context, oid zodb.Oid) (_ *BigFile, err er
         head.bfdir.δFtail.Track(f.zfile, -1, sizePath, blkCov, nil)
         // head.bfdir.δFmu.Unlock()

-        // FIXME: scan zfile.blktab - so that we can detect all btree changes
-        // see "XXX building δFtail lazily ..." in notes.txt
-        f.accessed = make(setI64)
-
         f.watchTab = make(map[*Watch]struct{})
     }
...