Commit 44a2e6db authored by Kirill Smelkov's avatar Kirill Smelkov

.

parent 2f2ffdd5
......@@ -18,9 +18,25 @@
// See https://www.nexedi.com/licensing for rationale and options.
package xbtree
// diff for BTrees
// XXX doc
// treediff provides diff for BTrees
//
// Use δZConnectTracked + treediff to compute BTree-diff caused by δZ:
//
// δZConnectTracked(δZ, trackSet) -> δZTC, δtopsByRoot
// treediff(root, δtops, δZTC, trackSet, zconn{Old,New}) -> δT, δtrack, δtkeycov
//
// δZConnectTracked computes BTree-connected closure of δZ modulo tracked set
// and also returns δtopsByRoot to indicate which tree objects were changed and
// in which subtree parts. With that information one can call treediff for each
// changed root to compute BTree-diff and δ for trackSet itself.
//
//
// BTree diff algorithm
//
// The central part of BTree-diff algorithm is in diffT and diffB.
//
//
// FIXME the algorithm is different: recursion is implemented by expanding rangeSplit step by step.
//
// δ(BTree) notes
......@@ -168,164 +184,8 @@ func δZConnectTracked(δZv []zodb.Oid, T blib.PPTreeSubSet) (δZTC setOid, δto
}
// XXX place
// nodeInRange represents a Node coming under [lo, hi_] key range in its tree.
//
// Path() returns the full path to the node: .prefix followed by oid of the node itself.
type nodeInRange struct {
prefix []zodb.Oid // path to this node goes via these objects; oid of the node itself is not included
keycov blib.KeyRange // key coverage: [Lo, Hi_] range of keys this node comes under
node Node // the node itself; Expand and GetToLeaf_ handle *Tree and *Bucket here
done bool // whether this node was already taken into account while computing diff
}
// Path returns full path to this node: .prefix followed by oid of the node itself.
//
// The result is always a freshly allocated slice that shares no memory with
// .prefix - else .prefix can become aliased in between children of a node.
func (rn *nodeInRange) Path() []zodb.Oid {
	// Build the copy in one exactly-sized allocation. Appending to rn.prefix
	// directly could write into spare capacity of its backing array and
	// clobber data of whoever else shares that array.
	path := make([]zodb.Oid, 0, len(rn.prefix)+1)
	path = append(path, rn.prefix...)
	return append(path, rn.node.POid())
}
// rangeSplit represents a set of nodes covering a key range.
//
// Nodes come in ascending key order (key↑) and their key coverages do not
// intersect. Get_ and Expand rely on this ordering to locate nodes via binary
// search on the upper bound of each node's coverage.
type rangeSplit []*nodeInRange // key↑
// Get returns the node whose key coverage contains k.
//
// It is the panicking counterpart of Get_: if no node covers k, Get panics.
func (rs rangeSplit) Get(k Key) *nodeInRange {
	if found, covered := rs.Get_(k); covered {
		return found
	}

	panicf("key %v not covered; coverage: %s", k, rs)
	return nil // not reached as long as panicf panics
}
// Get_ returns the node whose key coverage contains k.
//
// It is comma-ok version of Get: ok=false is returned if k is above the
// coverage of every node in rs.
func (rs rangeSplit) Get_(k Key) (rnode *nodeInRange, ok bool) {
	// binary search for the first node whose upper bound is not below k
	idx := sort.Search(len(rs), func(j int) bool {
		return rs[j].keycov.Hi_ >= k
	})
	if idx >= len(rs) {
		return nil, false // key not covered
	}

	rnode = rs[idx]
	if rnode.keycov.Has(k) {
		return rnode, true
	}

	// the node found by upper bound does not actually contain k
	panicf("BUG: get(%v) -> %s; coverage: %s", k, rnode, rs)
	return rnode, true
}
// Expand replaces rnode with its children.
//
// Preconditions:
//
//   - rnode must be initially in *prs;
//   - rnode.node must be tree;
//   - rnode.node must be already activated.
//
// The children inserted in place of rnode are returned for convenience.
func (prs *rangeSplit) Expand(rnode *nodeInRange) (children rangeSplit) {
	rs := *prs

	// locate rnode: first entry whose upper bound reaches rnode's upper bound
	pos := sort.Search(len(rs), func(j int) bool {
		return rs[j].keycov.Hi_ >= rnode.keycov.Hi_
	})
	if found := pos < len(rs) && rs[pos] == rnode; !found {
		panicf("%s not in rangeSplit; coverage: %s", rnode, rs)
	}

	// [i].Key ≤ [i].Child.*.Key < [i+1].Key   i ∈ [0, len([]))
	//
	// [0].Key       = -∞ ; always returned so
	// [len(ev)].Key = +∞ ; should be assumed so
	entryv := rnode.node.(*Tree).Entryv()
	last := len(entryv) - 1
	children = make(rangeSplit, 0, len(entryv)+1)
	for j := range entryv {
		// first/last child inherit the corresponding bound from rnode itself
		lo := rnode.keycov.Lo
		if j > 0 {
			lo = entryv[j].Key()
		}
		hi_ := rnode.keycov.Hi_
		if j < last {
			hi_ = entryv[j+1].Key() - 1 // NOTE -1 because it is hi_] not hi)
		}

		child := &nodeInRange{
			prefix: rnode.Path(), // Path is called anew for every child
			keycov: blib.KeyRange{lo, hi_},
			node:   entryv[j].Child(),
		}
		children = append(children, child)
	}

	// del[pos]; insert(@pos, children)
	tail := append(children, rs[pos+1:]...)
	*prs = append(rs[:pos], tail...)
	return children
}
// GetToLeaf returns leaf node corresponding to key k.
//
// Leaf is usually a bucket node; the only exception is an empty tree, in
// which case the tree node itself is returned as the leaf.
// On the way down GetToLeaf expands, step by step, every tree it has to
// traverse to get to the next depth level.
//
// GetToLeaf panics if k is not covered.
func (prs *rangeSplit) GetToLeaf(ctx context.Context, k Key) (*nodeInRange, error) {
	leaf, covered, err := prs.GetToLeaf_(ctx, k)
	if err != nil || covered {
		return leaf, err
	}

	// no error, but nothing covers k
	panicf("key %v not covered; coverage: %s", k, *prs)
	return leaf, err
}
// GetToLeaf_ is comma-ok version of GetToLeaf.
//
// ok=false with nil error is returned if k is not covered by *prs.
// A non-nil error is returned if activating a tree on the way down fails.
func (prs *rangeSplit) GetToLeaf_(ctx context.Context, k Key) (rnode *nodeInRange, ok bool, err error) {
	if rnode, ok = prs.Get_(k); !ok {
		return nil, false, nil // key not covered
	}

	// descend one level per iteration until a leaf is reached
	for {
		// bucket = leaf
		if _, isBucket := rnode.node.(*Bucket); isBucket {
			return rnode, true, nil
		}

		// it is a tree -> activate to expand; check for ø case
		tree := rnode.node.(*Tree)
		if err = tree.PActivate(ctx); err != nil {
			return nil, false, err
		}
		// NOTE deferred inside the loop: every tree activated on the way
		// down is deactivated only when GetToLeaf_ returns.
		defer tree.PDeactivate()

		// empty tree -> don't expand - it is already leaf
		if len(tree.Entryv()) == 0 {
			return rnode, true, nil
		}

		// expand tree children and continue with the one covering k
		rnode = prs.Expand(rnode).Get(k) // k must be there
	}
}
// String returns space-separated representation of all nodes in rs.
//
// Empty rangeSplit is represented as "ø".
func (rs rangeSplit) String() string {
	if len(rs) == 0 {
		return "ø"
	}

	var text string
	for i := 0; i < len(rs); i++ {
		// separator goes in between entries, not before the first one
		sep := " "
		if text == "" {
			sep = ""
		}
		text += sep + fmt.Sprintf("%s", rs[i])
	}
	return text
}
// treediff computes δT/δtrack/δtkeycov for tree/trackSet specified by root in between old..new.
//
// It is the main entry-point for BTree-diff algorithm. XXX -> overview
//
// δtops is set of top nodes for changed subtrees.
// δZTC is connected(δZ/T) - connected closure for subset of δZ(old..new) that
// touches tracked nodes of T.
......@@ -443,6 +303,31 @@ func diffX(ctx context.Context, a, b Node, δZTC setOid, trackSet blib.PPTreeSub
}
}
// ---- diff algorithm ----
// nodeInRange represents a Node coming under [lo, hi_] key range in its tree.
//
// The following operations are provided:
//
//	Path() -> []oid	- get full path to this node (.prefix + oid of the node itself).
type nodeInRange struct {
prefix []zodb.Oid // path to this node goes via these objects; oid of the node itself is not included
keycov blib.KeyRange // key coverage: [Lo, Hi_] range of keys this node comes under
node Node // the node itself; Expand and GetToLeaf_ handle *Tree and *Bucket here
done bool // whether this node was already taken into account while computing diff
}
// rangeSplit represents a set of nodes covering a key range.
//
// Nodes come in ascending key order (key↑) and their key coverages do not
// intersect. Get_ and Expand rely on this ordering to locate nodes via binary
// search on the upper bound of each node's coverage.
//
// The following operations are provided:
//
//	Get(key)	-> node		- get node covering key
//	Expand(node)	-> children	- replace node with its children
//	GetToLeaf(key)	-> leaf		- get/expand to leaf node covering key
type rangeSplit []*nodeInRange // key↑
// diffT computes difference in between two subtrees.
//
// a, b point to top of subtrees @old and @new revisions.
......@@ -943,11 +828,157 @@ func diffB(ctx context.Context, a, b *Bucket) (δ map[Key]ΔValue, err error) {
return δ, nil
}
// ---- nodeInRange + rangeSplit ----
// String returns human-readable representation of the node: done marker
// ("*" if the node was already taken into account, " " otherwise), followed
// by key coverage and the node itself.
func (rn *nodeInRange) String() string {
	mark := " "
	if rn.done {
		mark = "*"
	}
	return fmt.Sprintf("%s%s%s", mark, rn.keycov, vnode(rn.node))
}
// Path returns full path to this node: .prefix followed by oid of the node itself.
//
// The result is always a freshly allocated slice that shares no memory with
// .prefix - else .prefix can become aliased in between children of a node.
func (rn *nodeInRange) Path() []zodb.Oid {
	// Build the copy in one exactly-sized allocation. Appending to rn.prefix
	// directly could write into spare capacity of its backing array and
	// clobber data of whoever else shares that array.
	path := make([]zodb.Oid, 0, len(rn.prefix)+1)
	path = append(path, rn.prefix...)
	return append(path, rn.node.POid())
}
// String returns space-separated representation of all nodes in rs.
//
// Empty rangeSplit is represented as "ø".
func (rs rangeSplit) String() string {
	if len(rs) == 0 {
		return "ø"
	}

	var text string
	for i := 0; i < len(rs); i++ {
		// separator goes in between entries, not before the first one
		sep := " "
		if text == "" {
			sep = ""
		}
		text += sep + fmt.Sprintf("%s", rs[i])
	}
	return text
}
// Get returns the node whose key coverage contains k.
//
// It is the panicking counterpart of Get_: if no node covers k, Get panics.
func (rs rangeSplit) Get(k Key) *nodeInRange {
	if found, covered := rs.Get_(k); covered {
		return found
	}

	panicf("key %v not covered; coverage: %s", k, rs)
	return nil // not reached as long as panicf panics
}
// Get_ returns the node whose key coverage contains k.
//
// It is comma-ok version of Get: ok=false is returned if k is above the
// coverage of every node in rs.
func (rs rangeSplit) Get_(k Key) (rnode *nodeInRange, ok bool) {
	// binary search for the first node whose upper bound is not below k
	idx := sort.Search(len(rs), func(j int) bool {
		return rs[j].keycov.Hi_ >= k
	})
	if idx >= len(rs) {
		return nil, false // key not covered
	}

	rnode = rs[idx]
	if rnode.keycov.Has(k) {
		return rnode, true
	}

	// the node found by upper bound does not actually contain k
	panicf("BUG: get(%v) -> %s; coverage: %s", k, rnode, rs)
	return rnode, true
}
// Expand replaces rnode with its children.
//
// Preconditions:
//
//   - rnode must be initially in *prs;
//   - rnode.node must be tree;
//   - rnode.node must be already activated.
//
// The children inserted in place of rnode are returned for convenience.
func (prs *rangeSplit) Expand(rnode *nodeInRange) (children rangeSplit) {
	rs := *prs

	// locate rnode: first entry whose upper bound reaches rnode's upper bound
	pos := sort.Search(len(rs), func(j int) bool {
		return rs[j].keycov.Hi_ >= rnode.keycov.Hi_
	})
	if found := pos < len(rs) && rs[pos] == rnode; !found {
		panicf("%s not in rangeSplit; coverage: %s", rnode, rs)
	}

	// [i].Key ≤ [i].Child.*.Key < [i+1].Key   i ∈ [0, len([]))
	//
	// [0].Key       = -∞ ; always returned so
	// [len(ev)].Key = +∞ ; should be assumed so
	entryv := rnode.node.(*Tree).Entryv()
	last := len(entryv) - 1
	children = make(rangeSplit, 0, len(entryv)+1)
	for j := range entryv {
		// first/last child inherit the corresponding bound from rnode itself
		lo := rnode.keycov.Lo
		if j > 0 {
			lo = entryv[j].Key()
		}
		hi_ := rnode.keycov.Hi_
		if j < last {
			hi_ = entryv[j+1].Key() - 1 // NOTE -1 because it is hi_] not hi)
		}

		child := &nodeInRange{
			prefix: rnode.Path(), // Path is called anew for every child
			keycov: blib.KeyRange{lo, hi_},
			node:   entryv[j].Child(),
		}
		children = append(children, child)
	}

	// del[pos]; insert(@pos, children)
	tail := append(children, rs[pos+1:]...)
	*prs = append(rs[:pos], tail...)
	return children
}
// GetToLeaf returns leaf node corresponding to key k.
//
// Leaf is usually a bucket node; the only exception is an empty tree, in
// which case the tree node itself is returned as the leaf.
// On the way down GetToLeaf expands, step by step, every tree it has to
// traverse to get to the next depth level.
//
// GetToLeaf panics if k is not covered.
func (prs *rangeSplit) GetToLeaf(ctx context.Context, k Key) (*nodeInRange, error) {
	leaf, covered, err := prs.GetToLeaf_(ctx, k)
	if err != nil || covered {
		return leaf, err
	}

	// no error, but nothing covers k
	panicf("key %v not covered; coverage: %s", k, *prs)
	return leaf, err
}
// GetToLeaf_ is comma-ok version of GetToLeaf.
//
// ok=false with nil error is returned if k is not covered by *prs.
// A non-nil error is returned if activating a tree on the way down fails.
func (prs *rangeSplit) GetToLeaf_(ctx context.Context, k Key) (rnode *nodeInRange, ok bool, err error) {
	if rnode, ok = prs.Get_(k); !ok {
		return nil, false, nil // key not covered
	}

	// descend one level per iteration until a leaf is reached
	for {
		// bucket = leaf
		if _, isBucket := rnode.node.(*Bucket); isBucket {
			return rnode, true, nil
		}

		// it is a tree -> activate to expand; check for ø case
		tree := rnode.node.(*Tree)
		if err = tree.PActivate(ctx); err != nil {
			return nil, false, err
		}
		// NOTE deferred inside the loop: every tree activated on the way
		// down is deactivated only when GetToLeaf_ returns.
		defer tree.PDeactivate()

		// empty tree -> don't expand - it is already leaf
		if len(tree.Entryv()) == 0 {
			return rnode, true, nil
		}

		// expand tree children and continue with the one covering k
		rnode = prs.Expand(rnode).Get(k) // k must be there
	}
}
// ---- stack of nodeInRange ----
// push pushes element to node stack.
func push(nodeStk *[]*nodeInRange, top *nodeInRange) {
......
......@@ -38,7 +38,7 @@ package xbtree
// users, which are expected to call ΔBtail.Track(treepath) to let ΔBtail know
// that such and such ZODB objects constitute a path from root of a tree to some
// of its leaf. After Track call the objects from the path and tree keys, that
// were covered by leaf node, become tracked: from now-on ΔBtail will detect
// are covered by leaf node, become tracked: from now-on ΔBtail will detect
// and provide BTree-level changes caused by any change of tracked tree objects
// or tracked keys. This guarantee can be provided because ΔBtail now knows
// that such and such objects belong to a particular tree.
......@@ -54,8 +54,8 @@ package xbtree
//
// A new Track request potentially grows tracked keys coverage. Due to this,
// ΔBtail needs to recompute potentially whole vδT of the affected tree. This
// recomputation is managed by rebuild* family of functions and uses the same
// treediff algorithm, that Update is using, but modulo PPTreeSubSet
// recomputation is managed by "rebuild..." family of functions and uses the
// same treediff algorithm that Update is using, but modulo PPTreeSubSet
// corresponding to δ key coverage. Update also potentially needs to rebuild
// whole vδT history, not only append new δT, because a change to tracked tree
// nodes can result in growth of tracked key coverage.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment