Commit 33d10066 authored by Kirill Smelkov's avatar Kirill Smelkov

go/zodb/fs1: BTree specialized with KEY=zodb.Oid, VALUE=int64

The FileStorage index maps an oid to the file position of the latest data
record for that oid. It is natural to implement this index via a BTree, as
e.g. ZODB/py does.

In Go world there is github.com/cznic/b BTree library but without
specialization and working via interface{} it is slower than it could be
and allocates a lot. So generate specialized version of that code with
key and value types exactly suitable for FileStorage indexing.

We use a bit patched b version with speed ups for bulk-loading data via
regular point-ingestion BTree entry point:

	https://lab.nexedi.com/kirr/b x/refill

The patches have not been upstreamed because they slow down the general case
a bit (only a bit, but still this is a "no" to me), and because with a
dedicated bulk-loading API it could be possible to load data several times
faster still. Nevertheless the current version is enough for indices that
are not very large.

Btw ZODB/py does the same (see fsBucket + friends).
parent 8f64f6ed
// Copyright (C) 2017 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// You can also Link and Combine this program with other software covered by
// the terms of any of the Free Software licenses or any of the Open Source
// Initiative approved licenses and Convey the resulting work. Corresponding
// source of such a combination shall include the source code for all other
// software used.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// See https://www.nexedi.com/licensing for rationale and options.
// Package fsb specializes cznic/b.Tree for FileStorage index needs.
//
// See gen-fsbtree for details.
package fsb
//go:generate ./gen-fsbtree
import "lab.nexedi.com/kirr/neo/go/zodb"
// oidCmp is the comparison function for fsbTree.
//
// It returns -1 if a < b, +1 if a > b and 0 if both are equal.
// Kept short & inlineable.
func oidCmp(a, b zodb.Oid) int {
	if a < b {
		return -1
	}
	if a > b {
		return +1
	}
	return 0
}
// Code generated by gen-fsbtree from github.com/cznic/b 93348d0; DO NOT EDIT.
// (from patched version available at https://lab.nexedi.com/kirr/b.git)
//
// KEY=zodb.Oid VALUE=int64
// ---- 8< ----
// Copyright 2014 The b Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package fsb
import "lab.nexedi.com/kirr/neo/go/zodb"
import (
"fmt"
"io"
"sync"
)
// Page capacity parameters.
//
// An index page (x) carries an array of 2*kx+2 elements and is split by
// Set/Put once its count exceeds 2*kx. A data page (d) carries an array of
// 2*kd+1 entries and overflows once it already holds 2*kd of them.
//
// The kd value is rewritten by gen-fsbtree so that a data page is about
// 4096 bytes in size.
const (
kx = 32 //TODO benchmark tune this number if using custom key/value type(s).
kd = 126
//kx = 2
//kd = 2
)
// init verifies that the page capacity constants are usable:
// kd must be at least 1 and kx at least 2. It panics otherwise.
func init() {
	switch {
	case kd < 1:
		panic(fmt.Errorf("kd %d: out of range", kd))
	case kx < 2:
		panic(fmt.Errorf("kx %d: out of range", kx))
	}
}
// Pools of recyclable objects: data pages (btDPool), enumerators (btEPool),
// trees (btTPool) and index pages (btXPool).
//
// Objects are reset to their zero value before being put back (see clr,
// Tree.Close and Enumerator.Close).
var (
btDPool = sync.Pool{New: func() interface{} { return &d{} }}
btEPool = btEpool{sync.Pool{New: func() interface{} { return &Enumerator{} }}}
btTPool = btTpool{sync.Pool{New: func() interface{} { return &Tree{} }}}
btXPool = sync.Pool{New: func() interface{} { return &x{} }}
)
// btTpool is a sync.Pool that hands out *Tree.
type btTpool struct{ sync.Pool }

// get takes a Tree from the pool and marks both of its hit positions
// (hitDi, hitPi) as "not set" (-1).
func (p *btTpool) get() *Tree {
	t := p.Get().(*Tree)
	t.hitDi, t.hitPi = -1, -1
	return t
}
// btEpool is a sync.Pool that hands out *Enumerator.
type btEpool struct{ sync.Pool }

// get takes an Enumerator from the pool and initializes every field of it
// from the corresponding argument.
func (p *btEpool) get(err error, hit bool, i int, k zodb.Oid, q *d, t *Tree, ver int64) *Enumerator {
	e := p.Get().(*Enumerator)
	e.err = err
	e.hit = hit
	e.i = i
	e.k = k
	e.q = q
	e.t = t
	e.ver = ver
	return e
}
type (
// Cmp compares a and b. Return value is:
//
//	< 0 if a < b
//	  0 if a == b
//	> 0 if a > b
//
// NOTE(review): in this specialized version the tree calls oidCmp
// directly (gen-fsbtree rewrites t.cmp calls); Cmp appears to be kept
// only as a type declaration - confirm before removing.
Cmp func(a, b zodb.Oid) int
d struct { // data page
c int // number of used entries in d
d [2*kd + 1]de // entries, kept sorted by key
n *d // next data page (nil for the last one)
p *d // previous data page (nil for the first one)
}
de struct { // d element
k zodb.Oid // key
v int64 // value
}
// Enumerator captures the state of enumerating a tree. It is returned
// from the Seek* methods. The enumerator is aware of any mutations
// made to the tree in the process of enumerating it and automatically
// resumes the enumeration at the proper key, if possible.
//
// However, once an Enumerator returns io.EOF to signal "no more
// items", it does no more attempt to "resync" on tree mutation(s). In
// other words, io.EOF from an Enumerator is "sticky" (idempotent).
Enumerator struct {
err error // sticky error; once set Next/Prev keep returning it
hit bool // whether position i in q holds key k exactly
i int // current position inside q
k zodb.Oid // key used to re-Seek after the tree has been mutated
q *d // current data page (nil when there is nothing to enumerate)
t *Tree // tree being enumerated
ver int64 // t.ver observed when the position was established
}
// Tree is a B+tree.
Tree struct {
c int // number of items in the tree (see Len)
first *d // first data page
last *d // last data page
r interface{} // root: nil, *x (index page) or *d (data page)
ver int64 // incremented on structural changes; lets Enumerators detect mutations
// information about last data page which Set/Put/Delete accessed
hitD *d // data page & pos of last write access
hitDi int
hitP *x // parent & pos for data page (= nil/-1 if no parent)
hitPi int
hitKmin zodb.Oid // hitD allowed key range is [hitKmin, hitKmax)
hitKmax zodb.Oid
hitPKmin zodb.Oid // ----//--- for hitP
hitPKmax zodb.Oid
hitKminSet bool // whether corresponding hitK* value is set
hitKmaxSet bool // if value is not set it is treated as ±∞ depending on context
hitPKminSet bool
hitPKmaxSet bool
}
xe struct { // x element
ch interface{} // child page: *x or *d
k zodb.Oid // separator key following ch
}
x struct { // index page
c int // number of separator keys; children occupy x[0..c]
x [2*kx + 2]xe
}
)
// Zero values of the page/element/tree types. They are only ever read:
// assigning one of them resets an object (or an array slot) before it is
// recycled or left for the garbage collector.
var ( // R/O zero values
zd d
zde de
ze Enumerator
zk zodb.Oid
zt Tree
zx x
zxe xe
)
// clr recursively resets the page q and everything below it and returns the
// pages to their pools. Values that are neither *x nor *d are ignored.
func clr(q interface{}) {
	switch page := q.(type) {
	case *x:
		// Ch0 Sep0 ... Chn-1 Sepn-1 Chn
		for i := 0; i <= page.c; i++ {
			clr(page.x[i].ch)
		}
		*page = zx
		btXPool.Put(page)

	case *d:
		*page = zd
		btDPool.Put(page)
	}
}
// setHitKmin records k as the lower bound of the hit data page key range.
func (t *Tree) setHitKmin(k zodb.Oid) {
	t.hitKminSet = true
	t.hitKmin = k
}

// setHitKmax records k as the upper bound of the hit data page key range.
func (t *Tree) setHitKmax(k zodb.Oid) {
	t.hitKmaxSet = true
	t.hitKmax = k
}

// setHitPKmin records k as the lower bound of the hit parent key range.
func (t *Tree) setHitPKmin(k zodb.Oid) {
	t.hitPKminSet = true
	t.hitPKmin = k
}

// setHitPKmax records k as the upper bound of the hit parent key range.
func (t *Tree) setHitPKmax(k zodb.Oid) {
	t.hitPKmaxSet = true
	t.hitPKmax = k
}
// -------------------------------------------------------------------------- x
// newX takes an index page from the pool and installs ch0 as its first
// child.
func newX(ch0 interface{}) *x {
	page := btXPool.Get().(*x)
	page.x[0].ch = ch0
	return page
}
// extract decrements the separator count of q and, unless i addressed the
// last separator, shifts the elements after i one slot to the left and
// clears the slots that became unused.
func (q *x) extract(i int) {
	n := q.c - 1
	q.c = n
	if i >= n {
		return
	}

	copy(q.x[i:], q.x[i+1:n+1])
	q.x[n].ch = q.x[n+1].ch
	q.x[n].k = zk  // GC
	q.x[n+1] = zxe // GC
}
// insert puts separator k at position i of q with ch as the child to the
// right of it (slot i+1), shifting later elements one slot to the right.
// It returns q.
func (q *x) insert(i int, k zodb.Oid, ch interface{}) *x {
	n := q.c
	if i < n {
		// make room: move the tail one slot to the right
		q.x[n+1].ch = q.x[n].ch
		copy(q.x[i+2:], q.x[i+1:n])
		q.x[i+1].k = q.x[i].k
	}

	q.c = n + 1
	q.x[i].k = k
	q.x[i+1].ch = ch
	return q
}
// siblings returns the data pages directly to the left and to the right of
// child i of q. A side without a neighbour yields nil; a negative i yields
// (nil, nil).
func (q *x) siblings(i int) (l, r *d) {
	if i < 0 {
		return nil, nil
	}

	if i > 0 {
		l = q.x[i-1].ch.(*d)
	}
	if i < q.c {
		r = q.x[i+1].ch.(*d)
	}
	return l, r
}
// -------------------------------------------------------------------------- d
// mvL moves the first c entries of r to the end of l and closes the gap
// at the start of r.
func (l *d) mvL(r *d, c int) {
	head := r.d[:c]
	copy(l.d[l.c:], head)
	copy(r.d[:], r.d[c:r.c])

	l.c += c
	r.c -= c
}
// mvR moves the last c entries of l to the start of r, after shifting the
// current entries of r to the right by c.
func (l *d) mvR(r *d, c int) {
	keep := l.c - c // entries that stay in l

	copy(r.d[c:], r.d[:r.c])
	copy(r.d[:c], l.d[keep:])

	r.c += c
	l.c = keep
}
// ----------------------------------------------------------------------- Tree
// TreeNew returns a newly created, empty Tree.
//
// The tree is taken from an internal pool. Keys are always collated with
// oidCmp; unlike the generic b.TreeNew there is no compare function
// argument.
func TreeNew() *Tree {
return btTPool.get()
}
// Clear removes all K/V pairs from the tree.
//
// All pages are recycled, the remembered hit state is forgotten and the
// tree version is bumped. Clearing a tree without a root is a no-op.
func (t *Tree) Clear() {
	root := t.r
	if root == nil {
		return
	}
	clr(root)

	t.c = 0
	t.first, t.last = nil, nil
	t.r = nil

	// forget everything about the last accessed data page
	t.hitD, t.hitP = nil, nil
	t.hitDi, t.hitPi = -1, -1
	t.hitKmin, t.hitKmax = zk, zk
	t.hitPKmin, t.hitPKmax = zk, zk
	t.hitKminSet, t.hitKmaxSet = false, false
	t.hitPKminSet, t.hitPKmaxSet = false, false

	t.ver++
}
// Close performs Clear and recycles t to a pool for possible later reuse. No
// references to t should exist or such references must not be used afterwards.
func (t *Tree) Close() {
t.Clear()
// reset every field so the pooled object carries no stale state
*t = zt
btTPool.Put(t)
}
// cat concatenates data page r into q: all entries of r are appended to q,
// r is unlinked from the page list and recycled.
//
// pi is the index of q in p. If p has more than one separator, the
// separator at pi is removed from p. Otherwise the current root page is
// recycled and q becomes the new root.
// NOTE(review): the latter branch recycles t.r rather than p, so it
// presumably relies on p being the root whenever p.c <= 1 - confirm.
func (t *Tree) cat(p *x, q, r *d, pi int) {
t.ver++
q.mvL(r, r.c)
// unlink r from the doubly linked list of data pages
if r.n != nil {
r.n.p = q
} else {
t.last = q
}
q.n = r.n
*r = zd
btDPool.Put(r)
if p.c > 1 {
p.extract(pi)
p.x[pi].ch = q
return
}
// p would be left without separators: drop the root level
switch x := t.r.(type) {
case *x:
*x = zx
btXPool.Put(x)
case *d:
*x = zd
btDPool.Put(x)
}
t.r = q
}
// catX concatenates index page r into q. The separator p.x[pi].k, which
// sits between q and r in p, is pulled down in between the two halves, and
// r is recycled.
//
// If p has more than one separator, the separator at pi is removed from p.
// Otherwise the current root page is recycled and q becomes the new root.
// NOTE(review): as in cat, that branch recycles t.r and thus presumably
// expects p to be the root - confirm.
func (t *Tree) catX(p, q, r *x, pi int) {
t.ver++
q.x[q.c].k = p.x[pi].k
copy(q.x[q.c+1:], r.x[:r.c])
q.c += r.c + 1
q.x[q.c].ch = r.x[r.c].ch
*r = zx
btXPool.Put(r)
if p.c > 1 {
// remove separator pi and child pi+1 from p (q stays at pi)
p.c--
pc := p.c
if pi < pc {
p.x[pi].k = p.x[pi+1].k
copy(p.x[pi+1:], p.x[pi+2:pc+1])
p.x[pc].ch = p.x[pc+1].ch
p.x[pc].k = zk // GC
p.x[pc+1].ch = nil // GC
}
return
}
// p would be left without separators: drop the root level
switch x := t.r.(type) {
case *x:
*x = zx
btXPool.Put(x)
case *d:
*x = zd
btDPool.Put(x)
}
t.r = q
}
// Delete removes the k's KV pair, if it exists, in which case Delete returns
// true.
//
// Delete first tries to work on the data page touched by the previous
// Set/Put/Delete (the "hit" page, see hitFind). Only when k cannot be
// handled there does it descend from the root, rebalancing index pages
// that hold fewer than kx separators on the way down.
func (t *Tree) Delete(k zodb.Oid) (ok bool) {
//dbg("--- PRE Delete(%v)\t; %v @%d, [%v, %v) pk: [%v, %v)\n%s", k, t.hitD, t.hitDi, t.hitKmin, t.hitKmax, t.hitPKmin, t.hitPKmax, t.dump())
//defer t.checkHit(k, opDel)
//defer func() {
// dbg("--- POST\n%s\n====\n", t.dump())
//}()
// check if we can do the delete nearby previous change
i, ok := t.hitFind(k)
if i >= 0 {
dd := t.hitD
switch {
case !ok:
// tried to delete last or element past max k in hitD
// see also "extract rule" below
if i >= dd.c {
i--
}
t.hitDi = i
return false
case dd.c > kd:
// page stays at least half full after the removal - no rebalancing needed
t.extract(dd, i)
// extract rule for t.hitDi
if t.hitDi >= dd.c {
t.hitDi--
}
return true
// here: need to extract / underflow but we have to check: if underflowing
// would cause upper level underflow (underflowX) -> we cannot extract /
// underflow here - need to do the usual scan from root to underflow index pages.
default:
p, pi := t.hitP, t.hitPi
if p != nil && p.c < kx && p != t.r {
// leave the switch and fall through to the slow path below
break
}
t.extract(dd, i)
if p != nil {
// NOTE underflow corrects hit D,Di, P,Pi, Kmin, Kmax as needed
t.underflow(p, dd, pi)
} else if t.c == 0 {
t.Clear()
}
// extract rule for t.hitDi
if t.hitD != nil && t.hitDi >= t.hitD.c {
t.hitDi--
}
return true
}
}
// don't leave previously active data page with < 50% load
if t.hitD != nil && t.hitD.c < kd && t.hitP != nil {
//println("refill")
t.refill(t.hitP, t.hitD, t.hitPi)
}
// data page not quickly found - search and descent from root
pi := -1
var p *x
q := t.r
if q == nil {
return false
}
t.hitKminSet, t.hitKmaxSet = false, false // initially [-∞, +∞)
t.hitPKminSet, t.hitPKmaxSet = false, false
for {
switch x := q.(type) {
case *x:
i, ok := t.findX(x, k)
if ok {
// k equals separator i: continue in the child to the right of it
i++
}
if x.c < kx && q != t.r {
// NOTE underflowX corrects hit Kmin and Kmax as needed
x, i = t.underflowX(p, x, pi, i)
}
// the range that was valid for this page becomes the parent range
t.hitPKmin = t.hitKmin
t.hitPKmax = t.hitKmax
t.hitPKminSet = t.hitKminSet
t.hitPKmaxSet = t.hitKmaxSet
p = x
pi = i
q = x.x[pi].ch
if pi > 0 { // k=-∞ @-1
t.setHitKmin(p.x[pi-1].k)
}
if pi < p.c { // k=+∞ @p.c
t.setHitKmax(p.x[pi].k)
}
case *d:
i, ok := t.find(x, k)
// data page found - perform the delete
t.hitP = p
t.hitPi = pi
if !ok {
t.hitD = x
if i >= x.c {
// tried to delete last or element past max k in hitD
i--
}
t.hitDi = i
return false
}
t.extract(x, i)
if x.c < kd {
if q != t.r {
// NOTE underflow corrects hit D,Di, P,Pi, Kmin, Kmax as needed
t.underflow(p, x, pi)
} else if t.c == 0 {
t.Clear()
}
}
// extract rule for t.hitDi
if t.hitD != nil && t.hitDi >= t.hitD.c {
t.hitDi--
}
return true
}
}
}
// extract removes entry i from data page q, decrements the item count and
// remembers (q, i) as the hit position.
//
// NOTE extract users - in the end - must decrement t.hitDi if
// t.hitDi == t.hitD.c. It is not done right here because the unaltered
// position is required in case merging a right sibling data page will be
// needed.
func (t *Tree) extract(q *d, i int) { // (r int64) {
	t.ver++
	//r = q.d[i].v // prepared for Extract

	n := q.c - 1
	q.c = n
	if i < n {
		// close the gap left by entry i
		copy(q.d[i:], q.d[i+1:n+1])
	}
	q.d[n] = zde // GC

	t.c--
	t.hitD, t.hitDi = q, i
}
// findX binary-searches the separators of index page x for k.
//
// If some separator equals k its index is returned with ok=true. Otherwise
// the index of the first separator greater than k (x.c if there is none)
// is returned with ok=false.
func (t *Tree) findX(x *x, k zodb.Oid) (i int, ok bool) {
	lo, hi := 0, x.c-1
	for lo <= hi {
		mid := (lo + hi) >> 1
		c := oidCmp(k, x.x[mid].k)
		if c == 0 {
			return mid, true
		}
		if c > 0 {
			lo = mid + 1
		} else {
			hi = mid - 1
		}
	}
	return lo, false
}
// find binary-searches the entries of data page x for k.
//
// If some entry has key k its index is returned with ok=true. Otherwise
// the index of the first entry with a key greater than k (x.c if there is
// none) is returned with ok=false.
func (t *Tree) find(x *d, k zodb.Oid) (i int, ok bool) {
	lo, hi := 0, x.c-1
	for lo <= hi {
		mid := (lo + hi) >> 1
		c := oidCmp(k, x.d[mid].k)
		if c == 0 {
			return mid, true
		}
		if c > 0 {
			lo = mid + 1
		} else {
			hi = mid - 1
		}
	}
	return lo, false
}
// hitFind returns position for k in previously hit data page.
//
// If there is no hit page, or k lies outside of the hit key range
// [hitKmin, hitKmax), (-1, false) is returned. Otherwise returns are:
//
//   - i: index corresponding to data entry in t.hitD with min(k' : k <= k')
//   - ok: whether k' == k
func (t *Tree) hitFind(k zodb.Oid) (i int, ok bool) {
	// DEBUG: enable this to test how slow path computes hit{Kmin,Kmax} on all keys
	//return -1, false

	page := t.hitD
	if page == nil {
		return -1, false
	}

	pos := t.hitDi
	c := oidCmp(k, page.d[pos].k)
	if c == 0 {
		return pos, true
	}

	// narrow the search to the side of pos where k has to be
	var lo, hi int
	if c > 0 {
		if t.hitKmaxSet && oidCmp(k, t.hitKmax) >= 0 {
			// >= hitKmax
			return -1, false
		}
		// NOTE we are ok if pos+1=page.c -> page.c will be returned
		lo, hi = pos+1, page.c-1
	} else {
		if t.hitKminSet && oidCmp(k, t.hitKmin) < 0 {
			// < hitKmin
			return -1, false
		}
		// NOTE we are ok if pos-1=-1 -> 0 will be returned
		lo, hi = 0, pos-1
	}

	for lo <= hi {
		mid := (lo + hi) >> 1
		c := oidCmp(k, page.d[mid].k)
		if c == 0 {
			return mid, true
		}
		if c > 0 {
			lo = mid + 1
		} else {
			hi = mid - 1
		}
	}
	return lo, false
}
// First returns the first item of the tree in the key collating order, or
// (zero-value, zero-value) if the tree is empty.
func (t *Tree) First() (k zodb.Oid, v int64) {
	page := t.first
	if page == nil {
		return k, v
	}
	head := page.d[0]
	return head.k, head.v
}
// Get returns the value associated with k and true if it exists. Otherwise Get
// returns (zero-value, false).
func (t *Tree) Get(k zodb.Oid) (v int64, ok bool) {
	q := t.r
	if q == nil {
		return 0, false
	}

	for {
		switch page := q.(type) {
		case *x:
			i, found := t.findX(page, k)
			if found {
				// k equals separator i: it lives in the child to the right
				i++
			}
			q = page.x[i].ch

		case *d:
			if i, found := t.find(page, k); found {
				return page.d[i].v, true
			}
			return 0, false
		}
	}
}
// insert stores (k, v) at position i of data page q, shifting later entries
// one slot to the right. It bumps the item count and the tree version,
// remembers (q, i) as the hit position and returns q.
func (t *Tree) insert(q *d, i int, k zodb.Oid, v int64) *d {
	t.ver++

	n := q.c
	if i < n {
		// open a gap at i
		copy(q.d[i+1:], q.d[i:n])
	}
	q.c = n + 1
	q.d[i] = de{k: k, v: v}

	t.c++
	t.hitD, t.hitDi = q, i
	return q
}
// Last returns the last item of the tree in the key collating order, or
// (zero-value, zero-value) if the tree is empty.
func (t *Tree) Last() (k zodb.Oid, v int64) {
	page := t.last
	if page == nil {
		return k, v
	}
	tail := page.d[page.c-1]
	return tail.k, tail.v
}
// Len returns the number of items in the tree.
//
// The count is maintained incrementally by insert/extract, so Len does not
// walk the tree.
func (t *Tree) Len() int {
return t.c
}
// overflow inserts (k, v) at position i of data page q (child pi of p) when
// the callers found q to already hold 2*kd entries.
//
// It tries, in order: to push one entry into the left sibling, to push one
// entry (or the new pair itself when i == 2*kd) into the right sibling, and
// finally to split q. Separators in p and the hit key range / hit parent
// index are adjusted to match where (k, v) ended up.
func (t *Tree) overflow(p *x, q *d, pi, i int, k zodb.Oid, v int64) {
t.ver++
l, r := p.siblings(pi)
if l != nil && l.c < 2*kd && i != 0 {
// left sibling has room: rotate q's first entry into it
l.mvL(q, 1)
t.insert(q, i-1, k, v)
p.x[pi-1].k = q.d[0].k
t.setHitKmin(q.d[0].k)
//t.hitPi = pi already pre-set this way
return
}
if r != nil && r.c < 2*kd {
if i < 2*kd {
// right sibling has room: rotate q's last entry into it
q.mvR(r, 1)
t.insert(q, i, k, v)
p.x[pi].k = r.d[0].k
t.setHitKmax(r.d[0].k)
//t.hitPi = pi already pre-set this way
return
}
// the new pair goes past the end of q: make it the first entry of r
t.insert(r, 0, k, v)
p.x[pi].k = k
t.setHitKmin(k)
if pi+1 < p.c { // k=+∞ @p.c
t.setHitKmax(p.x[pi+1].k)
} else {
t.hitKmax = t.hitPKmax
t.hitKmaxSet = t.hitPKmaxSet
}
t.hitPi = pi + 1
return
}
t.split(p, q, pi, i, k, v)
}
// Seek returns an Enumerator positioned on the first item whose key is >= k
// and ok reports whether that item's key equals k. The Enumerator's position
// is possibly after the last item of a data page or of the tree.
//
// For a tree without a root the returned Enumerator has no data page.
func (t *Tree) Seek(k zodb.Oid) (e *Enumerator, ok bool) {
	q := t.r
	if q == nil {
		return btEPool.get(nil, false, 0, k, nil, t, t.ver), false
	}

	for {
		switch page := q.(type) {
		case *x:
			i, found := t.findX(page, k)
			if found {
				// k equals separator i: it lives in the child to the right
				i++
			}
			q = page.x[i].ch

		case *d:
			i, found := t.find(page, k)
			return btEPool.get(nil, found, i, k, page, t, t.ver), found
		}
	}
}
// SeekFirst returns an enumerator positioned on the first KV pair in the tree,
// if any. For an empty tree, err == io.EOF is returned and e will be nil.
func (t *Tree) SeekFirst() (e *Enumerator, err error) {
	if t.first == nil {
		return nil, io.EOF
	}
	head := t.first
	return btEPool.get(nil, true, 0, head.d[0].k, head, t, t.ver), nil
}
// SeekLast returns an enumerator positioned on the last KV pair in the tree,
// if any. For an empty tree, err == io.EOF is returned and e will be nil.
func (t *Tree) SeekLast() (e *Enumerator, err error) {
	if t.last == nil {
		return nil, io.EOF
	}
	tail := t.last
	end := tail.c - 1
	return btEPool.get(nil, true, end, tail.d[end].k, tail, t, t.ver), nil
}
// Set sets the value associated with k.
//
// Set first tries to work on the data page touched by the previous
// Set/Put/Delete (the "hit" page, see hitFind). Only when k cannot be
// handled there does it descend from the root, splitting index pages that
// hold more than 2*kx separators on the way down.
func (t *Tree) Set(k zodb.Oid, v int64) {
//dbg("--- PRE Set(%v, %v)\t; %v @%d, [%v, %v) pk: [%v, %v)\n%s", k, v, t.hitD, t.hitDi, t.hitKmin, t.hitKmax, t.hitPKmin, t.hitPKmax, t.dump())
//defer t.checkHit(k, opSet)
//defer func() {
// dbg("--- POST\n%s\n====\n", t.dump())
//}()
// check if we can do the update nearby previous change
i, ok := t.hitFind(k)
if i >= 0 {
dd := t.hitD
switch {
case ok:
// key already present: overwrite in place
dd.d[i].v = v
t.hitDi = i
return
case dd.c < 2*kd:
t.insert(dd, i, k, v)
return
// here: need to overflow but we have to check: if overflowing would
// cause upper level overflow (splitX) -> we cannot overflow here -
// - need to do the usual scan from root to split index pages.
default:
p, pi := t.hitP, t.hitPi
if p != nil && p.c > 2*kx {
// leave the switch and fall through to the slow path below
break
}
// NOTE overflow corrects hit Kmin, Kmax and Pi as needed
t.overflow(p, dd, pi, i, k, v)
return
}
}
// don't leave previously active data page with < 50% load
if t.hitD != nil && t.hitD.c < kd && t.hitP != nil {
//println("refill")
t.refill(t.hitP, t.hitD, t.hitPi)
}
// data page not quickly found - search and descent from root
pi := -1
var p *x
q := t.r
if q == nil {
// empty tree: the new pair becomes the only entry of a fresh root data page
z := t.insert(btDPool.Get().(*d), 0, k, v)
t.r, t.first, t.last = z, z, z
return
}
t.hitKminSet, t.hitKmaxSet = false, false // initially [-∞, +∞)
t.hitPKminSet, t.hitPKmaxSet = false, false
for {
switch x := q.(type) {
case *x:
i, ok := t.findX(x, k)
if ok {
// k equals separator i: continue in the child to the right of it
i++
}
if x.c > 2*kx {
// NOTE splitX corrects hit Kmin and Kmax as needed
x, i = t.splitX(p, x, pi, i)
}
// the range that was valid for this page becomes the parent range
t.hitPKmin = t.hitKmin
t.hitPKmax = t.hitKmax
t.hitPKminSet = t.hitKminSet
t.hitPKmaxSet = t.hitKmaxSet
p = x
pi = i
q = p.x[pi].ch
if pi > 0 { // k=-∞ @-1
t.setHitKmin(p.x[pi-1].k)
}
if pi < p.c { // k=+∞ @p.c
t.setHitKmax(p.x[pi].k)
}
case *d:
i, ok := t.find(x, k)
// data page found - perform the update
t.hitP = p
t.hitPi = pi
switch {
case ok:
x.d[i].v = v
t.hitD, t.hitDi = x, i
case x.c < 2*kd:
t.insert(x, i, k, v)
default:
// NOTE overflow corrects hit Kmin, Kmax and Pi as needed
t.overflow(p, x, pi, i, k, v)
}
return
}
}
}
// Put combines Get and Set in a more efficient way where the tree is walked
// only once. The upd(ater) receives (old-value, true) if a KV pair for k
// exists or (zero-value, false) otherwise. It can then return a (new-value,
// true) to create or overwrite the existing value in the KV pair, or
// (whatever, false) if it decides not to create or not to update the value of
// the KV pair.
//
// tree.Set(k, v) call conceptually equals calling
//
//	tree.Put(k, func(int64, bool) (int64, bool) { return v, true })
//
// modulo the differing return values.
//
// Put returns the old value (zero-value if k was not present) and whether
// the updater asked for the new value to be written.
func (t *Tree) Put(k zodb.Oid, upd func(oldV int64, exists bool) (newV int64, write bool)) (oldV int64, written bool) {
//defer func () { t.checkHit(k, opPut(written)) }()
pi := -1
var p *x
q := t.r
var newV int64
if q == nil {
// new KV pair in empty tree
newV, written = upd(newV, false)
if !written {
return
}
z := t.insert(btDPool.Get().(*d), 0, k, newV)
t.r, t.first, t.last = z, z, z
return
}
// check if we can do the update nearby previous change
i, ok := t.hitFind(k)
if i >= 0 {
dd := t.hitD
switch {
case ok:
oldV = dd.d[i].v
newV, written = upd(oldV, true)
if written {
dd.d[i].v = newV
}
t.hitDi = i
return
case dd.c < 2*kd:
newV, written = upd(newV, false)
if written {
t.insert(dd, i, k, newV)
} else {
t.hitDi = i
// if it was only Get landed past max key - adjust it to valid entry
if t.hitDi >= dd.c {
t.hitDi--
}
}
return
// here: need to overflow but we have to check: if overflowing would
// cause upper level overflow (splitX) -> we cannot overflow here -
// - need to do the usual scan from root to split index pages.
default:
p, pi := t.hitP, t.hitPi
if p != nil && p.c > 2*kx {
// leave the switch and fall through to the slow path below
break
}
newV, written = upd(newV, false)
if written {
// NOTE overflow corrects hit Kmin, Kmax and Pi as needed
t.overflow(p, dd, pi, i, k, newV)
} else {
t.hitDi = i
// see about "valid entry" ^^^
if t.hitDi >= dd.c {
t.hitDi--
}
}
return
}
}
// don't leave previously active data page with < 50% load
if t.hitD != nil && t.hitD.c < kd && t.hitP != nil {
//println("refill")
t.refill(t.hitP, t.hitD, t.hitPi)
}
// data page not quickly found - search and descent from root
t.hitKminSet, t.hitKmaxSet = false, false // initially [-∞, +∞)
t.hitPKminSet, t.hitPKmaxSet = false, false
for {
switch x := q.(type) {
case *x:
i, ok := t.findX(x, k)
if ok {
// k equals separator i: continue in the child to the right of it
i++
}
if x.c > 2*kx {
// NOTE splitX corrects hit Kmin and Kmax as needed
x, i = t.splitX(p, x, pi, i)
}
// the range that was valid for this page becomes the parent range
t.hitPKmin = t.hitKmin
t.hitPKmax = t.hitKmax
t.hitPKminSet = t.hitKminSet
t.hitPKmaxSet = t.hitKmaxSet
p = x
pi = i
q = p.x[pi].ch
if pi > 0 { // k=-∞ @-1
t.setHitKmin(p.x[pi-1].k)
}
if pi < p.c { // k=+∞ @p.c
t.setHitKmax(p.x[pi].k)
}
case *d:
i, ok := t.find(x, k)
// data page found - perform the update
t.hitP = p
t.hitPi = pi
switch {
case ok:
oldV = x.d[i].v
newV, written = upd(oldV, true)
if written {
x.d[i].v = newV
}
t.hitD, t.hitDi = x, i
default:
newV, written = upd(newV, false)
if !written {
t.hitD, t.hitDi = x, i
// see about "valid entry" ^^^
if t.hitDi >= x.c {
t.hitDi--
}
break
}
switch {
case x.c < 2*kd:
t.insert(x, i, k, newV)
default:
// NOTE overflow corrects hit Kmin, Kmax and Pi as needed
t.overflow(p, x, pi, i, k, newV)
}
}
return
}
}
}
// split inserts (k, v) at position i of the full data page q (child pi of
// p) by creating a new data page r right after q.
//
// When i == 2*kd (the new pair goes past the end of q) nothing is copied:
// the pair becomes the only entry of r. Otherwise the upper kd entries of
// q are moved to r and the pair is inserted into whichever half covers i.
// If pi < 0 a new root index page is created over q and r. The hit key
// range and hit parent index are adjusted to the page that received the
// pair.
func (t *Tree) split(p *x, q *d, pi, i int, k zodb.Oid, v int64) {
t.ver++
r := btDPool.Get().(*d)
// link r into the data page list right after q
if q.n != nil {
r.n = q.n
r.n.p = r
} else {
t.last = r
}
q.n = r
r.p = q
// don't copy on sequential bulk-set pattern
if i == 2*kd {
t.insert(r, 0, k, v)
} else {
copy(r.d[:], q.d[kd:2*kd])
for i := range q.d[kd:] {
q.d[kd+i] = zde
}
q.c = kd
r.c = kd
}
// XXX vs ^^^ (not adding elements)
if pi >= 0 {
p.insert(pi, r.d[0].k, r)
} else {
// q had no parent: grow the tree by one level
p = newX(q).insert(0, r.d[0].k, r)
pi = 0
t.r = p
t.hitP = p
t.hitPi = pi
}
if i > kd {
// XXX vvv try to merge with ^^^ t.insert(r, 0, k, v)
if i < 2*kd {
t.insert(r, i-kd, k, v)
}
// the pair lives in r: hit range becomes the range of r
t.setHitKmin(p.x[pi].k)
if pi+1 < p.c { // k=+∞ @p.c
t.setHitKmax(p.x[pi+1].k)
} else {
t.hitKmax = t.hitPKmax
t.hitKmaxSet = t.hitPKmaxSet
}
t.hitPi = pi + 1
} else {
t.insert(q, i, k, v)
t.setHitKmax(r.d[0].k)
//t.hitPi = pi already pre-set this way
}
}
// splitX splits index page q (child pi of p) in two: the elements after
// slot kx are moved to a new page r, both pages end up with kx separators
// and the separator q.x[kx].k is pushed up into p. If pi < 0 a new root is
// created over q and r.
//
// i is the child index inside the original q where the descent was about
// to continue. The returned page and index denote the same child after
// the split. The hit key range is narrowed accordingly.
func (t *Tree) splitX(p *x, q *x, pi int, i int) (*x, int) {
t.ver++
r := btXPool.Get().(*x)
copy(r.x[:], q.x[kx+1:])
q.c = kx
r.c = kx
if pi >= 0 {
p.insert(pi, q.x[kx].k, r)
} else {
// q had no parent: grow the tree by one level
p = newX(q).insert(0, q.x[kx].k, r)
pi = 0
t.r = p
}
// clear the slots of q that were moved out
q.x[kx].k = zk
for i := range q.x[kx+1:] {
q.x[kx+i+1] = zxe
}
if i > kx {
// descent continues in the new right page
q = r
i -= kx + 1
t.setHitKmin(p.x[pi].k)
if pi+1 < p.c { // k=+∞ @p.c
t.setHitKmax(p.x[pi+1].k)
} else {
t.hitKmax = t.hitPKmax
t.hitKmaxSet = t.hitPKmaxSet
}
} else {
t.setHitKmax(p.x[pi].k)
}
return q, i
}
// underflow rebalances data page q (child pi of p) after an entry was
// extracted from it.
//
// It tries, in order: to borrow one entry from the left sibling, to borrow
// one entry from the right sibling (either is done only if the two pages
// together hold at least 2*kd entries), and otherwise concatenates q with
// a sibling via cat. Hit page, hit positions and hit key range are
// adjusted to follow the entries of q.
func (t *Tree) underflow(p *x, q *d, pi int) {
t.ver++
l, r := p.siblings(pi)
// TODO don't move 1 elements for bulk-delete pattern (see split for bulk-set case)
if l != nil && l.c+q.c >= 2*kd {
l.mvR(q, 1)
p.x[pi-1].k = q.d[0].k
t.setHitKmin(q.d[0].k)
//t.hitPi = pi already pre-set this way
// an entry was prepended to q: the hit position shifts by one
t.hitDi += 1
return
}
if r != nil && q.c+r.c >= 2*kd {
q.mvL(r, 1)
p.x[pi].k = r.d[0].k
t.setHitKmax(r.d[0].k)
//t.hitPi = pi already pre-set this way
// hitDi stays the same
r.d[r.c] = zde // GC
return
}
if l != nil {
// merge q into l: the hit now lives in l, after l's own entries
t.hitD = l
t.hitDi += l.c
pi--
t.cat(p, l, q, pi)
t.hitKmin = t.hitPKmin
t.hitKminSet = t.hitPKminSet // XXX move vvv under else ? (but vs t.r == l)
if t.r == l {
// cat removed p
t.hitP = nil
t.hitPi = -1
} else {
if pi > 0 { // k=-∞ @-1
t.setHitKmin(p.x[pi-1].k)
}
t.hitPi = pi
}
return
}
// no left sibling: merge r into q
t.cat(p, q, r, pi)
// hitD/hitDi stays unchanged
t.hitKmax = t.hitPKmax
t.hitKmaxSet = t.hitPKmaxSet // XXX move vvv under else ? (but vs t.r == q)
if t.r == q {
// cat removed p
t.hitP = nil
t.hitPi = -1
} else {
if pi < p.c { // k=+∞ @p.c
t.setHitKmax(p.x[pi].k)
}
//t.hitPi = pi already pre-set this way
}
}
// underflowX rebalances index page q (child pi of p), which Delete found to
// hold fewer than kx separators.
//
// It tries, in order: to rotate one element in from the left sibling, to
// rotate one in from the right sibling (either is done only if that
// sibling has more than kx separators), and otherwise concatenates q with
// a sibling via catX.
//
// i is the child index inside q where the descent was about to continue.
// The returned page and index denote the same child after rebalancing.
// The hit key range is adjusted accordingly.
func (t *Tree) underflowX(p *x, q *x, pi int, i int) (*x, int) {
t.ver++
var l, r *x
if pi >= 0 {
if pi > 0 {
l = p.x[pi-1].ch.(*x)
}
if pi < p.c {
r = p.x[pi+1].ch.(*x)
}
}
if l != nil && l.c > kx {
// rotate right: last child of l becomes first child of q
q.x[q.c+1].ch = q.x[q.c].ch
copy(q.x[1:], q.x[:q.c])
q.x[0].ch = l.x[l.c].ch
q.x[0].k = p.x[pi-1].k
q.c++
i++
l.c--
p.x[pi-1].k = l.x[l.c].k
t.setHitKmin(l.x[l.c].k)
return q, i
}
if r != nil && r.c > kx {
// rotate left: first child of r becomes last child of q
q.x[q.c].k = p.x[pi].k
q.c++
q.x[q.c].ch = r.x[0].ch
p.x[pi].k = r.x[0].k
t.setHitKmax(r.x[0].k)
copy(r.x[:], r.x[1:r.c])
r.c--
rc := r.c
r.x[rc].ch = r.x[rc+1].ch
r.x[rc].k = zk
r.x[rc+1].ch = nil
return q, i
}
if l != nil {
// merge q into l: children of q follow l's c+1 children
i += l.c + 1
pi--
t.catX(p, l, q, pi)
q = l
if t.r != q && pi > 0 { // k=-∞ @-1
t.setHitKmin(p.x[pi-1].k)
} else {
t.hitKmin = t.hitPKmin
t.hitKminSet = t.hitPKminSet
}
return q, i
}
// no left sibling: merge r into q
t.catX(p, q, r, pi)
if t.r != q && pi < p.c { // k=+∞ @p.c
t.setHitKmax(p.x[pi].k)
} else {
t.hitKmax = t.hitPKmax
t.hitKmaxSet = t.hitPKmaxSet
}
return q, i
}
// refill refills data page q (child pi of p) to have >= 50% load (kd
// entries) taking data entries from siblings.
//
// Each sibling can give away only what it holds above kd. If siblings have
// not enough entries for such refill, the data page in question is
// concatenated with one of them (the left one if it exists). Otherwise the
// missing entries are taken from both siblings proportionally to what each
// can spare, and the separators in p are updated.
//
// refill does not touch the hit state. Set, Put and Delete call it right
// before they search again from the root.
// XXX naming -> fillup ?
func (t *Tree) refill(p *x, q *d, pi int) {
t.ver++
l, r := p.siblings(pi)
// δl, δr: how many entries each sibling can spare
δl := 0
δr := 0
if l != nil {
δl = l.c - kd
}
if r != nil {
δr = r.c - kd
}
if q.c + δl + δr < kd {
// cannot refill - concatenate into l or r
if l != nil {
t.cat(p, l, q, pi-1)
} else {
t.cat(p, q, r, pi)
}
return
}
// refill from siblings
δ := kd - q.c
// split the δ missing entries between l and r proportionally
δl = δ * δl / (δl + δr)
δr = δ - δl
if δl != 0 {
l.mvR(q, δl)
p.x[pi-1].k = q.d[0].k
}
if δr != 0 {
q.mvL(r, δr)
p.x[pi].k = r.d[0].k
}
}
// ----------------------------------------------------------------- Enumerator
// Close recycles e to a pool for possible later reuse. No references to e
// should exist or such references must not be used afterwards.
func (e *Enumerator) Close() {
// reset every field so the pooled object keeps no page or tree reachable
*e = ze
btEPool.Put(e)
}
// Next returns the currently enumerated item, if it exists and moves to the
// next item in the key collation order. If there is no item to return, err ==
// io.EOF is returned.
//
// If the tree was mutated since the position was established, the
// enumerator first re-seeks to the key it remembered.
func (e *Enumerator) Next() (k zodb.Oid, v int64, err error) {
	if e.err != nil {
		return k, v, e.err
	}

	if e.ver != e.t.ver {
		// tree changed under us - resync on the remembered key
		f, _ := e.t.Seek(e.k)
		*e = *f
		f.Close()
	}

	if e.q == nil {
		e.err = io.EOF
		return k, v, io.EOF
	}

	if e.i >= e.q.c {
		// positioned past the end of the page - step to the next one
		if err = e.next(); err != nil {
			return k, v, err
		}
	}

	item := e.q.d[e.i]
	e.k, e.hit = item.k, true
	e.next()
	return item.k, item.v, nil
}
// next advances the position by one entry, moving on to the following data
// page when the current one is exhausted. When there is no page left,
// e.err is set to io.EOF. It returns e.err.
func (e *Enumerator) next() error {
	if e.q == nil {
		e.err = io.EOF
		return io.EOF
	}

	if e.i < e.q.c-1 {
		e.i++
		return e.err
	}

	e.q, e.i = e.q.n, 0
	if e.q == nil {
		e.err = io.EOF
	}
	return e.err
}
// Prev returns the currently enumerated item, if it exists and moves to the
// previous item in the key collation order. If there is no item to return, err
// == io.EOF is returned.
//
// If the tree was mutated since the position was established, the
// enumerator first re-seeks to the key it remembered.
func (e *Enumerator) Prev() (k zodb.Oid, v int64, err error) {
	if e.err != nil {
		return k, v, e.err
	}

	if e.ver != e.t.ver {
		// tree changed under us - resync on the remembered key
		f, _ := e.t.Seek(e.k)
		*e = *f
		f.Close()
	}

	if e.q == nil {
		e.err = io.EOF
		return k, v, io.EOF
	}

	if !e.hit {
		// move to previous because Seek overshoots if there's no hit
		if err = e.prev(); err != nil {
			return k, v, err
		}
	}

	if e.i >= e.q.c {
		if err = e.prev(); err != nil {
			return k, v, err
		}
	}

	item := e.q.d[e.i]
	e.k, e.hit = item.k, true
	e.prev()
	return item.k, item.v, nil
}
// prev moves the position back by one entry, moving on to the preceding
// data page when the start of the current one is reached. When there is no
// page left, e.err is set to io.EOF. It returns e.err.
func (e *Enumerator) prev() error {
	if e.q == nil {
		e.err = io.EOF
		return io.EOF
	}

	if e.i > 0 {
		e.i--
		return e.err
	}

	e.q = e.q.p
	if e.q == nil {
		e.err = io.EOF
		return e.err
	}
	e.i = e.q.c - 1
	return e.err
}
// Code generated by gen-fsbtree from github.com/cznic/b 93348d0; DO NOT EDIT.
// (from patched version available at https://lab.nexedi.com/kirr/b.git)
//
// ---- 8< ----
package fsb
import (
"bytes"
"github.com/cznic/strutil" // XXX better to not depend on it
)
// isNil reports whether p holds a nil *x or a nil *d.
// Any other value, including an untyped nil interface, yields false.
func isNil(p interface{}) bool {
	switch page := p.(type) {
	case *x:
		return page == nil
	case *d:
		return page == nil
	default:
		return false
	}
}
// Dump returns a textual representation of all pages reachable from the
// tree root, one page per line, with children listed below their index
// page and prefixed with ". " per level.
//
// Pages are given small sequential numbers in order of first mention.
// For every page both its element count (c) and the extent of non-zero
// slots in its array are printed as "n c:extent". The trailing newline is
// stripped.
func (t *Tree) Dump() string {
var buf bytes.Buffer
f := strutil.IndentFormatter(&buf, "\t")
num := map[interface{}]int{}
visited := map[interface{}]bool{}
// handle returns the number assigned to page p (0 for a nil page),
// assigning the next free number on first use.
handle := func(p interface{}) int {
if isNil(p) {
return 0
}
if n, ok := num[p]; ok {
return n
}
n := len(num) + 1
num[p] = n
return n
}
var pagedump func(interface{}, string)
// pagedump prints page p with prefix pref and recurses into children
// of index pages; every page is printed at most once.
pagedump = func(p interface{}, pref string) {
if isNil(p) || visited[p] {
return
}
visited[p] = true
switch x := p.(type) {
case *x:
h := handle(p)
// n: one past the last slot that is not zero
n := 0
for i, v := range x.x {
if v.ch != nil || v.k != 0 {
n = i + 1
}
}
f.Format("%sX#%d(%p) n %d:%d {", pref, h, x, x.c, n)
a := []interface{}{}
for i, v := range x.x[:n] {
a = append(a, v.ch)
if i != 0 {
f.Format(" ")
}
f.Format("(C#%d K %v)", handle(v.ch), v.k)
}
f.Format("}\n")
for _, p := range a {
pagedump(p, pref+". ")
}
case *d:
h := handle(p)
// n: one past the last slot that is not zero
n := 0
for i, v := range x.d {
if v.k != 0 || v.v != 0 {
n = i + 1
}
}
f.Format("%sD#%d(%p) P#%d N#%d n %d:%d {", pref, h, x, handle(x.p), handle(x.n), x.c, n)
for i, v := range x.d[:n] {
if i != 0 {
f.Format(" ")
}
f.Format("%v:%v", v.k, v.v)
}
f.Format("}\n")
}
}
pagedump(t.r, "")
s := buf.String()
if s != "" {
// drop the final newline
s = s[:len(s)-1]
}
return s
}
#!/bin/bash -e
# generate b.Tree with compile-time KEY=zodb.Oid, VALUE=int64, tuned kd and direct oidCmp calls
# Copyright (C) 2017 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Free Software licenses or any of the Open Source
# Initiative approved licenses and Convey the resulting work. Corresponding
# source of such a combination shall include the source code for all other
# software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
# types substituted for the KEY and VALUE placeholders of the generic b.Tree
KEY=zodb.Oid
VALUE=int64
# adjust kd so that sizeof(d) ~= page
# a data page holds 2*kd + 1 entries plus the fields counted in $o, hence
# kd = (page - o - de) / (2*de) with page = 4096 bytes
de=16 # KEY+VALUE
o=24 # d.c, d.n, d.p
kd=$(( (4096 - $o - $de) / (2 * $de) ))
# git_upstream_url <repo> - show current branch upstream URL
#
# Prints the URL of the remote which the currently checked-out branch of
# the git repository at <repo> is configured to track.
#
# Helper variables are local so that calling the function does not clobber
# same-named globals of the script, and <repo> is quoted so that
# repositories located under paths with spaces work.
git_upstream_url() {
	local repo head remote url
	repo=$1
	head=$(git -C "$repo" symbolic-ref --short HEAD)            # current branch - e.g. "t"
	remote=$(git -C "$repo" config --get "branch.$head.remote") # upstream name, e.g. "kirr"
	url=$(git -C "$repo" config --get "remote.$remote.url")     # upstream URL
	echo "$url"
}
# locate the (patched) b checkout used by the go tool and describe it:
# directory, revision and the URL it was fetched from
b=github.com/cznic/b
Bdir=`go list -f '{{.Dir}}' $b`
Brev=`git -C $Bdir describe --always`
Bweb=`git_upstream_url $Bdir`
# fsbtree.go: "generated" header followed by the specialized tree code
out=fsbtree.go
echo "// Code generated by gen-fsbtree from $b $Brev; DO NOT EDIT." >$out
echo "// (from patched version available at $Bweb)" >>$out
echo "//" >>$out
echo "// KEY=$KEY VALUE=$VALUE" >>$out
echo "// ---- 8< ----" >>$out
echo >>$out
# take the generic template produced by `make generic` in b and:
#   - rename the package to fsb and import zodb right after the package clause
#   - substitute the KEY / VALUE placeholders
#   - set kd to the value computed above
#   - drop the cmp field/argument and call oidCmp directly instead of t.cmp
make -s -C $Bdir generic |sed \
-e '/package b/a \\nimport "lab.nexedi.com/kirr/neo/go/zodb"' \
-e 's/package b/package fsb/g' \
-e "s/KEY/$KEY/g" \
-e "s/VALUE/$VALUE/g" \
\
-e "s/^\(\\s*kd = \).*\$/\\1$kd/g" \
\
-e '/cmp *Cmp$/d' \
-e 's/t\.cmp(/oidCmp(/g' \
-e 's/func TreeNew(cmp Cmp)/func TreeNew()/g' \
-e 's/btTPool.get(cmp)/btTPool.get()/g' \
-e 's/func (p \*btTpool) get(cmp Cmp)/func (p *btTpool) get()/g' \
-e '/x\.cmp = cmp$/d' \
>>$out
# also extract dump() routine
# fsbtree_util.go: "generated" header, package clause + imports, then the
# isNil and dump functions copied out of b's all_test.go
out=fsbtree_util.go
echo "// Code generated by gen-fsbtree from $b $Brev; DO NOT EDIT." >$out
echo "// (from patched version available at $Bweb)" >>$out
echo "//" >>$out
echo "// ---- 8< ----" >>$out
echo >>$out
cat >>$out <<EOF
package fsb
import (
"bytes"
"github.com/cznic/strutil" // XXX better to not depend on it
)
EOF
echo >>$out
# print only the two functions; inside dump, export it as Dump and compare
# keys/values against 0 instead of nil since they are integers here
sed -n \
-e '/^func isNil(/,/^}/p' \
-e '/^func.* dump()/,/^}/ {
s/dump()/Dump()/g
s/v\.k != nil/v.k != 0/g
s/v\.v != nil/v.v != 0/g
p
}' \
\
$Bdir/all_test.go \
>>$out
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment