Commit 33d10066 authored by Kirill Smelkov's avatar Kirill Smelkov

go/zodb/fs1: BTree specialized with KEY=zodb.Oid, VALUE=int64

FileStorage index maps an oid to the file position of the latest data
record for this oid. It is natural to implement this index via a BTree,
as e.g. ZODB/py does.

In the Go world there is the github.com/cznic/b BTree library, but since
it is not specialized and works via interface{}, it is slower than it
could be and allocates a lot. So generate a specialized version of that
code with key and value types exactly suitable for FileStorage indexing.

We use a slightly patched version of b, with speed-ups for bulk-loading
data via the regular point-ingestion BTree entry point:

	https://lab.nexedi.com/kirr/b x/refill

The patches have not been upstreamed because they slow down the general
case a bit (only a bit, but still this is a "no" to me), and because with
a dedicated bulk-loading API it could be possible to load data several
times faster still. Still, the current version is enough for not-very-huge
indices.

Btw ZODB/py does the same (see fsBucket + friends).
parent 8f64f6ed
// Copyright (C) 2017 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// You can also Link and Combine this program with other software covered by
// the terms of any of the Free Software licenses or any of the Open Source
// Initiative approved licenses and Convey the resulting work. Corresponding
// source of such a combination shall include the source code for all other
// software used.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// See https://www.nexedi.com/licensing for rationale and options.
// Package fsb specializes cznic/b.Tree for FileStorage index needs.
//
// See gen-fsbtree for details.
package fsb
//go:generate ./gen-fsbtree
import "lab.nexedi.com/kirr/neo/go/zodb"
// oidCmp is the comparison function for fsbTree.
//
// It returns -1 if a < b, +1 if a > b and 0 if both oids are equal.
// The body is intentionally kept short so that it remains inlineable.
func oidCmp(a, b zodb.Oid) int {
	if a < b {
		return -1
	}
	if a > b {
		return +1
	}
	return 0
}
This diff is collapsed.
// Code generated by gen-fsbtree from github.com/cznic/b 93348d0; DO NOT EDIT.
// (from patched version available at https://lab.nexedi.com/kirr/b.git)
//
// ---- 8< ----
package fsb
import (
"bytes"
"github.com/cznic/strutil" // XXX better to not depend on it
)
// isNil reports whether p holds a nil page pointer, i.e. a nil *x or a nil *d.
//
// Any other value - including an interface that holds no value at all -
// yields false.
func isNil(p interface{}) bool {
	switch v := p.(type) {
	case *x:
		return v == nil
	case *d:
		return v == nil
	}
	return false
}
// Dump returns a textual, line-per-page rendering of the tree, starting from
// the root page t.r.
//
// A *x page is printed as
//
//	X#<handle>(<ptr>) n <x.c>:<n> {(C#<child handle> K <key>) ...}
//
// followed by the dump of its children, each with ". " appended to the prefix.
// A *d page is printed as
//
//	D#<handle>(<ptr>) P#<handle of x.p> N#<handle of x.n> n <x.c>:<n> {<key>:<value> ...}
//
// Handles are integers assigned to pages in the order they are first
// referenced; 0 is used for nil pages. The last character of the collected
// output is dropped before returning (expected to be the newline that
// terminates the final page line).
func (t *Tree) Dump() string {
var buf bytes.Buffer
f := strutil.IndentFormatter(&buf, "\t")
// num remembers the handle assigned to every page seen so far.
num := map[interface{}]int{}
// visited prevents the same page from being dumped more than once.
visited := map[interface{}]bool{}
// handle returns the handle of page p: 0 for a nil page, otherwise the
// previously assigned number, or the next free one (1, 2, ...) on first use.
handle := func(p interface{}) int {
if isNil(p) {
return 0
}
if n, ok := num[p]; ok {
return n
}
n := len(num) + 1
num[p] = n
return n
}
// pagedump is declared before it is defined so that it can call itself.
var pagedump func(interface{}, string)
pagedump = func(p interface{}, pref string) {
if isNil(p) || visited[p] {
return
}
visited[p] = true
switch x := p.(type) {
case *x:
h := handle(p)
// n = one past the last slot that has a child or a non-zero key.
// NOTE(review): x.c is printed next to it, presumably the page's own
// element count - confirm against the page definition.
n := 0
for i, v := range x.x {
if v.ch != nil || v.k != 0 {
n = i + 1
}
}
f.Format("%sX#%d(%p) n %d:%d {", pref, h, x, x.c, n)
// a collects the children so that they are dumped only after this
// page's own line has been completed.
a := []interface{}{}
for i, v := range x.x[:n] {
a = append(a, v.ch)
if i != 0 {
f.Format(" ")
}
f.Format("(C#%d K %v)", handle(v.ch), v.k)
}
f.Format("}\n")
for _, p := range a {
pagedump(p, pref+". ")
}
case *d:
h := handle(p)
// n = one past the last slot whose key or value is non-zero.
n := 0
for i, v := range x.d {
if v.k != 0 || v.v != 0 {
n = i + 1
}
}
// NOTE(review): x.p / x.n are presumably the previous / next data pages -
// confirm against the page definition.
f.Format("%sD#%d(%p) P#%d N#%d n %d:%d {", pref, h, x, handle(x.p), handle(x.n), x.c, n)
for i, v := range x.d[:n] {
if i != 0 {
f.Format(" ")
}
f.Format("%v:%v", v.k, v.v)
}
f.Format("}\n")
}
}
pagedump(t.r, "")
s := buf.String()
// drop the final character of a non-empty dump
if s != "" {
s = s[:len(s)-1]
}
return s
}
#!/bin/bash -e
# generate b.Tree with compile-time KEY=zodb.Oid, VALUE=int64, tuned kd and direct oidCmp calls
# Copyright (C) 2017 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Free Software licenses or any of the Open Source
# Initiative approved licenses and Convey the resulting work. Corresponding
# source of such a combination shall include the source code for all other
# software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
# concrete types substituted for the generic KEY / VALUE placeholders
KEY=zodb.Oid
VALUE=int64

# choose kd so that sizeof(d) is close to one 4096-byte page,
# i.e. the largest kd with  o + (2*kd + 1)*de <= 4096
de=16	# one data entry: KEY+VALUE
o=24	# the other d fields: d.c, d.n, d.p
kd=$(( (4096 - o - de) / (2 * de) ))
# git_upstream_url <repo> - show current branch upstream URL
#
# For the git work tree at <repo>, prints the URL of the remote that the
# currently checked-out branch is configured to use as its upstream.
git_upstream_url() {
	# Keep helper variables out of the caller's namespace. They are declared
	# separately from their assignments so that `local` does not mask the
	# exit status of the command substitutions below.
	local repo head remote url
	repo=$1
	# every expansion is quoted so that paths/names containing whitespace survive
	head=$(git -C "$repo" symbolic-ref --short HEAD)		# current branch - e.g. "t"
	remote=$(git -C "$repo" config --get "branch.$head.remote")	# upstream name, e.g. "kirr"
	url=$(git -C "$repo" config --get "remote.$remote.url")	# upstream URL
	echo "$url"
}
# Locate the b checkout the generic code is taken from, and record its
# revision and upstream URL for the header of the generated files.
b=github.com/cznic/b
Bdir=$(go list -f '{{.Dir}}' "$b")
Brev=$(git -C "$Bdir" describe --always)
Bweb=$(git_upstream_url "$Bdir")

# Without pipefail only sed's exit status counts for `make | sed`, so a failing
# make would go unnoticed and leave a truncated fsbtree.go behind.
set -o pipefail

# fsbtree.go = header + b's generic code with KEY/VALUE substituted, kd tuned
# and the dynamic cmp callback replaced by direct oidCmp calls.
out=fsbtree.go
{
	echo "// Code generated by gen-fsbtree from $b $Brev; DO NOT EDIT."
	echo "// (from patched version available at $Bweb)"
	echo "//"
	echo "// KEY=$KEY VALUE=$VALUE"
	echo "// ---- 8< ----"
	echo

	make -s -C "$Bdir" generic |sed \
		-e '/package b/a \\nimport "lab.nexedi.com/kirr/neo/go/zodb"' \
		-e 's/package b/package fsb/g' \
		-e "s/KEY/$KEY/g" \
		-e "s/VALUE/$VALUE/g" \
		\
		-e "s/^\(\\s*kd = \).*\$/\\1$kd/g" \
		\
		-e '/cmp *Cmp$/d' \
		-e 's/t\.cmp(/oidCmp(/g' \
		-e 's/func TreeNew(cmp Cmp)/func TreeNew()/g' \
		-e 's/btTPool.get(cmp)/btTPool.get()/g' \
		-e 's/func (p \*btTpool) get(cmp Cmp)/func (p *btTpool) get()/g' \
		-e '/x\.cmp = cmp$/d'
} >"$out"
# also extract dump() routine
#
# fsbtree_util.go = header + fixed package/import preamble + the isNil() and
# dump() functions cut out of b's all_test.go, with dump() renamed to Dump()
# and the nil checks on keys/values turned into comparisons with 0.
out=fsbtree_util.go
{
	echo "// Code generated by gen-fsbtree from $b $Brev; DO NOT EDIT."
	echo "// (from patched version available at $Bweb)"
	echo "//"
	echo "// ---- 8< ----"
	echo
	cat <<EOF
package fsb
import (
"bytes"
"github.com/cznic/strutil" // XXX better to not depend on it
)
EOF
	echo
	# $Bdir is quoted so that a checkout path containing whitespace is not split
	sed -n \
		-e '/^func isNil(/,/^}/p' \
		-e '/^func.* dump()/,/^}/ {
s/dump()/Dump()/g
s/v\.k != nil/v.k != 0/g
s/v\.v != nil/v.v != 0/g
p
}' \
		\
		"$Bdir/all_test.go"
} >"$out"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment