Commit 2c152d41 authored by Kirill Smelkov's avatar Kirill Smelkov

wcfs: Add zdata package to load ZBlk/ZBigFile data

Add functionality to load objects from ZODB as saved by py wendelin.core.
Mostly straightforward code.
The main part is in zblk.go .

Contrary to python implementation, go can load ZBlk1's subobjects in
parallel, which, given scalable ZODB storage, can be significantly
faster compared to serially loading all ZData subobjects as py code
does.

TODO test wrt data saved by Python3.

Some preliminary history:

878b2787 X draft loading
bf9a7405 X No longer rely on ZODB cache invariant for invalidations
0d62b05e X Adjust to btree.VGet & friends signature change to include keycov in visit callback
b74dda09 X Start switching Track from Track(key) to Track(keycov)
parent 2163fcaf
module lab.nexedi.com/nexedi/wendelin.core/wcfs
go 1.14
require (
github.com/johncgriffin/overflow v0.0.0-20211019200055-46fa312c352c
github.com/kisielk/og-rek v1.1.1-0.20210310094122-8def3d024dac
github.com/stretchr/testify v1.7.0
lab.nexedi.com/kirr/go123 v0.0.0-20210906140734-c9eb28d9e408
lab.nexedi.com/kirr/neo/go v0.0.0-20211004111643-c74a5a3cd0d0
)
This diff is collapsed.
// XXX dup from neo/go/zodb/internal/pickletools
package pycompat
import (
"math/big"
)
// Int64 tries to convert unpickled Python value to int64.
//
// (ogórek decodes python long as big.Int)
//
// XXX + support for float?
func Int64(xv interface{}) (v int64, ok bool) {
switch v := xv.(type) {
case int64:
return v, true
case *big.Int:
if v.IsInt64() {
return v.Int64(), true
}
}
return 0, false
}
// Copyright (C) 2018-2021 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// You can also Link and Combine this program with other software covered by
// the terms of any of the Free Software licenses or any of the Open Source
// Initiative approved licenses and Convey the resulting work. Corresponding
// source of such a combination shall include the source code for all other
// software used.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// See https://www.nexedi.com/licensing for rationale and options.
// Package xzodb compements package zodb.
package xzodb
import (
"fmt"
"lab.nexedi.com/kirr/neo/go/zodb"
)
// TypeOf returns string for object's type.
//
// - for ZODB objects, it uses zodb.ClassOf, which in particular supports
// printing zodb.Broken with details properly.
//
// - for other objects, it uses %T.
func TypeOf(obj interface{}) string {
switch obj := obj.(type) {
case zodb.IPersistent:
return zodb.ClassOf(obj)
default:
return fmt.Sprintf("%T", obj)
}
}
// Copyright (C) 2018-2021 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// You can also Link and Combine this program with other software covered by
// the terms of any of the Free Software licenses or any of the Open Source
// Initiative approved licenses and Convey the resulting work. Corresponding
// source of such a combination shall include the source code for all other
// software used.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// See https://www.nexedi.com/licensing for rationale and options.
package zdata
import (
"fmt"
"lab.nexedi.com/kirr/neo/go/zodb"
)
// tidmax returns latest revision.
func tidmax(a, b zodb.Tid) zodb.Tid {
if a > b {
return a
} else {
return b
}
}
// tidmin returns earliest revision.
func tidmin(a, b zodb.Tid) zodb.Tid {
if a < b {
return a
} else {
return b
}
}
func panicf(format string, argv ...interface{}) {
panic(fmt.Sprintf(format, argv...))
}
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2021 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Free Software licenses or any of the Open Source
# Initiative approved licenses and Convey the resulting work. Corresponding
# source of such a combination shall include the source code for all other
# software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
"""zblk_test_gen.py generates test data for zblk_test.go"""
from ZODB.DB import DB
from ZODB.utils import u64
from wendelin.bigfile.file_zodb import ZBlk0, ZBlk1, ZBigFile
from BTrees.IOBTree import IOBTree, IOBucket
from numpy import arange
import os, os.path, transaction
import zodbtools.test.gen_testdata # to make time predictable
from zodbtools.test.gen_testdata import run_with_zodb4py2_compat
K = 1024
def main():
run_with_zodb4py2_compat(main2)
def main2():
outfs = "testdata/zblk.fs"
rm_f(outfs)
rm_f(outfs + ".index")
db = DB(outfs)
conn = db.open()
root = conn.root()
# one ZBigFile with two data blocks
root['zbigf'] = zf = ZBigFile(2*1024*K)
root['zblk0'] = z0 = ZBlk0()
root['zblk1'] = z1 = ZBlk1() # also covers ZData
# zblk0 with small data
z0.setblkdata(brange32(16*K))
# zblk1 with head + large hole + tail.
# head and tail are 128K because if smaller, z1.chunktab would serialize without buckets.
z1ht = 128*K
z1.setblkdata(brange32(z1ht) + bzeros(zf.blksize - 2*z1ht) + breverse(brange32(z1ht)))
zf.blktab[1] = z0
zf.blktab[3] = z1
# make sure there is at least one bucket in z1's chunktab
#
# we need at least one bucket to verify how zblk.go handles chunktab bucket loading;
# more than 1 bucket just wastes space, however with only 1 bucket a tree
# is serialized to have leaf nodes directly in the tree state.
# -> verify for ==2 buckets.
assertIOBTreeHas2Buckets(z1.chunktab)
transaction.commit()
with open("ztestdata_zblk_test.go", "w") as f:
def emit(v):
print >>f, v
emit("// Code generated by %s; DO NOT EDIT." % __file__)
emit("package zdata\n")
emit('import "lab.nexedi.com/kirr/neo/go/zodb"\n')
emit("const zf_blksize = %d" % zf.blksize)
emit("const zf_size = %d" % ((zf.blktab.maxKey()+1)*zf.blksize))
emit("const z0_oid = zodb.Oid(%d)" % u64(z0._p_oid))
emit("const z1_oid = zodb.Oid(%d)" % u64(z1._p_oid))
emit("const zf_oid = zodb.Oid(%d)" % u64(zf._p_oid))
emit("const z0_rev = zodb.Tid(0x%x)" % u64(z0._p_serial))
emit("const z1_rev = zodb.Tid(0x%x)" % u64(z1._p_serial))
emit("const z0_len = %d" % len(z0.loadblkdata()))
emit("const z1_htlen = %d" % z1ht)
conn.close()
db.close()
# brange32 returns bytes with big-endian uint32 sequence filling them.
# returned bytes has len == size.
def brange32(size):
# 0, 1, 2, ... as u32
return arange(0, size//4, dtype='>u4').tobytes()
# bzeros returns bytes of requested size with 0 filling them.
def bzeros(size):
return b'\0'*size
# breverse returns bytes in the reverse order.
def breverse(b):
assert isinstance(b, bytes)
_ = bytearray(b)
_.reverse()
return bytes(_)
# assertIOBTreeHas2Buckets asserts that IOBTree has 2 buckets.
def assertIOBTreeHas2Buckets(t):
assert isinstance(t, IOBTree)
# https://github.com/zopefoundation/BTrees/blob/4.5.0-1-gc8bf24e/BTrees/BTreeTemplate.c#L1087-L1109
_ = t.__getstate__()
assert len(_) == 2, (len(_), _)
assert len(_[0]) == 3, _[0] # (bucket0, key1, bucket1)
assert isinstance(_[0][0], IOBucket), _[0][0] # bucket0
assert isinstance(_[0][1], int), _[0][2] # key1
assert isinstance(_[0][2], IOBucket), _[0][2] # bucket1
assert isinstance(_[1], IOBucket), _[1] # .firstbucket
# rm_f is like `rm -f` in shell.
def rm_f(path):
if os.path.exists(path):
os.remove(path)
if __name__ == '__main__':
main()
This diff is collapsed.
// Copyright (C) 2018-2021 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// You can also Link and Combine this program with other software covered by
// the terms of any of the Free Software licenses or any of the Open Source
// Initiative approved licenses and Convey the resulting work. Corresponding
// source of such a combination shall include the source code for all other
// software used.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// See https://www.nexedi.com/licensing for rationale and options.
package zdata
//go:generate ./testdata/zblk_test_gen.py
import (
"bytes"
"context"
"encoding/binary"
"testing"
"lab.nexedi.com/kirr/go123/exc"
"lab.nexedi.com/kirr/neo/go/transaction"
"lab.nexedi.com/kirr/neo/go/zodb"
_ "lab.nexedi.com/kirr/neo/go/zodb/wks"
"github.com/stretchr/testify/require"
)
// TestZBlk verifies that ZBlk* and ZBigFile saved by Python can be read correctly by Go.
// TODO also test with data saved by Python3.
func TestZBlk(t *testing.T) {
X := exc.Raiseif
assert := require.New(t)
ctx := context.Background()
stor, err := zodb.Open(ctx, "testdata/zblk.fs", &zodb.OpenOptions{ReadOnly: true}); X(err)
db := zodb.NewDB(stor, &zodb.DBOptions{})
defer func() {
err := db.Close(); X(err)
err = stor.Close(); X(err)
}()
txn, ctx := transaction.New(ctx)
defer txn.Abort()
conn, err := db.Open(ctx, &zodb.ConnOptions{}); X(err)
xz0, err := conn.Get(ctx, z0_oid); X(err)
xz1, err := conn.Get(ctx, z1_oid); X(err)
xzf, err := conn.Get(ctx, zf_oid); X(err)
z0, ok := xz0.(*ZBlk0)
if !ok {
t.Fatalf("z0: want ZBlk0; got %T", xz0)
}
z1, ok := xz1.(*ZBlk1)
if !ok {
t.Fatalf("z1: want ZBlk1; got %T", xz1)
}
zf, ok := xzf.(*ZBigFile)
if !ok {
t.Fatalf("zf: want ZBigFile; got %T", xzf)
}
xactivate := func(obj zodb.IPersistent) {
t.Helper()
err := obj.PActivate(ctx)
if err != nil {
t.Fatal(err)
}
}
z0Data, z0Rev, err := z0.LoadBlkData(ctx); X(err)
z0DataOK := brange32(z0_len)
assert.Equal(z0Data, z0DataOK, "ZBlk0 data wrong")
assert.Equal(z0Rev, z0_rev, "ZBlk0 rev wrong")
z1Data, z1Rev, err := z1.LoadBlkData(ctx); X(err)
z1DataOK := make([]byte, zf_blksize) // zeros
copy(z1DataOK[0:], brange32(z1_htlen)) // head
copy(z1DataOK[len(z1DataOK)-z1_htlen:], breverse(brange32(z1_htlen))) // tail
z1DataOK = bytes.TrimRight(z1DataOK, "\x00") // trailing 0 are not persisted
assert.Equal(z1Data, z1DataOK, "ZBlk1 data wrong")
assert.Equal(z1Rev, z1_rev, "ZBlk1 rev wrong")
xactivate(zf)
if zf.blksize != zf_blksize {
t.Fatalf("zf: blksize=%d; want %d", zf.blksize, zf_blksize)
}
z0_, ok, err := zf.blktab.Get(ctx, 1); X(err)
if !(ok && z0_ == z0) {
t.Fatalf("zf: [0] -> %#v; want z0", z0_)
}
z1_, ok, err := zf.blktab.Get(ctx, 3); X(err)
if !(ok && z1_ == z1) {
t.Fatalf("zf: [1] -> %#v; want z1", z1_)
}
size, _, _, err := zf.Size(ctx); X(err)
assert.Equal(size, int64(zf_size), "ZBigFile size wrong")
// LoadBlk
z0Data, _, _, _, _, err = zf.LoadBlk(ctx, 1); X(err)
assert.Equal(len(z0Data), int(zf.blksize))
z0Data = bytes.TrimRight(z0Data, "\x00")
assert.Equal(z0Data, z0DataOK)
z1Data, _, _, _, _, err = zf.LoadBlk(ctx, 3); X(err)
assert.Equal(len(z1Data), int(zf.blksize))
z1Data = bytes.TrimRight(z1Data, "\x00")
assert.Equal(z1Data, z1DataOK)
}
// TODO verify PyGetState vs PySetState
// brange32 returns bytes with big-endian uint32 sequence filling them.
// returned bytes has len == size.
func brange32(size int) []byte {
data := make([]byte, size)
for i := 0; i < size / 4; i++ {
binary.BigEndian.PutUint32(data[i*4:], uint32(i))
}
return data
}
// breverse returns bytes in the reverse order.
func breverse(b []byte) []byte {
r := make([]byte, len(b))
for i := range(b) {
r[i] = b[len(b)-i-1]
}
return r
}
// Code generated by ./testdata/zblk_test_gen.py; DO NOT EDIT.
package zdata
import "lab.nexedi.com/kirr/neo/go/zodb"
const zf_blksize = 2097152
const zf_size = 8388608
const z0_oid = zodb.Oid(2)
const z1_oid = zodb.Oid(3)
const zf_oid = zodb.Oid(1)
const z0_rev = zodb.Tid(0x285cbac3851eb99)
const z1_rev = zodb.Tid(0x285cbac3851eb99)
const z0_len = 16384
const z1_htlen = 131072
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment