Commit 850703f8 authored by Levin Zimmermann's avatar Levin Zimmermann

ZBigFile: Add ZBlk format option 'h' (heuristic)

There are two formats to save data with a ZBigFile: ZBlk0 and ZBlk1.
They trade access time against disk-space growth differently: ZBlk1 is
better with regard to disk space, while ZBlk0 has a better access time.
Wendelin.core users may not always know, or care, which format fits
their data better. In this case it may be easier for users to just let
the program select the ZBlk format automatically. With this patch and
the new 'h' (for heuristic) option of the 'zblk_fmt' argument of
ZBigFile, this is now possible. The 'h' option isn't really a new ZBlk
format in itself; it just tries to automatically select the best ZBlk
format according to the characteristics of the changes that the user
applies to the ZBigFile.

With this patch comes a test (bigfile/tests/test-zblk-fmt) that runs
benchmarks for different combinations of change sizes and ZBlk formats.
The test aims at checking that the 'heuristic' format performs mostly as
well as the explicitly set formats:

Use only a very small change size, so that heuristic always uses ZBlk1
---------------------------------------------
---------------------------------------------
Set change_size_set to 20
Set change_count to 500
Set arrsize to 1000000

Run tests with format h:

	ZODB storage size: 20.388751 MB
	Access time: 3.83012294769

Run tests with format ZBlk0:

	ZODB storage size: 1064.636095 MB
	Access time: 3.63488578796

Run tests with format ZBlk1:

	ZODB storage size: 18.59421 MB
	Access time: 3.93918204308

---------------------------------------------
---------------------------------------------

Use only a very big change size, so that heuristic always uses ZBlk0
---------------------------------------------
---------------------------------------------
Set change_size_set to 200000
Set change_count to 500
Set arrsize to 1000000

Run tests with format h:

	ZODB storage size: 2113.32534 MB
	Access time: 3.79592084885

Run tests with format ZBlk0:

	ZODB storage size: 2113.254473 MB
	Access time: 3.76431703568

Run tests with format ZBlk1:

	ZODB storage size: 1651.236315 MB
	Access time: 4.11528992653

---------------------------------------------
---------------------------------------------

Mix between change size so that heuristic switches between ZBlk0 and ZBlk1
---------------------------------------------
---------------------------------------------
Set change_size_set to 20,200000
Set change_count to 500
Set arrsize to 1000000

Run tests with format h:

	ZODB storage size: 820.17736 MB
	Access time: 3.85217094421

Run tests with format ZBlk0:

	ZODB storage size: 1576.361791 MB
	Access time: 3.65322995186

Run tests with format ZBlk1:

	ZODB storage size: 815.323463 MB
	Access time: 3.96401691437

---------------------------------------------
---------------------------------------------
parent 743937a4
......@@ -414,14 +414,7 @@ class ZBlk1(ZBlkBase):
break
# scan over buf and update/delete changed chunks
for start in range(0, len(buf), CHUNKSIZE):
data = buf[start:start+CHUNKSIZE] # FIXME copy on py2
# make sure data is bytes
# (else we cannot .rstrip() it below)
if not isinstance(data, bytes):
data = bytes(data) # FIXME copy on py3
# trim trailing \0
data = data.rstrip(b'\0') # FIXME copy
for data, start in _buf_iterator(buf, CHUNKSIZE):
chunk = chunktab.get(start)
# all 0 -> make sure to remove chunk
......@@ -511,22 +504,23 @@ class ZBigFile(LivePersistent):
def __init__(self, blksize, zblk_fmt=""):
LivePersistent.__init__(self)
self.__setstate__((blksize, LOBTree(), zblk_fmt)) # NOTE L enough for blk_t
self.__setstate__((blksize, LOBTree(), zblk_fmt, 0, 0)) # NOTE L enough for blk_t
self.zblk_fmt = zblk_fmt # Evoke check if zblk_fmt is valid
# state is (.blksize, .blktab, .zblk_fmt)
# state is (.blksize, .blktab, .zblk_fmt, .zblk_fmt0_counter, .zblk_fmt1_counter)
def __getstate__(self):
return (self.blksize, self.blktab, self.zblk_fmt)
return (self.blksize, self.blktab, self.zblk_fmt, self.zblk_fmt0_counter, self.zblk_fmt1_counter)
def __setstate__(self, state):
state_length = len(state)
# NOTE set _zblk_fmt instead of zblk_fmt to avoid check => ↑ performance
if state_length == 2: # BBB
self.blksize, self.blktab = state
self._zblk_fmt = ""
elif state_length == 3:
self.blksize, self.blktab, self._zblk_fmt = state
self.__setstate__(tuple(state) + ("", 0, 0))
elif state_length == 3: # BBB
self.__setstate__(tuple(state) + (0, 0))
elif state_length == 5:
# NOTE set _zblk_fmt instead of zblk_fmt to avoid check => ↑ performance
self.blksize, self.blktab, self._zblk_fmt, self.zblk_fmt0_counter, self.zblk_fmt1_counter = state
else:
raise RuntimeError("E: Unexpected state length: %s" % state)
self._v_file = _ZBigFile._new(self, self.blksize)
......@@ -555,7 +549,10 @@ class ZBigFile(LivePersistent):
# store data dirty page -> ZODB obj
def storeblk(self, blk, buf):
zblk = self.blktab.get(blk)
zblk_type_write = ZBlk_fmt_registry[self.zblk_fmt or ZBlk_fmt_write]
zblk_fmt = self.zblk_fmt
if zblk_fmt == "h": # apply heuristic
zblk_fmt = self._zblk_fmt_heuristic(zblk, buf)
zblk_type_write = ZBlk_fmt_registry[zblk_fmt or ZBlk_fmt_write]
# if zblk was absent or of different type - we (re-)create it anew
if zblk is None or \
type(zblk) is not zblk_type_write:
......@@ -576,6 +573,43 @@ class ZBigFile(LivePersistent):
zblk.bindzfile(self, blk)
# Heuristically determine zblk format by optimizing
# storage-space/access-speed ratio. Both can't be ideal, see
# module docstring: "Due to weakness of current ZODB storage
# servers, wendelin.core cannot provide at the same time both
# fast reads and small database size growth ..."
def _zblk_fmt_heuristic(self, zblk, buf):
# If the heuristic often switches between ZBlk0 and ZBlk1 the
# access time is even worse than when using only ZBlk1. Therefore
# the heuristic keeps track on how often the ZBlk format is changed.
# If it's more frequently changing than being stable, it switches
# forever to ZBlk1 and doesn't apply the heuristic anymore.
c0, c1 = self.zblk_fmt0_counter, self.zblk_fmt1_counter
try:
zblk_fmt_ratio = c0 / c1 if c1 > c0 else c1 / c0
except ZeroDivisionError:
zblk_fmt_ratio = 0
if zblk_fmt_ratio > 0.5: # Switch forever to ZBlk1
self.zblk_fmt = zblk_fmt = 'ZBlk1'
return zblk_fmt
if zblk is None: # no data yet => can't make any assumptions yet
return "ZBlk0"
else:
# We already commited our first data. Now let's
# see whether it's better to use ZBlk0 or ZBlk1.
p = _change_percentage(zblk, buf)
if p > 0.5: # more than half of all chunks changed
# Pick ZBlk0 in case of wide change: ZBlk1 advantage of
# a smaller disk footprint isn't so strong then:
# we can go for a faster read access with ZBlk0.
self.zblk_fmt0_counter += 1
return 'ZBlk0'
else:
self.zblk_fmt1_counter += 1
return 'ZBlk1'
# invalidate data .blktab[blk] invalidated -> invalidate page
def invalidateblk(self, blk):
for fileh in self._v_filehset:
......@@ -622,7 +656,7 @@ class ZBigFile(LivePersistent):
@zblk_fmt.setter
def zblk_fmt(self, zblk_fmt):
if zblk_fmt and zblk_fmt not in ZBlk_fmt_registry:
if zblk_fmt and zblk_fmt != "h" and zblk_fmt not in ZBlk_fmt_registry:
raise RuntimeError('E: Unknown ZBlk format %r' % zblk_fmt)
self._zblk_fmt = zblk_fmt
......@@ -851,3 +885,61 @@ class _ZBigFileH(object):
# and also more right - tpc_finish is there assumed as non-failing by
# ZODB design)
self.abort(txn)
# Utility functions for zblk
# Fraction of the page's chunks that changed relative to the previous commit:
#   0.0 = nothing changed
#   0.5 = half of data changed
#   1.0 = all data changed
#
# zblk is the previously stored block object, buf is the new block data.
# An empty buf yields 0.0.
def _change_percentage(zblk, buf):
    if type(zblk) == ZBlk0:
        # no chunktab is taken from a ZBlk0 here: build an ad-hoc one
        # from the block data it loads
        CHUNKSIZE = 4096
        chunktab = _adhoc_chunktab(zblk.loadblkdata(), CHUNKSIZE)
    else:
        chunktab, CHUNKSIZE = zblk.chunktab, zblk.CHUNKSIZE

    # Number of chunks _buf_iterator yields for buf. Ceil division, so a
    # trailing partial chunk counts as one chunk instead of producing a
    # fractional (or floored) total.
    chunk_count = (len(buf) + CHUNKSIZE - 1) // CHUNKSIZE
    if chunk_count == 0:
        return 0.0  # nothing to compare; also avoids division by zero

    change_count = _count_changes(buf, chunktab, CHUNKSIZE)
    return change_count / float(chunk_count)
# Return how many CHUNKSIZE-sized chunks of buf differ from what chunktab
# holds at the same start offset.
def _count_changes(buf, chunktab, CHUNKSIZE):
    def is_changed(data, old):
        if data:
            # non-empty new chunk: changed if there was none or it differs
            return old is None or old.data != data
        # empty new chunk: changed only if a chunk was there before
        return old is not None

    return sum(1 for data, start in _buf_iterator(buf, CHUNKSIZE)
               if is_changed(data, chunktab.get(start)))
# Create chunktab {start offset -> chunk} from buffer, with chunk objects
# that mimic ZData objects (they only carry .data).
#
# Chunks that are empty after trailing-\0 trimming get no entry.
# _count_changes treats "entry present + new data empty" as a change, so
# keeping entries for all-zero chunks would report unchanged zero chunks
# as changed.
def _adhoc_chunktab(buf, CHUNKSIZE):
    class chunk(object):  # mimic ZData
        def __init__(self, data):
            self.data = data

    chunktab = {}
    for data, start in _buf_iterator(buf, CHUNKSIZE):
        if data:
            chunktab[start] = chunk(data)
    return chunktab
# Iterate over buffer and yield chunks and start position
def _buf_iterator(buf, CHUNKSIZE):
for start in range(0, len(buf), CHUNKSIZE):
data = buf[start:start+CHUNKSIZE]
# make sure data is bytes
# (else we cannot .rstrip() it below)
if not isinstance(data, bytes):
data = bytes(data) # FIXME copy on py3
# trim trailing \0
data = data.rstrip(b'\0') # FIXME copy
yield data, start
# Copyright (C) 2023 Nexedi SA and Contributors.
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Free Software licenses or any of the Open Source
# Initiative approved licenses and Convey the resulting work. Corresponding
# source of such a combination shall include the source code for all other
# software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
# Test to compare disk-space and access-speed of the different ZBlk format options:
#
# - ZBlk0
# - ZBlk1
# - h
#
# The heuristic 'h' should behave as well as ZBlk0 in case of wide changes
# and as well as ZBlk1 in case of small changes.
import os
import random
import resource
import tempfile
import timeit
import sys
# Add relative module path, to run tests on local code
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '.'))
from golang import defer, func
import numpy as np
import transaction
import ZODB, ZODB.FileStorage
from wendelin.bigarray.array_zodb import ZBigArray
# Fixed seed so that every run applies the same sequence of random changes.
random.seed(10)

# Avoid error due to too many opened file descriptors:
# raise the soft RLIMIT_NOFILE limit up to the hard limit.
cur_limit = resource.getrlimit(resource.RLIMIT_NOFILE)
new_limit = (cur_limit[1], cur_limit[1])
resource.setrlimit(resource.RLIMIT_NOFILE, new_limit)

# Path of the (not yet existing) storage file. tempfile.mktemp() is
# deprecated because another process can create the returned name first;
# a path inside a freshly created private directory avoids that race.
storage_path = os.path.join(tempfile.mkdtemp(), 'Data.fs')

# Declare test parameters (all overridable via environment variables).
zblk_fmt = os.environ.get('zblk_fmt', 'h')
# comma-separated list of change sizes; one is picked at random per change
change_size_set = tuple(int(n) for n in os.environ.get('change_size_set', '20').split(','))
change_count = int(os.environ.get('change_count', '1000'))
arrsize = int(os.environ.get('arrsize', '1000000'))
# Utility functions
def randarr(size=1000000):
    """Return a numpy array of `size` rows, each a pair of random ints in [1, 1000]."""
    rows = []
    for _ in range(size):
        first = random.randint(1, 1000)
        second = random.randint(1, 1000)
        rows.append([first, second])
    return np.array(rows)
def setrand(A, size=20):
    """Overwrite the first `size` rows of A with random pairs and commit."""
    fresh = randarr(size)
    head = A[0:size]
    head[:] = fresh
    transaction.commit()
def accessrand(A, size=1000):
    """Slice a randomly placed window of `size` rows out of A."""
    lo = random.randint(0, arrsize - size)
    hi = lo + size
    # NOTE(review): the slice result is discarded; whether slicing alone
    # loads the data depends on A's slicing semantics - confirm.
    A[lo:hi]
# Decorator: open the benchmark database, call func(root) with the
# connection's root, and close everything again.
#
# Each close is deferred right after its resource is acquired. Deferred
# calls run in reverse order, so teardown is connection -> db -> storage,
# and an already opened storage/db still gets closed if a later open fails.
@func
def root(func):
    storage = ZODB.FileStorage.FileStorage(storage_path)
    defer(storage.close)
    db = ZODB.DB(storage)
    defer(db.close)
    connection = db.open()
    defer(connection.close)
    root = connection.root
    func(root)
# Create the array under test with the configured ZBlk format and store
# it as root.A. Runs at definition time via the @root decorator.
@root
def setup(root):
    arr = ZBigArray(shape=[1, 2], dtype=int, zblk_fmt=zblk_fmt)
    root.A = arr
    transaction.commit()
# Append arrsize random rows to root.A and commit.
# Runs at definition time via the @root decorator.
@root
def fillup(root):
    root.A.append(randarr(arrsize))
    transaction.commit()
# Apply change_count random changes to root.A, each with a change size
# picked at random from change_size_set. setrand commits every change.
# Runs at definition time via the @root decorator.
@root
def change(root):
    arr = root.A
    remaining = change_count
    while remaining > 0:
        setrand(arr, random.choice(change_size_set))
        remaining -= 1
    transaction.commit()
def access():
    """One benchmark iteration: open the database, do one random access, close it."""
    def read_once(root):
        accessrand(root.A)
    root(read_once)
def statistics():
    """Print the storage file size and the total time of 5000 access() calls."""
    size_mb = os.path.getsize(storage_path) / float(10**6)
    print("\tZODB storage size: %s MB" % size_mb)

    # reseed so that the accessed windows are the same on every run
    random.seed(10)
    elapsed = timeit.timeit("access()", "from __main__ import access", number=5000)
    print("\tAccess time: %s" % elapsed)

statistics()
#!/usr/bin/env bash
# Copyright (C) 2023 Nexedi SA and Contributors.
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Free Software licenses or any of the Open Source
# Initiative approved licenses and Convey the resulting work. Corresponding
# source of such a combination shall include the source code for all other
# software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
# Test to compare disk-space and access-speed of the different ZBlk format options:
#
# - ZBlk0
# - ZBlk1
# - h
#
# The heuristic 'h' should behave as well as ZBlk0 in case of wide changes
# and as well as ZBlk1 in case of small changes.
# test CHANGE_SIZE_SET CHANGE_COUNT ARRSIZE
#
# Export the three benchmark parameters and run the benchmark script once
# per ZBlk format (h, ZBlk0, ZBlk1), framed by separator lines.
# NOTE: defining this function shadows the shell builtin `test`.
function test {
    # t ZBLKFMT -- run the benchmark script once with zblk_fmt=ZBLKFMT
    function t {
        zblkfmt=$1
        printf 'Run tests with format %s:\n\n' "$zblkfmt"
        export zblk_fmt=$zblkfmt
        python bigfile/tests/_test_zblk_fmt
        printf '\n\n'
    }

    # parameters are read from the environment by the benchmark script
    export change_size_set=$1
    export change_count=$2
    export arrsize=$3

    local rule="---------------------------------------------"
    printf '%s\n%s\n' "$rule" "$rule"
    printf 'Set change_size_set to %s\n' "$change_size_set"
    printf 'Set change_count to %s\n' "$change_count"
    printf 'Set arrsize to %s\n\n' "$arrsize"

    for fmt in h ZBlk0 ZBlk1; do
        t "$fmt"
    done

    printf '\n%s\n%s\n\n' "$rule" "$rule"
}
# scenario TITLE CHANGE_SIZE_SET
# Announce a scenario and benchmark it with 500 changes on a 1000000-row array.
function scenario {
    echo "$1"
    test "$2" 500 1000000
}

scenario "Use only a very small change size, so that heuristic always uses ZBlk1" 20
scenario "Use only a very big change size, so that heuristic always uses ZBlk0" 200000
scenario "Mix between change size so that heuristic switches between ZBlk0 and ZBlk1" 20,200000
# Wendelin.core.bigfile | Tests for ZODB BigFile backend
# Copyright (C) 2014-2021 Nexedi SA and Contributors.
# Copyright (C) 2014-2023 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
......@@ -716,3 +716,27 @@ def test_bigfile_set_zblk_fmt():
transaction.commit()
assert type(f.blktab[0]) is file_zodb.ZBlk1
# Smoke test for zblk fmt 'h': data written through a mapping must read
# back unchanged after a first full-block write and after a later
# one-element change.
@func
def test_bigfile_zblk_fmt_heuristic():
    root = dbopen()
    defer(lambda: dbclose(root))

    zfile = ZBigFile(blksize, zblk_fmt="h")
    root['zfile8'] = zfile
    transaction.commit()

    fileh = zfile.fileh_open()
    vma = fileh.mmap(0, blen)
    blk0 = Blk(vma, 0)

    # first store of the block: whole block set to 1
    blk0[:] = 1
    transaction.commit()
    assert (blk0 == 1).all()

    # second store of the same block: change a single element
    blk0[0] = 2
    transaction.commit()
    assert blk0[0] == 2
......@@ -354,37 +354,42 @@ func (zb *ZBlk1) LoadBlkData(ctx context.Context) (_ []byte, _ zodb.Tid, err err
type ZBigFile struct {
zodb.Persistent
// state: (.blksize, .blktab, .zblk_fmt)
blksize int64
blktab *btree.LOBTree // {} blk -> ZBlk*(blkdata)
zblk_fmt string
// state: (.blksize, .blktab, .zblk_fmt, .zblk_fmt0_counter, .zblk_fmt1_counter)
blksize int64
blktab *btree.LOBTree // {} blk -> ZBlk*(blkdata)
zblk_fmt string
zblk_fmt0_counter int64
zblk_fmt1_counter int64
}
type zBigFileState ZBigFile // hide state methods from public API
// DropState implements zodb.Ghostable.
func (bf *zBigFileState) DropState() {
bf.blksize = 0
bf.blktab = nil
bf.zblk_fmt = ""
bf.blksize = 0
bf.blktab = nil
bf.zblk_fmt = ""
bf.zblk_fmt0_counter = 0
bf.zblk_fmt1_counter = 0
}
// PyGetState implements zodb.PyStateful.
func (bf *zBigFileState) PyGetState() interface{} {
return pickle.Tuple{bf.blksize, bf.blktab, bf.zblk_fmt}
return pickle.Tuple{bf.blksize, bf.blktab, bf.zblk_fmt, bf.zblk_fmt0_counter, bf.zblk_fmt1_counter}
}
// PySetState implements zodb.PyStateful.
func (bf *zBigFileState) PySetState(pystate interface{}) (err error) {
t, ok := pystate.(pickle.Tuple)
if !ok {
return fmt.Errorf("expect [2|3](); got %s", xzodb.TypeOf(pystate))
return fmt.Errorf("expect [2|3|5](); got %s", xzodb.TypeOf(pystate))
}
// BBB: we either accept data before adding zblk_fmt to state
// (lent==2) or data after adding zblk_fmt to state (lent==3).
// (lent==2) or data after adding zblk_fmt to state (lent==3) or
// data after adding zblk_fmt counter (lent==5).
lent := len(t)
if lent != 2 && lent != 3 {
return fmt.Errorf("expect [2|3](); got [%d]()", len(t))
if lent != 2 && lent != 3 && lent != 5 {
return fmt.Errorf("expect [2|3|5](); got [%d]()", len(t))
}
blksize, ok := pycompat.Int64(t[0])
......@@ -409,6 +414,21 @@ func (bf *zBigFileState) PySetState(pystate interface{}) (err error) {
return fmt.Errorf("zblk_fmt: expect str; got %s", xzodb.TypeOf(t[2]))
}
bf.zblk_fmt = zblk_fmt
if lent == 5 {
zblk_fmt0_counter, ok := pycompat.Int64(t[3])
if !ok {
return fmt.Errorf("zblk_fmt0_counter: expect integer; got %s", xzodb.TypeOf(t[3]))
}
zblk_fmt1_counter, ok := pycompat.Int64(t[4])
if !ok {
return fmt.Errorf("zblk_fmt1_counter: expect integer; got %s", xzodb.TypeOf(t[4]))
}
bf.zblk_fmt0_counter = zblk_fmt0_counter
bf.zblk_fmt1_counter = zblk_fmt1_counter
}
}
return nil
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment