Commit 1bb14faf authored by Jim Fulton

Added fsIndex save method and fsIndex load class method for saving and
loading index data.  This leverages the new fsBucket toString and
fromString methods and provides much faster FileStorage index saving and loading
and smaller index files.  On my machine, saves are 5 times faster and
loads are 20 times faster (after a save, when data are in disk
cache).  Indexes are roughly 30% smaller.

The index format has changed.  Old indexes can be read just fine, but
new indexes won't be readable by older versions of ZODB.
parent ef3e0f10
......@@ -2,12 +2,16 @@
Change History
================
3.10.0a1 (2009-12-??)
3.10.0a1 (2010-02-??)
=====================
New Features
------------
- FileStorage indexes use a new format. They are saved and loaded much
faster and take less space. Old indexes can still be read, but new
indexes won't be readable by older versions of ZODB.
- The API for undoing multiple transactions has changed. To undo
multiple transactions in a single transaction, pass a list of
transaction identifiers to a database's undoMultiple method. Calling a
......
......@@ -246,23 +246,7 @@ class FileStorage(
index_name = self.__name__ + '.index'
tmp_name = index_name + '.index_tmp'
f=open(tmp_name,'wb')
p=Pickler(f,1)
# Pickle the index buckets first to avoid deep recursion:
buckets = []
bucket = self._index._data._firstbucket
while bucket is not None:
buckets.append(bucket)
bucket = bucket._next
buckets.reverse()
info=BTrees.OOBTree.Bucket(dict(
_buckets=buckets, index=self._index, pos=self._pos))
p.dump(info)
f.flush()
f.close()
self._index.save(self._pos, tmp_name)
try:
try:
......@@ -357,19 +341,15 @@ class FileStorage(
file_name=self.__name__
index_name=file_name+'.index'
try:
f = open(index_name, 'rb')
except:
if os.path.exists(index_name):
try:
info = fsIndex.load(index_name)
except:
logger.exception('loading index')
return None
else:
return None
p=Unpickler(f)
try:
info=p.load()
except:
exc, err = sys.exc_info()[:2]
logger.warning("Failed to load database index: %s: %s", exc, err)
return None
index = info.get('index')
pos = info.get('pos')
if index is None or pos is None:
......
......@@ -39,6 +39,7 @@
# bytes back before using u64 to convert the data back to (long)
# integers.
import cPickle
import struct
from BTrees._fsBTree import fsBucket
......@@ -62,12 +63,62 @@ def prefix_minus_one(s):
class fsIndex(object):
def __init__(self):
def __init__(self, data=None):
self._data = OOBTree()
if data:
self.update(data)
def __getstate__(self):
return dict(
state_version = 1,
_data = [(k, v.toString())
for (k, v) in self._data.iteritems()
]
)
def __setstate__(self, state):
version = state.pop('state_version', 0)
getattr(self, '_setstate_%s' % version)(state)
def _setstate_0(self, state):
self.__dict__.clear()
self.__dict__.update(state)
def _setstate_1(self, state):
    """Restore version-1 state.

    ``state['_data']`` is read as a sequence of ``(key, serialized)``
    pairs.  Each ``serialized`` value is turned back into a bucket with
    ``fsBucket().fromString`` and the resulting pairs populate a new
    ``OOBTree`` stored as ``self._data``.
    """
    restored = []
    for prefix, serialized in state['_data']:
        restored.append((prefix, fsBucket().fromString(serialized)))
    self._data = OOBTree(restored)
def __getitem__(self, key):
    """Return the number stored for *key*.

    The first six characters of *key* select an entry of ``self._data``;
    the remainder of the key is looked up inside that entry, and the
    stored value is converted with ``str2num``.  A missing prefix or
    suffix propagates whatever the underlying lookups raise.
    """
    prefix, suffix = key[:6], key[6:]
    bucket = self._data[prefix]
    return str2num(bucket[suffix])
def save(self, pos, fname):
    """Write *pos* and the index data to the file named *fname*.

    The file is a sequence of protocol-1 pickles: first *pos*, then one
    ``(key, value.toString())`` pair per item of ``self._data``, and
    finally ``None`` to mark the end of the records.
    """
    with open(fname, 'wb') as out:
        writer = cPickle.Pickler(out, 1)
        # Fast mode: each record is dumped independently.
        writer.fast = True
        dump = writer.dump
        dump(pos)
        for prefix, bucket in self._data.iteritems():
            dump((prefix, bucket.toString()))
        dump(None)
@classmethod
def load(class_, fname):
    """Load index data from the file named *fname*.

    The first pickle in the file decides the format.  If it is not an
    integer, the file is treated as old-format and that first object is
    returned unchanged.  Otherwise it is the saved position, followed by
    ``(key, serialized)`` records up to the first falsy record; each
    record is rebuilt with ``fsBucket().fromString`` into a new index.

    Returns a dict with ``pos`` and ``index`` entries for the new
    format.
    """
    with open(fname, 'rb') as source:
        reader = cPickle.Unpickler(source)
        pos = reader.load()
        if not isinstance(pos, (int, long)):
            return pos  # Old format
        index = class_()
        buckets = index._data
        record = reader.load()
        # Any falsy record ends the stream.
        while record:
            prefix, serialized = record
            buckets[prefix] = fsBucket().fromString(serialized)
            record = reader.load()
        return dict(pos=pos, index=index)
def get(self, key, default=None):
tree = self._data.get(key[:6], default)
if tree is default:
......
......@@ -11,6 +11,7 @@
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
import cPickle
import os, unittest
import transaction
import ZODB.FileStorage
......@@ -19,6 +20,7 @@ import ZODB.tests.util
import zope.testing.setupstack
from ZODB import POSException
from ZODB import DB
from ZODB.fsIndex import fsIndex
from ZODB.tests import StorageTestBase, BasicStorage, TransactionalUndoStorage
from ZODB.tests import PackableStorage, Synchronization, ConflictResolution
......@@ -69,7 +71,6 @@ class FileStorageTests(
self.fail("expect long user field to raise error")
def check_use_fsIndex(self):
from ZODB.fsIndex import fsIndex
self.assertEqual(self._storage._index.__class__, fsIndex)
......@@ -78,21 +79,13 @@ class FileStorageTests(
def convert_index_to_dict(self):
# Convert the index in the current .index file to a Python dict.
# Return the index originally found.
import cPickle as pickle
f = open('FileStorageTests.fs.index', 'r+b')
p = pickle.Unpickler(f)
data = p.load()
data = fsIndex.load('FileStorageTests.fs.index')
index = data['index']
newindex = dict(index)
data['index'] = newindex
f.seek(0)
f.truncate()
p = pickle.Pickler(f, 1)
p.dump(data)
f.close()
cPickle.dump(data, open('FileStorageTests.fs.index', 'wb'), 1)
return index
def check_conversion_to_fsIndex(self, read_only=False):
......
......@@ -11,11 +11,13 @@
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
import unittest
import doctest
import random
import unittest
from ZODB.fsIndex import fsIndex
from ZODB.utils import p64, z64
from ZODB.tests.util import setUp, tearDown
class Test(unittest.TestCase):
......@@ -30,7 +32,7 @@ class Test(unittest.TestCase):
index = self.index
self.assert_(p64(1000) in index)
self.assert_(p64(100*1000) in index)
del self.index[p64(1000)]
del self.index[p64(100*1000)]
......@@ -186,9 +188,44 @@ class Test(unittest.TestCase):
self.assertEqual(index.minKey(b), c)
self.assertRaises(ValueError, index.minKey, d)
def test_suite():
loader=unittest.TestLoader()
return loader.loadTestsFromTestCase(Test)
def fsIndex_save_and_load():
"""
fsIndex objects now have save methods for saving them to disk in a new
format. The fsIndex class has a load class method that can load data.
Let's start by creating an fsIndex. We'll bother to allocate the
object ids to get multiple buckets:
>>> index = fsIndex(dict((p64(i), i) for i in xrange(0, 1<<28, 1<<15)))
>>> len(index._data)
4096
Now, we'll save the data to disk and then load it:
if __name__=='__main__':
unittest.TextTestRunner().run(test_suite())
>>> index.save(42, 'index')
Note that we pass a file position, which gets saved with the index data.
>>> info = fsIndex.load('index')
>>> info['pos']
42
>>> info['index'].__getstate__() == index.__getstate__()
True
If we save the data in the old format, we can still read it:
>>> import cPickle
>>> cPickle.dump(dict(pos=42, index=index), open('old', 'wb'), 1)
>>> info = fsIndex.load('old')
>>> info['pos']
42
>>> info['index'].__getstate__() == index.__getstate__()
True
"""
def test_suite():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(Test))
suite.addTest(doctest.DocTestSuite(setUp=setUp, tearDown=tearDown))
return suite
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment