Commit 2f8cc67a authored by Julien Muchembled's avatar Julien Muchembled

Make TransactionMetaData in charge of (de)serializing extension data

IStorage implementations used to do this task themselves which leads to code
duplication and sometimes bugs (one was fixed recently in NEO). Like for object
serialization, this should be done by the upper layer (Connection).

This commit also provides a way to get raw extensions data while iterating
over transactions (this is actually the original purpose[2]). So far, extension
data could only be retrieved unpickled, which caused several problems:

- tools like `zodb dump` [1] cannot dump data exactly as stored on a
  storage. This makes database potentially not bit-to-bit identical to
  its original after restoring from such dump.

- `zodb dump` output could be changing from run to run on the same
  database. This comes from the fact that e.g. python dictionaries are
  unordered and so when pickling a dict back to bytes the result could
  be not the same as original.

  ( this problem can be worked-around partly to work reliably for e.g.
    dict with str keys - by always emitting items in key sorted order,
    but it is hard to make it work reliably for arbitrary types )

Both issues make it hard to verify integrity of database at the lowest
possible level after restoration, and make it hard to verify bit-to-bit
compatibility with non-python ZODB implementations.

For this, TransactionMetaData gets a new 'extension_bytes' attribute and
and common usage becomes:

* Application committing a transaction:

  - 'extension' is set with a dictionary
  - the storage gets the bytes via 'extension_bytes'

* Iteration:

  - the storage passes bytes as 'extension' parameter of TransactionMetaData
  - the application can get extension data either as bytes ('extension_bytes')
    or deserialized ('extension'): in the former case, no deserialization
    happens and the returned value is exactly what was passed by the storage

[1] https://lab.nexedi.com/nexedi/zodbtools
[2] https://github.com/zopefoundation/ZODB/pull/183Co-Authored-By: Kirill Smelkov's avatarKirill Smelkov <kirr@nexedi.com>
parent 98dc5f1e
...@@ -18,6 +18,12 @@ ...@@ -18,6 +18,12 @@
rolled back. See `issue 268 rolled back. See `issue 268
<https://github.com/zopefoundation/ZODB/issues/268>`_. <https://github.com/zopefoundation/ZODB/issues/268>`_.
- Make TransactionMetaData in charge of (de)serializing extension data.
A new ``extension_bytes`` attribute converts automatically from
``extension``, or vice-versa. During storage iteration, ``extension_bytes``
holds bytes as they are stored (i.e. no deserialization happens).
See `issue 207 <https://github.com/zopefoundation/ZODB/pull/207>`_.
5.5.1 (2018-10-25) 5.5.1 (2018-10-25)
================== ==================
......
...@@ -31,7 +31,7 @@ from . import POSException, utils ...@@ -31,7 +31,7 @@ from . import POSException, utils
from .Connection import TransactionMetaData from .Connection import TransactionMetaData
from .utils import z64, oid_repr, byte_ord, byte_chr, load_current from .utils import z64, oid_repr, byte_ord, byte_chr, load_current
from .UndoLogCompatible import UndoLogCompatible from .UndoLogCompatible import UndoLogCompatible
from ._compat import dumps, _protocol, py2_hasattr from ._compat import py2_hasattr
log = logging.getLogger("ZODB.BaseStorage") log = logging.getLogger("ZODB.BaseStorage")
...@@ -190,11 +190,7 @@ class BaseStorage(UndoLogCompatible): ...@@ -190,11 +190,7 @@ class BaseStorage(UndoLogCompatible):
user = transaction.user user = transaction.user
desc = transaction.description desc = transaction.description
ext = transaction.extension ext = transaction.extension_bytes
if ext:
ext = dumps(ext, _protocol)
else:
ext = ""
self._ude = user, desc, ext self._ude = user, desc, ext
......
...@@ -49,10 +49,10 @@ from ZODB.utils import p64, u64, z64, oid_repr, positive_id ...@@ -49,10 +49,10 @@ from ZODB.utils import p64, u64, z64, oid_repr, positive_id
from ZODB import utils from ZODB import utils
import six import six
from ._compat import dumps, loads, _protocol
from .mvccadapter import HistoricalStorageAdapter from .mvccadapter import HistoricalStorageAdapter
from . import valuedoc from . import valuedoc
from . import _compat
global_reset_counter = 0 global_reset_counter = 0
...@@ -1303,10 +1303,24 @@ large-record-size option in a configuration file) to specify a larger ...@@ -1303,10 +1303,24 @@ large-record-size option in a configuration file) to specify a larger
size. size.
""" """
class overridable_property(object):
"""
Same as property() with only a getter, except that setting a
value overrides the property rather than raising AttributeError.
"""
def __init__(self, func):
self.__doc__ = func.__doc__
self.func = func
def __get__(self, obj, cls):
return self if obj is None else self.func(obj)
@implementer(IStorageTransactionMetaData) @implementer(IStorageTransactionMetaData)
class TransactionMetaData(object): class TransactionMetaData(object):
def __init__(self, user=u'', description=u'', extension=b''): def __init__(self, user=u'', description=u'', extension=None):
if not isinstance(user, bytes): if not isinstance(user, bytes):
user = user.encode('utf-8') user = user.encode('utf-8')
self.user = user self.user = user
...@@ -1315,9 +1329,20 @@ class TransactionMetaData(object): ...@@ -1315,9 +1329,20 @@ class TransactionMetaData(object):
description = description.encode('utf-8') description = description.encode('utf-8')
self.description = description self.description = description
if not isinstance(extension, dict): if isinstance(extension, bytes):
extension = _compat.loads(extension) if extension else {} self.extension_bytes = extension
self.extension = extension else:
self.extension = {} if extension is None else extension
@overridable_property
def extension(self):
extension_bytes = self.extension_bytes
return loads(extension_bytes) if extension_bytes else {}
@overridable_property
def extension_bytes(self):
extension = self.extension
return dumps(extension, _protocol) if extension else b''
def note(self, text): # for tests def note(self, text): # for tests
text = text.strip() text = text.strip()
......
...@@ -1988,15 +1988,8 @@ class FileIterator(FileStorageFormatter): ...@@ -1988,15 +1988,8 @@ class FileIterator(FileStorageFormatter):
if h.status != "u": if h.status != "u":
pos = tpos + h.headerlen() pos = tpos + h.headerlen()
e = {}
if h.elen:
try:
e = loads(h.ext)
except:
pass
result = TransactionRecord(h.tid, h.status, h.user, h.descr, result = TransactionRecord(h.tid, h.status, h.user, h.descr,
e, pos, tend, self._file, tpos) h.ext, pos, tend, self._file, tpos)
# Read the (intentionally redundant) transaction length # Read the (intentionally redundant) transaction length
self._file.seek(tend) self._file.seek(tend)
......
...@@ -83,7 +83,6 @@ except ImportError: ...@@ -83,7 +83,6 @@ except ImportError:
import ZODB.FileStorage import ZODB.FileStorage
from ZODB.utils import u64, as_text from ZODB.utils import u64, as_text
from ZODB.FileStorage import TransactionRecord from ZODB.FileStorage import TransactionRecord
from ZODB._compat import loads
from persistent.TimeStamp import TimeStamp from persistent.TimeStamp import TimeStamp
...@@ -143,12 +142,9 @@ def read_txn_header(f, pos, file_size, outp, ltid): ...@@ -143,12 +142,9 @@ def read_txn_header(f, pos, file_size, outp, ltid):
pos = tpos+(23+ul+dl+el) pos = tpos+(23+ul+dl+el)
user = f.read(ul) user = f.read(ul)
description = f.read(dl) description = f.read(dl)
if el: ext = f.read(el)
try: e = loads(f.read(el))
except: e = {}
else: e = {}
result = TransactionRecord(tid, status, user, description, e, pos, tend, result = TransactionRecord(tid, status, user, description, ext, pos, tend,
f, tpos) f, tpos)
pos = tend pos = tend
......
...@@ -540,12 +540,19 @@ class IStorageTransactionMetaData(Interface): ...@@ -540,12 +540,19 @@ class IStorageTransactionMetaData(Interface):
Note that unlike transaction.interfaces.ITransaction, the ``user`` Note that unlike transaction.interfaces.ITransaction, the ``user``
and ``description`` attributes are bytes, not text. and ``description`` attributes are bytes, not text.
If either 'extension' or 'extension_bytes' is unset, it is automatically
converted for the other, which is set during __init__ depending of the
passed extension is of types bytes or not (the default value None sets {}).
The 2 attributes should not be set at the same time.
""" """
user = Attribute("Bytes transaction user") user = Attribute("Bytes transaction user")
description = Attribute("Bytes transaction Description") description = Attribute("Bytes transaction Description")
extension = Attribute( extension = Attribute(
"A dictionary carrying a transaction's extended_info data") "A dictionary carrying a transaction's extended_info data")
extension_bytes = Attribute(
"Serialization of 'extension' attribute"
" (verbatim bytes in case of storage iteration)")
def set_data(ob, data): def set_data(ob, data):
"""Hold data on behalf of an object """Hold data on behalf of an object
......
...@@ -49,6 +49,10 @@ class IteratorCompare(object): ...@@ -49,6 +49,10 @@ class IteratorCompare(object):
class IteratorStorage(IteratorCompare): class IteratorStorage(IteratorCompare):
# Override this in tests for storages that delegate to TransactionMetaData
# the task to (de)serialize extension data.
use_extension_bytes = False
def checkSimpleIteration(self): def checkSimpleIteration(self):
# Store a bunch of revisions of a single object # Store a bunch of revisions of a single object
self._oid = oid = self._storage.new_oid() self._oid = oid = self._storage.new_oid()
...@@ -85,14 +89,28 @@ class IteratorStorage(IteratorCompare): ...@@ -85,14 +89,28 @@ class IteratorStorage(IteratorCompare):
self.assertEqual(rec.data, None) self.assertEqual(rec.data, None)
def checkTransactionExtensionFromIterator(self): def checkTransactionExtensionFromIterator(self):
# It will be deserialized into a simple dict, which will be serialized
# differently. This simulates that 'dumps(loads(x), ...)' does not
# always return x.
class ext(dict):
def __reduce__(self):
return dict,(tuple(self.items()),)
ext = ext(foo=1)
oid = self._storage.new_oid() oid = self._storage.new_oid()
revid = self._dostore(oid, data=MinPO(1)) revid = self._dostore(oid, data=MinPO(1), extension=ext)
iter = self._storage.iterator() txn, = self._storage.iterator()
count = 0 self.assertEqual(txn.extension, ext)
for txn in iter: try:
self.assertEqual(txn.extension, {}) extension_bytes = txn.extension_bytes
count +=1 except AttributeError:
self.assertEqual(count, 1) # Volatile storages often don't serialize extension because it
# would be useless.
return
txn.extension = ext
if self.use_extension_bytes:
self.assertEqual(extension_bytes, txn.extension_bytes)
else:
self.assertNotEqual(extension_bytes, txn.extension_bytes)
def checkIterationIntraTransaction(self): def checkIterationIntraTransaction(self):
# TODO: Try this test with logging enabled. If you see something # TODO: Try this test with logging enabled. If you see something
...@@ -215,6 +233,7 @@ class IteratorDeepCompare(object): ...@@ -215,6 +233,7 @@ class IteratorDeepCompare(object):
eq(txn1.user, txn2.user) eq(txn1.user, txn2.user)
eq(txn1.description, txn2.description) eq(txn1.description, txn2.description)
eq(txn1.extension, txn2.extension) eq(txn1.extension, txn2.extension)
eq(txn1.extension_bytes, txn2.extension_bytes)
itxn1 = iter(txn1) itxn1 = iter(txn1)
itxn2 = iter(txn2) itxn2 = iter(txn2)
for rec1, rec2 in zip(itxn1, itxn2): for rec1, rec2 in zip(itxn1, itxn2):
......
...@@ -124,7 +124,7 @@ class StorageTestBase(ZODB.tests.util.TestCase): ...@@ -124,7 +124,7 @@ class StorageTestBase(ZODB.tests.util.TestCase):
ZODB.tests.util.TestCase.tearDown(self) ZODB.tests.util.TestCase.tearDown(self)
def _dostore(self, oid=None, revid=None, data=None, def _dostore(self, oid=None, revid=None, data=None,
already_pickled=0, user=None, description=None): already_pickled=0, user=None, description=None, extension=None):
"""Do a complete storage transaction. The defaults are: """Do a complete storage transaction. The defaults are:
- oid=None, ask the storage for a new oid - oid=None, ask the storage for a new oid
...@@ -144,7 +144,7 @@ class StorageTestBase(ZODB.tests.util.TestCase): ...@@ -144,7 +144,7 @@ class StorageTestBase(ZODB.tests.util.TestCase):
if not already_pickled: if not already_pickled:
data = zodb_pickle(data) data = zodb_pickle(data)
# Begin the transaction # Begin the transaction
t = TransactionMetaData() t = TransactionMetaData(extension=extension)
if user is not None: if user is not None:
t.user = user t.user = user
if description is not None: if description is not None:
......
...@@ -56,6 +56,8 @@ class FileStorageTests( ...@@ -56,6 +56,8 @@ class FileStorageTests(
ReadOnlyStorage.ReadOnlyStorage ReadOnlyStorage.ReadOnlyStorage
): ):
use_extension_bytes = True
def open(self, **kwargs): def open(self, **kwargs):
self._storage = ZODB.FileStorage.FileStorage('FileStorageTests.fs', self._storage = ZODB.FileStorage.FileStorage('FileStorageTests.fs',
**kwargs) **kwargs)
...@@ -77,7 +79,13 @@ class FileStorageTests( ...@@ -77,7 +79,13 @@ class FileStorageTests(
except POSException.StorageError: except POSException.StorageError:
pass pass
else: else:
self.fail("expect long user field to raise error") self.fail("expect long description field to raise error")
try:
self._dostore(extension={s: 1})
except POSException.StorageError:
pass
else:
self.fail("expect long extension field to raise error")
def check_use_fsIndex(self): def check_use_fsIndex(self):
......
...@@ -14,17 +14,18 @@ ...@@ -14,17 +14,18 @@
import unittest import unittest
import warnings import warnings
from .._compat import dumps, loads, _protocol from .._compat import dumps, loads
from ..Connection import TransactionMetaData from ..Connection import TransactionMetaData
class TransactionMetaDataTests(unittest.TestCase): class TransactionMetaDataTests(unittest.TestCase):
def test_basic(self): def test_basic(self):
t = TransactionMetaData( extension = dict(foo='FOO')
u'user\x80', u'description\x80', dict(foo='FOO')) t = TransactionMetaData(u'user\x80', u'description\x80', extension)
self.assertEqual(t.user, b'user\xc2\x80') self.assertEqual(t.user, b'user\xc2\x80')
self.assertEqual(t.description, b'description\xc2\x80') self.assertEqual(t.description, b'description\xc2\x80')
self.assertEqual(t.extension, dict(foo='FOO')) self.assertEqual(t.extension, extension)
self.assertEqual(loads(t.extension_bytes), extension)
with warnings.catch_warnings(record=True) as w: with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always") warnings.simplefilter("always")
self.assertEqual(t._extension, t.extension) self.assertEqual(t._extension, t.extension)
...@@ -33,14 +34,16 @@ class TransactionMetaDataTests(unittest.TestCase): ...@@ -33,14 +34,16 @@ class TransactionMetaDataTests(unittest.TestCase):
self.assertTrue("_extension is deprecated" in str(w[-1].message)) self.assertTrue("_extension is deprecated" in str(w[-1].message))
def test_basic_no_encoding(self): def test_basic_no_encoding(self):
t = TransactionMetaData( extension = dict(foo='FOO')
b'user', b'description', dumps(dict(foo='FOO'), _protocol)) extension_bytes = dumps(extension)
t = TransactionMetaData(b'user', b'description', extension_bytes)
self.assertEqual(t.user, b'user') self.assertEqual(t.user, b'user')
self.assertEqual(t.description, b'description') self.assertEqual(t.description, b'description')
self.assertEqual(t.extension, dict(foo='FOO')) self.assertEqual(t.extension, extension)
with warnings.catch_warnings(): with warnings.catch_warnings():
warnings.simplefilter("ignore") warnings.simplefilter("ignore")
self.assertEqual(t._extension, t.extension) self.assertEqual(t._extension, t.extension)
self.assertIs(t.extension_bytes, extension_bytes)
def test_constructor_default_args(self): def test_constructor_default_args(self):
t = TransactionMetaData() t = TransactionMetaData()
...@@ -52,23 +55,28 @@ class TransactionMetaDataTests(unittest.TestCase): ...@@ -52,23 +55,28 @@ class TransactionMetaDataTests(unittest.TestCase):
self.assertEqual(t._extension, t.extension) self.assertEqual(t._extension, t.extension)
def test_set_extension(self): def test_set_extension(self):
t = TransactionMetaData(u'', u'', b'') data = {}
t = TransactionMetaData(u'', u'', data)
self.assertEqual(t.user, b'') self.assertEqual(t.user, b'')
self.assertEqual(t.description, b'') self.assertEqual(t.description, b'')
self.assertEqual(t.extension, {}) self.assertIs(t.extension, data)
with warnings.catch_warnings(): with warnings.catch_warnings():
warnings.simplefilter("ignore") warnings.simplefilter("ignore")
self.assertEqual(t._extension, t.extension) self.assertEqual(t._extension, t.extension)
self.assertEqual(t.extension_bytes, b'')
for name in 'extension', '_extension': for name in 'extension', '_extension':
data = {name: name + 'foo'} data = {name: name + 'foo'}
setattr(t, name, data) setattr(t, name, data)
self.assertEqual(t.extension, data) self.assertIs(t.extension, data)
self.assertEqual(t._extension, t.extension) self.assertIs(t._extension, t.extension)
data = {} extension_bytes = t.extension_bytes
setattr(t, name, data) self.assertEqual(loads(extension_bytes), data)
self.assertEqual(t.extension, data) empty = {}
self.assertEqual(t._extension, t.extension) setattr(t, name, empty)
self.assertIs(t.extension, empty)
self.assertIs(t._extension, t.extension)
self.assertEqual(t.extension_bytes, b'')
def test_used_by_connection(self): def test_used_by_connection(self):
import ZODB import ZODB
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment