Commit 0718a45e authored by Jason Madden

Use a Python __reduce__ method to make pickles match C.

On its own, this breaks on protocols >= 2 (including Python 3 by
default) because the pickled object's class is required to match the
reduce output. One fix would be to change the pickle format to use a
custom constructor function, but then we'd have different pickle formats
in C and Python.

Rather than do that, we instead create a __class__ property for the
python implementation. This bypasses the protocol limitations. The
__class__ property wasn't assignable in the C implementation, so the
compatibility risk should be very small.

All the special pickle machinery is only defined in Python if needed.
parent f1f88b6b
......@@ -14,7 +14,7 @@
__all__ = ('Bucket', 'Set', 'BTree', 'TreeSet',
'IFBucket', 'IFSet', 'IFBTree', 'IFTreeSet',
'union', 'intersection', 'difference',
'union', 'intersection', 'difference',
'weightedUnion', 'weightedIntersection', 'multiunion',
)
......@@ -38,6 +38,7 @@ from ._base import to_float as _to_value
from ._base import union as _union
from ._base import weightedIntersection as _weightedIntersection
from ._base import weightedUnion as _weightedUnion
from ._base import _fix_pickle
_BUCKET_SIZE = 120
_TREE_SIZE = 500
......@@ -134,4 +135,6 @@ Set = IFSet
BTree = IFBTree
TreeSet = IFTreeSet
_fix_pickle(globals(), __name__)
moduleProvides(IIntegerFloatBTreeModule)
......@@ -14,7 +14,7 @@
__all__ = ('Bucket', 'Set', 'BTree', 'TreeSet',
'IIBucket', 'IISet', 'IIBTree', 'IITreeSet',
'union', 'intersection', 'difference',
'union', 'intersection', 'difference',
'weightedUnion', 'weightedIntersection', 'multiunion',
)
......@@ -38,6 +38,7 @@ from ._base import to_int as _to_value
from ._base import union as _union
from ._base import weightedIntersection as _weightedIntersection
from ._base import weightedUnion as _weightedUnion
from ._base import _fix_pickle
_BUCKET_SIZE = 120
_TREE_SIZE = 500
......@@ -135,4 +136,6 @@ Set = IISet
BTree = IIBTree
TreeSet = IITreeSet
_fix_pickle(globals(), __name__)
moduleProvides(IIntegerIntegerBTreeModule)
......@@ -33,6 +33,7 @@ from ._base import set_operation as _set_operation
from ._base import to_int as _to_key
from ._base import to_ob as _to_value
from ._base import union as _union
from ._base import _fix_pickle
_BUCKET_SIZE = 60
_TREE_SIZE = 500
......@@ -113,4 +114,6 @@ Set = IOSet
BTree = IOBTree
TreeSet = IOTreeSet
_fix_pickle(globals(), __name__)
moduleProvides(IIntegerObjectBTreeModule)
......@@ -14,7 +14,7 @@
__all__ = ('Bucket', 'Set', 'BTree', 'TreeSet',
'LFBucket', 'LFSet', 'LFBTree', 'LFTreeSet',
'union', 'intersection', 'difference',
'union', 'intersection', 'difference',
'weightedUnion', 'weightedIntersection', 'multiunion',
)
......@@ -38,6 +38,7 @@ from ._base import to_float as _to_value
from ._base import union as _union
from ._base import weightedIntersection as _weightedIntersection
from ._base import weightedUnion as _weightedUnion
from ._base import _fix_pickle
_BUCKET_SIZE = 120
_TREE_SIZE = 500
......@@ -135,4 +136,6 @@ Set = LFSet
BTree = LFBTree
TreeSet = LFTreeSet
_fix_pickle(globals(), __name__)
moduleProvides(IIntegerFloatBTreeModule)
......@@ -14,7 +14,7 @@
__all__ = ('Bucket', 'Set', 'BTree', 'TreeSet',
'LLBucket', 'LLSet', 'LLBTree', 'LLTreeSet',
'union', 'intersection', 'difference',
'union', 'intersection', 'difference',
'weightedUnion', 'weightedIntersection', 'multiunion',
)
......@@ -38,6 +38,7 @@ from ._base import to_long as _to_value
from ._base import union as _union
from ._base import weightedIntersection as _weightedIntersection
from ._base import weightedUnion as _weightedUnion
from ._base import _fix_pickle
_BUCKET_SIZE = 120
_TREE_SIZE = 500
......@@ -135,4 +136,6 @@ Set = LLSet
BTree = LLBTree
TreeSet = LLTreeSet
_fix_pickle(globals(), __name__)
moduleProvides(IIntegerIntegerBTreeModule)
......@@ -33,6 +33,7 @@ from ._base import set_operation as _set_operation
from ._base import to_long as _to_key
from ._base import to_ob as _to_value
from ._base import union as _union
from ._base import _fix_pickle
_BUCKET_SIZE = 60
_TREE_SIZE = 500
......@@ -114,4 +115,6 @@ Set = LOSet
BTree = LOBTree
TreeSet = LOTreeSet
_fix_pickle(globals(), __name__)
moduleProvides(IIntegerObjectBTreeModule)
......@@ -14,7 +14,7 @@
__all__ = ('Bucket', 'Set', 'BTree', 'TreeSet',
'OIBucket', 'OISet', 'OIBTree', 'OITreeSet',
'union', 'intersection', 'difference',
'union', 'intersection', 'difference',
'weightedUnion', 'weightedIntersection',
)
......@@ -37,6 +37,7 @@ from ._base import to_int as _to_value
from ._base import union as _union
from ._base import weightedIntersection as _weightedIntersection
from ._base import weightedUnion as _weightedUnion
from ._base import _fix_pickle
_BUCKET_SIZE = 60
_TREE_SIZE = 250
......@@ -131,4 +132,6 @@ Set = OISet
BTree = OIBTree
TreeSet = OITreeSet
_fix_pickle(globals(), __name__)
moduleProvides(IObjectIntegerBTreeModule)
......@@ -14,7 +14,7 @@
__all__ = ('Bucket', 'Set', 'BTree', 'TreeSet',
'OLBucket', 'OLSet', 'OLBTree', 'OLTreeSet',
'union', 'intersection', 'difference',
'union', 'intersection', 'difference',
'weightedUnion', 'weightedIntersection',
)
......@@ -37,6 +37,7 @@ from ._base import to_long as _to_value
from ._base import union as _union
from ._base import weightedIntersection as _weightedIntersection
from ._base import weightedUnion as _weightedUnion
from ._base import _fix_pickle
_BUCKET_SIZE = 60
_TREE_SIZE = 250
......@@ -131,4 +132,6 @@ Set = OLSet
BTree = OLBTree
TreeSet = OLTreeSet
_fix_pickle(globals(), __name__)
moduleProvides(IObjectIntegerBTreeModule)
......@@ -14,7 +14,7 @@
__all__ = ('Bucket', 'Set', 'BTree', 'TreeSet',
'OOBucket', 'OOSet', 'OOBTree', 'OOTreeSet',
'union', 'intersection','difference',
'union', 'intersection','difference',
)
from zope.interface import moduleProvides
......@@ -31,6 +31,7 @@ from ._base import set_operation as _set_operation
from ._base import to_ob as _to_key
from ._base import to_ob as _to_value
from ._base import union as _union
from ._base import _fix_pickle
_BUCKET_SIZE = 30
_TREE_SIZE = 250
......@@ -102,9 +103,13 @@ else: #pragma NO COVER w/o C extensions
from ._OOBTree import union
from ._OOBTree import intersection
Bucket = OOBucket
Set = OOSet
BTree = OOBTree
TreeSet = OOTreeSet
_fix_pickle(globals(), __name__)
moduleProvides(IObjectObjectBTreeModule)
......@@ -40,6 +40,45 @@ class _Base(Persistent):
if items:
self.update(items)
try:
# Detect the presence of the C extensions.
# If they're NOT around, we don't need to do any of the
# special pickle support to make Python versions look like
# C---we just rename the classes. By not defining these methods,
# we can (theoretically) regain a bit of speed.
# If the C extensions are around, we do need these methods, but
# these classes are unlikely to be used in production anyway.
__import__('BTrees._OOBTree')
except ImportError:
# The C extension could not be imported; nothing extra to define here.
pass
else:
def __reduce__(self):
# Swap out the type constructor for the C version, if present.
# NOTE(review): type_self is assigned here but never read in this method.
type_self = type(self)
# Start from the reduce tuple produced by Persistent and rebuild it
# with a different first argument.
func, typ_gna, state = Persistent.__reduce__(self)
# We ignore the returned type altogether in favor of
# our calculated class (which allows subclasses but replaces our exact
# type with the C equivalent)
typ = self.__class__
# Keep every argument after the leading type unchanged.
gna = typ_gna[1:]
return (func, (typ,) + gna, state)
@property
def __class__(self):
# Report _BTree_reduce_as only when the real type is its own
# _BTree_reduce_up_bound; for any other type, report the real type.
type_self = type(self)
return type_self._BTree_reduce_as if type_self._BTree_reduce_up_bound is type_self else type_self
@property
def _BTree_reduce_as(self):
# Return the pickle replacement class for this object.
# If the C extensions are available, this will be the
# C type (setup by _fix_pickle), otherwise it will be the real
# type of this object.
# This implementation is replaced by _fix_pickle and exists for
# testing purposes.
return type(self)
# Default bound: the very same property object as _BTree_reduce_as.
# _fix_pickle assigns a concrete per-type value to both attributes.
_BTree_reduce_up_bound = _BTree_reduce_as
class _BucketBase(_Base):
......@@ -328,7 +367,7 @@ class Bucket(_BucketBase):
def _split(self, index=-1):
if index < 0 or index >= len(self._keys):
index = len(self._keys) // 2
new_instance = self.__class__()
new_instance = type(self)()
new_instance._keys = self._keys[index:]
new_instance._values = self._values[index:]
del self._keys[index:]
......@@ -388,13 +427,13 @@ class Bucket(_BucketBase):
values.append(state[i+1])
def _p_resolveConflict(self, s_old, s_com, s_new):
b_old = self.__class__()
b_old = type(self)()
if s_old is not None:
b_old.__setstate__(s_old)
b_com = self.__class__()
b_com = type(self)()
if s_com is not None:
b_com.__setstate__(s_com)
b_new = self.__class__()
b_new = type(self)()
if s_new is not None:
b_new.__setstate__(s_new)
if (b_com._next != b_old._next or
......@@ -412,7 +451,7 @@ class Bucket(_BucketBase):
return BTreesConflictError(
i_old.position, i_com.position, i_new.position, reason)
result = self.__class__()
result = type(self)()
def merge_output(it):
result._keys.append(it.key)
......@@ -586,7 +625,7 @@ class Set(_BucketBase):
def _split(self, index=-1):
if index < 0 or index >= len(self._keys):
index = len(self._keys) // 2
new_instance = self.__class__()
new_instance = type(self)()
new_instance._keys = self._keys[index:]
del self._keys[index:]
new_instance._next = self._next
......@@ -595,13 +634,13 @@ class Set(_BucketBase):
def _p_resolveConflict(self, s_old, s_com, s_new):
b_old = self.__class__()
b_old = type(self)()
if s_old is not None:
b_old.__setstate__(s_old)
b_com = self.__class__()
b_com = type(self)()
if s_com is not None:
b_com.__setstate__(s_com)
b_new = self.__class__()
b_new = type(self)()
if s_new is not None:
b_new.__setstate__(s_new)
......@@ -620,7 +659,7 @@ class Set(_BucketBase):
return BTreesConflictError(
i_old.position, i_com.position, i_new.position, reason)
result = self.__class__()
result = type(self)()
def merge_output(it):
result._keys.append(it.key)
......@@ -739,6 +778,15 @@ class _Tree(_Base):
'_firstbucket',
)
def __new__(cls, *args):
    """Allocate a tree whose storage attributes are always defined.

    An empty tree never has ``__setstate__`` invoked while being
    unpickled (and ``__init__`` is not run either), which means
    ``clear()`` is never reached.  Seeding ``_data`` and
    ``_firstbucket`` at allocation time guarantees both exist anyway.
    """
    instance = _Base.__new__(cls, *args)
    instance._data = []
    instance._firstbucket = None
    return instance
def setdefault(self, key, value):
return self._set(self._to_key(key), self._to_value(value), True)[1]
......@@ -768,7 +816,9 @@ class _Tree(_Base):
self._del(self._to_key(key))
def clear(self):
self._data = []
if self._data:
# In the case of __init__, this was already set by __new__
self._data = []
self._firstbucket = None
def __nonzero__(self):
......@@ -888,7 +938,7 @@ class _Tree(_Base):
result = child._set(key, value, ifunset)
grew = result[0]
if grew:
if child.__class__ is self.__class__:
if type(child) is type(self):
max_size = self.max_internal_size
else:
max_size = self.max_leaf_size
......@@ -901,7 +951,7 @@ class _Tree(_Base):
# changed, it's *our* oid that should be marked as changed -- the
# bucket doesn't have one.
if (grew is not None and
child.__class__ is self._bucket_type and
type(child) is self._bucket_type and
len(data) == 1 and
child._p_oid is None):
self._p_changed = 1
......@@ -915,7 +965,7 @@ class _Tree(_Base):
self._split_root()
def _split_root(self):
child = self.__class__()
child = type(self)()
child._data = self._data
child._firstbucket = self._firstbucket
self._data = [_TreeItem(None, child)]
......@@ -926,13 +976,13 @@ class _Tree(_Base):
if index is None:
index = len(data) // 2
next = self.__class__()
next = type(self)()
next._data = data[index:]
first = data[index]
del data[index:]
if len(data) == 0:
self._firstbucket = None # lost our bucket, can't buy no beer
if isinstance(first.child, self.__class__):
if isinstance(first.child, type(self)):
next._firstbucket = first.child._firstbucket
else:
next._firstbucket = first.child;
......@@ -955,7 +1005,7 @@ class _Tree(_Base):
# See comment in _set about small trees
if (len(data) == 1 and
child.__class__ is self._bucket_type and
type(child) is self._bucket_type and
child._p_oid is None):
self._p_changed = True
......@@ -972,7 +1022,7 @@ class _Tree(_Base):
self._firstbucket = child._firstbucket
if not child.size:
if child.__class__ is self._bucket_type:
if type(child) is self._bucket_type:
if index:
data[index-1].child._deleteNextBucket()
else:
......@@ -989,10 +1039,12 @@ class _Tree(_Base):
data = self._data
if not data:
# Note: returning None here causes our __setstate__
# to not be called on unpickling
return None
if (len(data) == 1 and
data[0].child.__class__ is not self.__class__ and
type(data[0].child) is not type(self) and
data[0].child._p_oid is None
):
return ((data[0].child.__getstate__(), ), )
......@@ -1043,14 +1095,14 @@ class _Tree(_Base):
assert_(self._firstbucket is not None,
"Non-empty BTree has NULL firstbucket")
child_class = data[0].child.__class__
child_class = type(data[0].child)
for i in data:
assert_(i.child is not None, "BTree has NULL child")
assert_(i.child.__class__ is child_class,
assert_(type(i.child) is child_class,
"BTree children have different types")
assert_(i.child.size, "Bucket length < 1")
if child_class is self.__class__:
if child_class is type(self):
assert_(self._firstbucket is data[0].child._firstbucket,
"BTree has firstbucket different than "
"its first child's firstbucket")
......@@ -1494,3 +1546,44 @@ def MERGE_WEIGHT_default(self, value, weight):
def MERGE_WEIGHT_numeric(self, value, weight):
    """Return *value* multiplied by *weight*."""
    weighted = value * weight
    return weighted
def _fix_pickle(mod_dict, mod_name):
# Make the pure-Python objects pickle with the same
# class names and types as the C extensions by setting the appropriate
# _BTree_reduce_as attribute.
# If the C extensions are not available, we also change the
# __name__ attribute of the type to match the C name (otherwise
# we wind up with *Py in the pickles)
# Each module must call this as `_fix_pickle(globals(), __name__)`
# at the bottom.
mod_prefix = mod_name.split('.')[-1][:2] # BTrees.OOBTree -> 'OO'
bucket_name = mod_prefix + 'Bucket'
py_bucket_name = bucket_name + 'Py'
have_c_extensions = mod_dict[bucket_name] is not mod_dict[py_bucket_name]
for name in 'Bucket', 'Set', 'BTree', 'TreeSet', 'TreeIterator':
raw_name = mod_prefix + name
py_name = raw_name + 'Py'
try:
py_type = mod_dict[py_name]
except KeyError:
if name == 'TreeIterator':
# Optional
break
raise
raw_type = mod_dict[raw_name] # Could be C or Python
py_type._BTree_reduce_as = raw_type
py_type._BTree_reduce_up_bound = py_type
if not have_c_extensions:
# Set FooPy to have the __name__ of simply Foo.
# We can't do this if the C extension is available,
# because then mod_dict[FooPy.__name__] is not FooPy
# and pickle refuses to save something like that.
# On the other hand (no C extension) this makes our
# Python pickle match the C version by default
py_type.__name__ = raw_name
py_type.__qualname__ = raw_name # Py 3.3+
......@@ -34,6 +34,7 @@ from ._base import intersection as _intersection
from ._base import set_operation as _set_operation
from ._base import to_bytes as _to_bytes
from ._base import union as _union
from ._base import _fix_pickle
_BUCKET_SIZE = 500
_TREE_SIZE = 500
......@@ -123,4 +124,6 @@ Set = fsSet
BTree = fsBTree
TreeSet = fsTreeSet
_fix_pickle(globals(), __name__)
moduleProvides(IIntegerObjectBTreeModule)
......@@ -215,15 +215,28 @@ class Base(object):
# Issue #2
# Nothing we pickle should include the 'Py' suffix of
# implementation classes, and unpickling should give us
# back the same type we started with
# back the best available type
import pickle
t = self._makeOne()
made_one = self._makeOne()
s = pickle.dumps(t)
self.assertTrue(b'Py' not in s, repr(s))
for proto in range(1, pickle.HIGHEST_PROTOCOL + 1):
dumped_str = pickle.dumps(made_one, proto)
self.assertTrue(b'Py' not in dumped_str, repr(dumped_str))
t2 = pickle.loads(s)
self.assertTrue(type(t2) is type(t) is self._getTargetClass())
loaded_one = pickle.loads(dumped_str)
# If we're testing the pure-Python version, but we have the
# C extension available, then the loaded type will be the C
# extension but the made type will be the Python version.
# Otherwise, they match. (Note that if we don't have C extensions
# available, the __name__ will be altered to not have Py in it. See _fix_pickle)
if 'Py' in type(made_one).__name__:
self.assertTrue(type(loaded_one) is not type(made_one))
else:
self.assertTrue(type(loaded_one) is type(made_one) is self._getTargetClass(), (type(loaded_one), type(made_one), self._getTargetClass(), repr(dumped_str)))
dumped_str2 = pickle.dumps(loaded_one, proto)
self.assertEqual(dumped_str, dumped_str2)
def test_pickle_empty(self):
# Issue #2
......@@ -239,9 +252,55 @@ class Base(object):
s2 = pickle.dumps(t2)
self.assertEqual(s, s2)
if hasattr(t2, '__len__'):
# checks for _firstbucket
self.assertEqual(0, len(t2))
# This doesn't hold for things like Bucket and Set, sadly
# self.assertEqual(t, t2)
def test_pickle_subclass(self):
# Issue #2: Make sure our class swizzling doesn't break
# pickling subclasses
global PickleSubclass # XXX: Has to be global to pickle, but this prevents running tests in parallel
# Subclass whatever concrete type this test case builds.
class PickleSubclass(type(self._makeOne())):
pass
import pickle
# Round-trip a fresh instance through pickle at the default protocol.
loaded = pickle.loads(pickle.dumps(PickleSubclass()))
# The subclass must come back as itself, not as a replacement type.
self.assertTrue(type(loaded) is PickleSubclass, type(loaded))
# __class__ must also report the subclass for a subclass instance.
self.assertTrue(PickleSubclass().__class__ is PickleSubclass)
def test_isinstance_subclass(self):
# Issue #2:
# In some cases we define a __class__ attribute that gets
# invoked for isinstance and *lies*. Check that isinstance still
# works (almost) as expected.
t = self._makeOne()
# It's a little bit weird, but in the fibbing case,
# we're an instance of two unrelated classes
# Both the real type and the reported __class__ must satisfy isinstance.
self.assertTrue(isinstance(t, type(t)), (t, type(t)))
self.assertTrue(isinstance(t, t.__class__))
# A genuine subclass of the real type is still recognised as one.
class Sub(type(t)):
pass
self.assertTrue(issubclass(Sub, type(t)))
if type(t) is not t.__class__:
# We're fibbing; this breaks issubclass of itself,
# contrary to the usual mechanism
self.assertFalse(issubclass(t.__class__, type(t)))
# An unrelated class must be neither a subclass nor yield instances.
class NonSub(object):
pass
self.assertFalse(issubclass(NonSub, type(t)))
self.assertFalse(isinstance(NonSub(), type(t)))
class MappingBase(Base):
# Tests common to mappings (buckets, btrees)
......@@ -1241,25 +1300,28 @@ class BTreeTests(MappingBase):
def test_legacy_py_pickle(self):
# Issue #2
# If we have a pickle that includes the 'Py' suffix,
# it should unpickle to the type that we're working with
# it (unfortunately) unpickles to the python type. But
# new pickles never produce that.
import pickle
t = self._makeOne()
made_one = self._makeOne()
s = pickle.dumps(t)
# It's not legacy
assert b'TreePy\n' not in s, repr(s)
assert b'Tree\np' in s, repr(s)
for proto in (1, 2):
s = pickle.dumps(made_one, proto)
# It's not legacy
assert b'TreePy\n' not in s, repr(s)
# \np for protocol 1, \nq for proto 2,
assert b'Tree\np' in s or b'Tree\nq' in s, repr(s)
# Now make it legacy
legacys = s.replace(b'Tree\np', b'TreePy\np')
# Now make it pseudo-legacy
legacys = s.replace(b'Tree\np', b'TreePy\np').replace(b'Tree\nq', b'TreePy\nq')
# It loads up as the current class
t2 = pickle.loads(legacys)
self.assertTrue(type(t2) is type(t) is self._getTargetClass(), (repr(legacys), type(t2), type(t), self._getTargetClass()))
# It loads up as the specified class
loaded_one = pickle.loads(legacys)
# It still functions and can be dumped again
s2 = pickle.dumps(t2)
self.assertEqual(s2, s2)
# It still functions and can be dumped again, as the original class
s2 = pickle.dumps(loaded_one, proto)
self.assertTrue(b'Py' not in s2)
self.assertEqual(s2, s)
class NormalSetTests(Base):
......@@ -2238,9 +2300,9 @@ class MappingConflictTestBase(ConflictTestBase):
base = self._makeOne()
base.update([(i, i*i) for i in l[:20]])
b1=base.__class__(base)
b2=base.__class__(base)
bm=base.__class__(base)
b1 = type(base)(base)
b2 = type(base)(base)
bm = type(base)(base)
items=base.items()
......
``BTrees`` Changelog
====================
4.3.0 (TBD)
-----------
- The pure-Python implementation, used on PyPy and when a C compiler
isn't available for CPython, now pickles identically to the C
version. Unpickling will choose the best available implementation.
This prevents interoperability problems and database corruption if
both implementations are in use. While it is no longer possible to
pickle a Python implementation and have it unpickle to the Python
implementation if the C implementation is available, existing Python
pickles will still unpickle to the Python implementation (until
pickled again). See:
https://github.com/zopefoundation/BTrees/issues/19
- Unpickling empty BTrees in a pure-Python environment no longer
creates invalid objects that fail with ``AttributeError``.
4.2.0 (2015-11-13)
------------------
......
......@@ -12,7 +12,7 @@
#
##############################################################################
__version__ = '4.2.0'
__version__ = '4.3.0.dev0'
import os
import platform
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment