Commit a7e995d4 authored by Jérome Perrin

XMLExportImport: support python3

This includes heuristics to guess whether a value read from business
template XML is str or bytes: oids are bytes, and so are strings that do
not decode as UTF-8, so that we can install python2 business templates on py3.

When exporting business templates, we need to build a list of referenced
persistent objects to export them separately in the XML. This is done
using noload, in a way which does not support pickle protocol 1 on py3
(the persistent ids are None and the assertion in
https://github.com/zopefoundation/ZODB/blob/d698507bb89eeb38c6e655199bc9f54c909dbf4d/src/ZODB/serialize.py#L669
fails), so we need to use pickle protocol 3 on py3.

With these changes, on py2 we are still using the same protocol 1 and
the output is stable, but on py3 we are using protocol 3 and the output
is different. Business template exported on py

This also adds some test coverage of XMLExportImport. While doing this,
it was found that the ad-hoc handling of booleans in protocol 1 was not
implemented correctly: they were serialized as integers (0 for False
and 1 for True). This is also fixed.
parent 733c35fc
Pipeline #35995 failed with stage
in 0 seconds
......@@ -35,6 +35,7 @@ from io import BytesIO
from six import StringIO
from Products.ERP5Type.XMLExportImport import importXML, ppml
import six
import lxml.etree
class DummyClass:
......@@ -46,12 +47,22 @@ class DummyClass:
self.data = []
# Minimal test stand-in for a persistent object: it only stores a value in
# `v` and an oid in `_p_oid`.
class DummyPersistentClass:
def __init__(self, v, oid):
# v: arbitrary payload value kept on the instance
self.v = v
# oid: identifier stored under the `_p_oid` attribute name
self._p_oid = oid
class XMLPickleTestCase(unittest.TestCase):
_pickle_protocol = 3
def dump_to_xml(self, obj):
pickled_string = pickle.dumps(obj, protocol=self._pickle_protocol)
f = BytesIO(pickled_string)
xml = ppml.ToXMLUnpickler(f).load().__str__()
# Pickle `obj` with self._pickle_protocol into an in-memory buffer, then
# convert that pickle to XML with ppml.ToXMLUnpickler.
# `persistent_id`, when truthy, is installed as the pickler's persistent_id
# hook before dumping.
# Returns the XML, after asserting that it is a `str`.
def dump_to_xml(self, obj, persistent_id=None):
f = BytesIO()
pickler = pickle.Pickler(f, protocol=self._pickle_protocol)
if persistent_id:
pickler.persistent_id = persistent_id
pickler.dump(obj)
# rewind the buffer so the unpickler reads the pickle from its first byte
f.seek(0)
xml = ppml.ToXMLUnpickler(f).load(id_mapping=ppml.MinimalMapping()).__str__()
self.assertIsInstance(xml, str)
return xml
......@@ -150,8 +161,10 @@ class TestXMLPickle(XMLPickleTestCase):
def test_bytes(self):
self.check_and_load(b"bytes")
self.check_and_load(b"long bytes" * 100)
self.check_and_load(zodbpickle.binary(b"bytes"))
self.check_and_load(zodbpickle.binary(b""))
if six.PY3 or self._pickle_protocol > 1:
# protocol 1 does not keep bytes
self.check_and_load(zodbpickle.binary(b"bytes"))
self.check_and_load(zodbpickle.binary(b""))
def test_unicode(self): # BBB PY2
self.assertIs(type(self.dump_and_load(u"OK")), six.text_type)
......@@ -218,6 +231,47 @@ class TestXMLPickle(XMLPickleTestCase):
self.assertEqual(reconstructed, [ref, ref, ref])
self.assertIs(reconstructed[0], reconstructed[1])
# Round-trip a dict holding two DummyPersistentClass instances through XML,
# using persistent_id on dump and persistent_load on load, and check that
# each oid is seen once in each direction.
def test_persistent(self):
p1 = DummyPersistentClass(1, b'1')
p2 = DummyPersistentClass(2, b'2')
# oids recorded while dumping; entries are removed again while loading
persistent_ids = []
def persistent_id(obj):
if isinstance(obj, DummyPersistentClass):
persistent_ids.append(obj._p_oid)
return obj._p_oid
xml = self.dump_to_xml(
{'p1': p1, 'p2': p2, 'not p': 'not p'},
persistent_id=persistent_id)
# sorted, so the check does not depend on the order objects were pickled in
self.assertEqual(sorted(persistent_ids), [b'1', b'2'])
def persistent_load(oid):
# list.remove raises ValueError if `oid` was not recorded during the dump
persistent_ids.remove(oid)
return oid
obj = self.load_from_xml(xml, persistent_load)
# persistent_load returns the oid itself, so oids stand in for the objects
self.assertEqual(obj,
{'p1': b'1', 'p2': b'2', 'not p': 'not p'})
# every oid recorded during the dump was loaded back
self.assertEqual(persistent_ids, [])
# Dump and reload a UserList, a class imported from a different module
# depending on the python version.
def test_renamed_class(self):
# UserList is imported from `UserList` on python2, `collections` on python3
if six.PY2:
from UserList import UserList
else:
from collections import UserList
l = UserList([1, 2])
xml = self.dump_to_xml(l)
if self._pickle_protocol == 1:
# with protocol 1, the <global name="UserList"> element is expected to
# carry the module name "UserList"
self.assertEqual(
lxml.etree.fromstring(xml).xpath('//global[@name="UserList"]/@module'),
["UserList"],
)
self.check_and_load(l)
# Same tests as TestXMLPickle, with _pickle_protocol overridden to 1.
class TestXMLPickleProtocol1(TestXMLPickle):
_pickle_protocol = 1
class TestXMLPickleStringEncoding(XMLPickleTestCase):
def test_string_base64(self):
......@@ -255,7 +309,7 @@ class TestXMLPickleStringHeuristics(XMLPickleTestCase):
"""Heuristics to map python2 str to unicode or bytes in business templates.
"""
def test_bytes_base64(self):
def test_load_bytes_base64(self):
# if it does not decode as utf-8, it's bytes
self.assertEqual(
self.load_from_xml("""
......@@ -263,7 +317,7 @@ class TestXMLPickleStringHeuristics(XMLPickleTestCase):
"""),
b"\xFF\x00")
def test_long_bytes_base64(self):
def test_load_long_bytes_base64(self):
# if it does not decode as utf-8, it's bytes
long_bytes = b"\xFF\x00" * 256
self.assertEqual(
......@@ -272,7 +326,7 @@ class TestXMLPickleStringHeuristics(XMLPickleTestCase):
""" % base64.b64encode(long_bytes).decode()),
long_bytes)
def test_string_persistent_id_base64(self):
def test_load_string_persistent_id_base64(self):
# persistent ids are loaded as bytes
persistent_ids = []
def persistent_load(oid):
......
......@@ -67,6 +67,9 @@ MARSHALLER_NAMESPACE_URI = 'http://www.erp5.org/namespaces/marshaller'
marshaller = Marshaller(namespace_uri=MARSHALLER_NAMESPACE_URI,
as_tree=True).dumps
# Pickle protocol used by default: protocol 1 on python2, protocol 3 on python3.
DEFAULT_PICKLE_PROTOCOL = 1 if six.PY2 else 3
class OrderedPickler(Pickler):
"""Pickler producing consistent output by saving dicts in order
"""
......@@ -250,7 +253,8 @@ from . import ppml
magic=b'<?xm' # importXML(jar, file, clue)}
def reorderPickle(jar, p):
def reorderPickle(jar, p, pickle_protocol):
try:
from ZODB._compat import Unpickler, Pickler
except ImportError: # BBB: ZODB 3.10
......@@ -284,7 +288,7 @@ def reorderPickle(jar, p):
unpickler.persistent_load=persistent_load
newp=BytesIO()
pickler = OrderedPickler(newp, 3)
pickler = OrderedPickler(newp, pickle_protocol)
pickler.persistent_id=persistent_id
classdef = unpickler.load()
......@@ -294,7 +298,7 @@ def reorderPickle(jar, p):
if 0: # debug
debugp = BytesIO()
debugpickler = OrderedPickler(debugp, 3)
debugpickler = OrderedPickler(debugp, pickle_protocol)
debugpickler.persistent_id = persistent_id
debugpickler.dump(obj)
import pickletools
......@@ -323,7 +327,8 @@ def XMLrecord(oid, plen, p, id_mapping):
String=' <record id="%s" aka="%s">\n%s </record>\n' % (id, bytes2str(aka), p)
return String
def exportXML(jar, oid, file=None):
def exportXML(jar, oid, file=None, pickle_protocol=DEFAULT_PICKLE_PROTOCOL):
# For performance reasons, exportXML does not use 'XMLrecord' anymore to map
# oids. This requires to initialize MinimalMapping.marked_reference before
# any string output, i.e. in ppml.Reference.__init__
......@@ -336,10 +341,10 @@ def exportXML(jar, oid, file=None):
pickle_dict = {oid: None}
max_cache = [1e7] # do not cache more than 10MB of pickle data
def getReorderedPickle(oid):
p = pickle_dict[oid]
p = pickle_dict.get(oid)
if p is None:
p = load(oid)[0]
p = reorderPickle(jar, p)[1]
p = reorderPickle(jar, p, pickle_protocol)[1]
if len(p) < max_cache[0]:
max_cache[0] -= len(p)
pickle_dict[oid] = p
......
......@@ -352,6 +352,7 @@ class Persistent(Wrapper):
v=v.__str__(indent+2)
return '%s<%s%s>\n%s%s</%s>\n' % (i, name, id, v, i, name)
blanck_line_expression = re.compile('^ +$')
class NoBlanks(object):
"""
......@@ -525,6 +526,21 @@ class ToXMLUnpickler(Unpickler):
dispatch[NONE] = load_none
dispatch[NONE[0]] = load_none
# Handler for the INT opcode: read one line, drop its last character, and
# append a Bool for the special values b'00' / b'01' or an Int otherwise.
def load_int(self):
# [:-1] drops the last character of the line (presumably the newline)
line = self.readline()[:-1]
# on protocol 1, bool are saved as int
# https://github.com/python/cpython/blob/b455a5a55cb1fd5bb6178a969e8ebd0e6e91b610/Lib/pickletools.py#L1173-L1179
if line == b'00':
val = Bool(False, self.id_mapping)
elif line == b'01':
val = Bool(True, self.id_mapping)
else:
val = Int(int(line), self.id_mapping)
self.append(val)
# register the handler under the opcode and under INT[0]
if six.PY2:
dispatch[INT] = load_int
dispatch[INT[0]] = load_int
def load_binint(self):
self.append(Int(mloads(b'i' + self.read(4)), self.id_mapping))
if six.PY2:
......@@ -543,6 +559,17 @@ class ToXMLUnpickler(Unpickler):
dispatch[BININT2] = load_binint2
dispatch[BININT2[0]] = load_binint2
# Handler for the LONG opcode: read one line, drop its last character, strip
# an optional trailing 'L' and append the parsed value wrapped in Long.
def load_long(self):
val = self.readline()[:-1]
# readline gives bytes on python3; decode so the 'L' comparison below works
if six.PY3:
val = val.decode('ascii')
if val and val[-1] == 'L':
val = val[:-1]
# NOTE(review): assumes long_ behaves like the builtin int/long, where base 0
# means the base is taken from the literal's prefix -- confirm where long_
# is defined
self.append(Long(long_(val, 0), self.id_mapping))
# register the handler under the opcode and under LONG[0]
if six.PY2:
dispatch[LONG] = load_long
dispatch[LONG[0]] = load_long
def load_long1(self):
n = ord(self.read(1))
data = self.read(n)
......@@ -752,12 +779,6 @@ class ToXMLUnpickler(Unpickler):
dispatch[LONG_BINGET] = load_long_binget
dispatch[LONG_BINGET[0]] = load_long_binget
def load_put(self):
self.stack[-1].id=self.idprefix+self.readline()[:-1]
if six.PY2:
dispatch[PUT] = load_put
dispatch[PUT[0]] = load_put
def load_binput(self):
i = mloads(b'i' + self.read(1) + b'\000\000\000')
self.stack[-1].id=self.idprefix+repr(i)
......@@ -772,11 +793,6 @@ class ToXMLUnpickler(Unpickler):
dispatch[LONG_BINPUT] = load_long_binput
dispatch[LONG_BINPUT[0]] = load_long_binput
for code in PERSID, INT, LONG, FLOAT, STRING, UNICODE, GET, PUT:
if six.PY2:
dispatch[code] = unsupported_opcode(code)
dispatch[code[0]] = unsupported_opcode(code)
class LogCall:
def __init__(self, func):
self.func = func
......@@ -788,6 +804,17 @@ class ToXMLUnpickler(Unpickler):
# for code in dispatch.keys():
# dispatch[code] = LogCall(dispatch[code])
# For each opcode listed here, register the handler built by
# unsupported_opcode(name) in the dispatch table, under the opcode and under
# opcode[0].
# NOTE(review): presumably these are opcodes this unpickler does not
# convert -- confirm against unsupported_opcode's definition
for opcode, name in (
(STRING, 'STRING'),
(UNICODE, 'UNICODE'),
(GET, 'GET'),
(PUT, 'PUT'),
):
if six.PY2:
dispatch[opcode] = unsupported_opcode(name)
dispatch[opcode[0]] = unsupported_opcode(name)
def ToXMLload(file):
    """Build a ToXMLUnpickler on `file` and return the result of its load()."""
    unpickler = ToXMLUnpickler(file)
    return unpickler.load()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment