Commit a8bcb6b5 authored by Jérome Perrin's avatar Jérome Perrin

XMLExportImport: more support pickle protocol 3 🚧 ( repair python2 )

parent 3c34562f
......@@ -317,7 +317,7 @@ def XMLrecord(oid, plen, p, id_mapping):
p = u.load(id_mapping=id_mapping).__str__(4)
if f.tell() < plen:
p=p+u.load(id_mapping=id_mapping).__str__(4)
String=' <record id="%s" aka="%s">\n%s </record>\n' % (id, aka.decode(), p)
String=' <record id="%s" aka="%s">\n%s </record>\n' % (id, bytes2str(aka), p)
return String
def exportXML(jar, oid, file=None):
......@@ -363,12 +363,6 @@ def exportXML(jar, oid, file=None):
p = getReorderedPickle(oid)
write(XMLrecord(oid, len(p), p, id_mapping))
write('</ZopeData>\n')
if 0:
try:
print(file.getvalue())
except AttributeError:
pass
import pdb; pdb.set_trace()
return file
class zopedata:
......@@ -421,7 +415,6 @@ def importXML(jar, file, clue=''):
F.end_handlers['record'] = save_record
F.end_handlers['ZopeData'] = save_zopedata
F.start_handlers['ZopeData'] = start_zopedata
F.binary=1
F.file=outfile
# <patch>
# Our BTs XML files don't declare encoding but have accented chars in them
......
......@@ -19,21 +19,22 @@
# attribute. dispatch_table should be used instead.
from zodbpickle.slowpickle import *
import ast
import struct
import six
if six.PY2:
from base64 import encodestring as base64_encodebytes, decodestring as base64_decodebytes
from zodbpickle.pickle_2 import decode_long
else:
from base64 import encodebytes as base64_encodebytes, decodebytes as base64_decodebytes
from zodbpickle.pickle_3 import decode_long
import re
from marshal import loads as mloads
from .xyap import NoBlanks
from .xyap import xyap
from Products.ERP5Type.Utils import str2bytes
from Products.ERP5Type.Utils import bytes2str, str2bytes, unicode2str
from marshal import dumps as mdumps
#from zLOG import LOG
binary = re.compile('[^\x1f-\x7f]').search
......@@ -94,17 +95,16 @@ def convert(S):
### readable output.
try:
if not isinstance(S, six.text_type):
S = S.decode('utf8')
decoded = S.decode('utf8')
if six.PY3:
S = decoded
except UnicodeDecodeError:
return 'base64', base64_encodebytes(S)[:-1]
return 'base64', bytes2str(base64_encodebytes(S)[:-1])
else:
new = reprs_re.sub(sub_reprs, S)
### patch end
if len(new) > (1.4*len(S)):
if not isinstance(S, six.binary_type):
# TODO zope4py3: is this the right place ? this supports Unicode('\n')
S = S.encode('ascii')
return 'base64', base64_encodebytes(S)[:-1]
return 'base64', bytes2str(base64_encodebytes(str2bytes(S))[:-1])
elif '>' in new or '<' in S or '&' in S:
if not ']]>' in S:
return 'cdata', '<![CDATA[\n\n' + new + '\n\n]]>'
......@@ -117,9 +117,9 @@ def unconvert(encoding,S):
if encoding == 'base64':
return base64_decodebytes(S)
else:
return str2bytes(eval(b"'" + S.replace(b'\n', b'') + b"'"))
return str2bytes(ast.literal_eval(bytes2str(b"'" + S.replace(b'\n', b'') + b"'")))
class Global:
class Global(object):
def __init__(self, module, name, mapping):
self.module=module
self.name=name
......@@ -133,14 +133,14 @@ class Global:
return '%s<%s%s name="%s" module="%s"/>\n' % (
' '*indent, name, id, self.name, self.module)
class Immutable:
class Immutable(object):
def __init__(self, value):
self.value = value
def getValue(self):
return self.value
class Scalar:
class Scalar(object):
def __init__(self, v, mapping):
self._v=v
self.mapping = mapping
......@@ -185,7 +185,7 @@ class String(Scalar):
# be converted.
encoding = 'base64'
v = base64_encodebytes(self._v)[:-1]
self._v = self.mapping.convertBase64(v).decode()
self._v = bytes2str(self.mapping.convertBase64(v))
else:
encoding, self._v = convert(self._v)
self.encoding = encoding
......@@ -203,17 +203,22 @@ class String(Scalar):
self.mapping.setImmutable(self.id, Immutable(value = result))
return '%s%s\n' % (' '*indent, result)
class Unicode(String):
def tag_name(self):
if six.PY3:
return 'string'
return super(Unicode, self).tag_name()
def value(self):
return unicode2str(super(Unicode, self).value())
class Bytes(String):
pass
class Wrapper:
class Wrapper(object):
def __init__(self, v, mapping):
self._v=v
self.mapping = mapping
......@@ -227,13 +232,13 @@ class Wrapper:
name=self.__class__.__name__.lower()
v=self._v
i=' '*indent
if isinstance(v,Scalar):
return '%s<%s%s> %s </%s>\n' % (i, name, id, str(v)[:-1], name)
if isinstance(v, Scalar):
return '%s<%s%s> %s </%s>\n' % (i, name, id, v.__str__()[:-1], name)
else:
v=v.__str__(indent+2)
return '%s<%s%s>\n%s%s</%s>\n' % (i, name, id, v, i, name)
class Collection:
class Collection(object):
def __init__(self, mapping):
self.mapping = mapping
......@@ -320,6 +325,7 @@ class Object(Sequence):
def __setstate__(self, v): self.append(State(v, self.mapping))
class Bool(Scalar): pass
class Int(Scalar): pass
class Float(Scalar): pass
class List(Sequence): pass
......@@ -347,7 +353,7 @@ class Persistent(Wrapper):
return '%s<%s%s>\n%s%s</%s>\n' % (i, name, id, v, i, name)
blanck_line_expression = re.compile('^ +$')
class NoBlanks:
class NoBlanks(object):
"""
This allows to ignore at least whitespaces between elements and also
correctly handle string/unicode
......@@ -394,7 +400,7 @@ class NoBlanks:
self.append(data)
class IdentityMapping:
class IdentityMapping(object):
def __init__(self):
self.resetMapping()
self.immutable = {}
......@@ -480,6 +486,18 @@ class MinimalMapping(IdentityMapping):
def __str__(self, a):
return "Error here"
class UnsupportedOpCode(AssertionError):
"""Error when encountering an opcode that is not supposed to be used
by this implementation.
"""
def unsupported_opcode(opcode):
def handler(self):
raise UnsupportedOpCode(opcode)
return handler
class ToXMLUnpickler(Unpickler):
def load(self, id_mapping=None):
if id_mapping is None:
......@@ -494,13 +512,6 @@ class ToXMLUnpickler(Unpickler):
def persistent_load(self, v):
return Persistent(v, self.id_mapping)
def load_persid(self):
pid = self.readline()[:-1]
self.append(self.persistent_load(String(pid, self.id_mapping)))
if six.PY2:
dispatch[PERSID] = load_persid
dispatch[PERSID[0]] = load_persid
def load_binpersid(self):
pid = self.stack.pop()
self.append(self.persistent_load(pid))
......@@ -514,12 +525,6 @@ class ToXMLUnpickler(Unpickler):
dispatch[NONE] = load_none
dispatch[NONE[0]] = load_none
def load_int(self):
self.append(Int(int(self.readline()[:-1]), self.id_mapping))
if six.PY2:
dispatch[INT] = load_int
dispatch[INT[0]] = load_int
def load_binint(self):
self.append(Int(mloads(b'i' + self.read(4)), self.id_mapping))
if six.PY2:
......@@ -538,17 +543,36 @@ class ToXMLUnpickler(Unpickler):
dispatch[BININT2] = load_binint2
dispatch[BININT2[0]] = load_binint2
def load_long(self):
self.append(Long(long_(self.readline()[:-1], 0), self.id_mapping))
def load_long1(self):
n = ord(self.read(1))
data = self.read(n)
self.append(Long(decode_long(data), self.id_mapping))
if six.PY2:
dispatch[LONG1] = load_long1
dispatch[LONG1[0]] = load_long1
def load_long4(self):
n = mloads(b'i' + self.read(4))
if n < 0:
# Corrupt or hostile pickle -- we never write one like this
raise UnpicklingError("LONG pickle has negative byte count");
data = self.read(n)
self.append(Long(decode_long(data), self.id_mapping))
if six.PY2:
dispatch[LONG4] = load_long4
dispatch[LONG4[0]] = load_long4
def load_true(self):
self.append(Bool(True, self.id_mapping))
if six.PY2:
dispatch[LONG] = load_long
dispatch[LONG[0]] = load_long
dispatch[NEWTRUE] = load_true
dispatch[NEWTRUE[0]] = load_true
def load_float(self):
self.append(Float(float(self.readline()[:-1]), self.id_mapping))
def load_false(self):
self.append(Bool(False, self.id_mapping))
if six.PY2:
dispatch[FLOAT] = load_float
dispatch[FLOAT[0]] = load_float
dispatch[NEWFALSE] = load_false
dispatch[NEWFALSE[0]] = load_false
def load_binfloat(self, unpack=struct.unpack):
self.append(Float(unpack('>d', self.read(8))[0], self.id_mapping))
......@@ -556,13 +580,6 @@ class ToXMLUnpickler(Unpickler):
dispatch[BINFLOAT] = load_binfloat
dispatch[BINFLOAT[0]] = load_binfloat
def load_string(self):
self.append(String(eval(self.readline()[:-1],
{'__builtins__': {}}), self.id_mapping)) # Let's be careful
if six.PY2:
dispatch[STRING] = load_string
dispatch[STRING[0]] = load_string
def load_binstring(self):
len = mloads(b'i' + self.read(4))
self.append(String(self.read(len), self.id_mapping))
......@@ -570,14 +587,6 @@ class ToXMLUnpickler(Unpickler):
dispatch[BINSTRING] = load_binstring
dispatch[BINSTRING[0]] = load_binstring
def load_unicode(self):
line = self.readline()
self.append(Unicode(six.text_type(eval(line[:-1],
{'__builtins__': {}})), self.id_mapping)) # Let's be careful
if six.PY2:
dispatch[UNICODE] = load_unicode
dispatch[UNICODE[0]] = load_unicode
def load_binunicode(self):
len = mloads(b'i' + self.read(4))
self.append(Unicode(six.text_type(self.read(len), 'utf-8'), self.id_mapping))
......@@ -608,8 +617,6 @@ class ToXMLUnpickler(Unpickler):
def load_tuple(self):
k = self.marker()
#LOG('load_tuple, k',0,k)
#LOG('load_tuple, stack[k+1:]',0,self.stack[k+1:])
self.stack[k:] = [Tuple(self.id_mapping, v=self.stack[k+1:])]
if six.PY2:
dispatch[TUPLE] = load_tuple
......@@ -675,8 +682,8 @@ class ToXMLUnpickler(Unpickler):
k = self.marker()
args = Tuple(self.id_mapping, v=self.stack[k+1:])
del self.stack[k:]
module = self.readline()[:-1].decode()
name = self.readline()[:-1].decode()
module = bytes2str(self.readline()[:-1])
name = bytes2str(self.readline()[:-1])
value=Object(Global(module, name, self.id_mapping), args, self.id_mapping)
self.append(value)
if six.PY2:
......@@ -709,8 +716,8 @@ class ToXMLUnpickler(Unpickler):
dispatch[NEWOBJ[0]] = load_newobj
def load_global(self):
module = self.readline()[:-1].decode()
name = self.readline()[:-1].decode()
module = bytes2str(self.readline()[:-1])
name = bytes2str(self.readline()[:-1])
self.append(Global(module, name, self.id_mapping))
if six.PY2:
dispatch[GLOBAL] = load_global
......@@ -731,12 +738,6 @@ class ToXMLUnpickler(Unpickler):
idprefix=''
def load_get(self):
self.append(Get(self.idprefix+self.readline()[:-1], self.id_mapping))
if six.PY2:
dispatch[GET] = load_get
dispatch[GET[0]] = load_get
def load_binget(self):
i = mloads(b'i' + self.read(1) + b'\000\000\000')
self.append(Get(self.idprefix+repr(i), self.id_mapping))
......@@ -771,6 +772,11 @@ class ToXMLUnpickler(Unpickler):
dispatch[LONG_BINPUT] = load_long_binput
dispatch[LONG_BINPUT[0]] = load_long_binput
for code in PERSID, INT, LONG, FLOAT, STRING, UNICODE, GET, PUT:
if six.PY2:
dispatch[code] = unsupported_opcode(code)
dispatch[code[0]] = unsupported_opcode(code)
class LogCall:
def __init__(self, func):
self.func = func
......@@ -798,23 +804,19 @@ def start_pickle(self, tag, attrs):
return [tag, attrs]
def save_int(self, tag, data):
if self.binary:
v = int(name(self, tag, data))
if v >= 0:
if v <= 0xff:
return BININT1 + six.int2byte(v)
if v <= 0xffff:
return BININT2 + b'%c%c' % (v & 0xff, v >> 8)
hb = v >> 31
if hb == 0 or hb == -1:
return BININT + struct.pack('<i', v)
v = int(name(self, tag, data))
if v >= 0:
if v <= 0xff:
return BININT1 + six.int2byte(v)
if v <= 0xffff:
return BININT2 + b'%c%c' % (v & 0xff, v >> 8)
hb = v >> 31
if hb == 0 or hb == -1:
return BININT + struct.pack('<i', v)
return INT + name(self, tag, data) + b'\n'
def save_float(self, tag, data):
if self.binary:
return BINFLOAT + struct.pack('>d', float(name(self, tag, data)))
else:
return FLOAT + name(self, tag, data) + b'\n'
return BINFLOAT + struct.pack('>d', float(name(self, tag, data)))
def save_put(self, v, attrs):
id = attrs.get('id', '')
......@@ -824,14 +826,11 @@ def save_put(self, v, attrs):
id = id[prefix + 1:]
elif id[0] == 'i':
id = id[1:]
if self.binary:
id = int(id)
if id < 256:
id = BINPUT + six.int2byte(id)
else:
id = LONG_BINPUT + struct.pack('<i', id)
id = int(id)
if id < 256:
id = BINPUT + six.int2byte(id)
else:
id = PUT + repr(id) + b'\n'
id = LONG_BINPUT + struct.pack('<i', id)
return v + id
return v
......@@ -841,81 +840,71 @@ def save_string(self, tag, data):
encoding = a.get('encoding', 'repr') # JPS: repr is default encoding
if encoding != '':
v = unconvert(encoding, v)
if self.binary:
l = len(v)
if l < 256:
if encoding == 'base64':
# TODO: zope4py3 (all this is unfinished)
# We can be here for two reasons:
# - the input was a string with \n or similar control characters
# that are not allowed in XML, so the str was exported as base64.
# - the input was a _p_oid exported from python2, in that case
# we want to get a zodbpickle.binary back
# XXX all this seems a bad idea, we need more context if we want
# to have such heuristics
if len(v) == 8:
# looks like a _p_oid, assume it is a persistent_id -> bytes
op = SHORT_BINBYTES
else:
# if it's a valid UTF-8 string -> str
try:
v.decode('utf-8')
# XXX maybe check with repr_re ?
op = BINUNICODE
v = op + struct.pack('<i', l) + v
return save_put(self, v, a)
except UnicodeDecodeError:
# not valid utf-8 -> bytes
op = SHORT_BINBYTES
l = len(v)
if l < 256:
if encoding == 'base64':
# We can be here for two reasons:
# - the input was a string with \n or similar control characters
# that are not allowed in XML, so the str was exported as base64.
# - the input was a persistent id exported from python2, in that case
# we want to get a zodbpickle.binary back
if len(v) == 8 and self._stack[-1][0] in ('persistent', ):
# looks like a persistent id, assume it is a persistent_id -> bytes
op = SHORT_BINBYTES
else:
# XXX this branch seems wrong
op = SHORT_BINSTRING
try:
v.decode('ascii')
# XXX zope4py3 we could also create an unpickler with encoding utf-8 ?
except UnicodeDecodeError:
op = BINUNICODE
v = op + struct.pack('<i', l) + v
return save_put(self, v, a)
v = op + six.int2byte(l) + v
# if it's a valid UTF-8 string -> str
try:
v.decode('utf-8')
# XXX maybe check with repr_re ?
op = BINUNICODE if six.PY3 else BINSTRING
v = op + struct.pack('<i', l) + v
return save_put(self, v, a)
except UnicodeDecodeError:
# not valid utf-8 -> bytes
op = SHORT_BINBYTES
else:
# TODO: zope4py3 see assumption above for SHORT_BINBYTES / SHORT_BINSTRING
# TODO no! check this more ...
# op = BINSTRING
if encoding == 'base64':
op = BINBYTES
else:
op = BINSTRING if six.PY2 else BINUNICODE
v = op + struct.pack('<i', l) + v
op = SHORT_BINSTRING
try:
v.decode('ascii')
except UnicodeDecodeError:
op = BINUNICODE if six.PY3 else BINSTRING
v = op + struct.pack('<i', l) + v
return save_put(self, v, a)
v = op + six.int2byte(l) + v
else:
v = STRING + repr(v) + '\n'
if encoding == 'base64':
op = BINBYTES
# if it's a valid UTF-8 string -> str
try:
v.decode('utf-8')
op = BINUNICODE if six.PY3 else BINSTRING
except UnicodeDecodeError:
# not valid utf-8 -> bytes
pass
else:
op = BINSTRING if six.PY2 else BINUNICODE
v = op + struct.pack('<i', l) + v
return save_put(self, v, a)
def save_bytes(self, tag, data):
a = data[1]
v = b''.join(data[2:])
encoding = a.get('encoding', 'repr')
assert encoding == 'base64'
if encoding != '':
if encoding:
v = unconvert(encoding, v)
if self.binary:
l = len(v)
if l < 256:
op = SHORT_BINBYTES
v = op + six.int2byte(l) + v
else:
op = BINBYTES
v = op + struct.pack('<i', l) + v
l = len(v)
if l < 256:
op = SHORT_BINBYTES
v = op + six.int2byte(l) + v
else:
# XXX used ??? seems wrong
v = BYTES + repr(v) + '\n'
op = BINBYTES
v = op + struct.pack('<i', l) + v
return save_put(self, v, a)
def save_unicode(self, tag, data):
binary=self.binary
v=b''
a=data[1]
if len(data)>2:
......@@ -924,11 +913,9 @@ def save_unicode(self, tag, data):
encoding=a.get('encoding','repr') # JPS: repr is default encoding
if encoding != '':
v=unconvert(encoding,v)
if binary:
l=len(v)
s=mdumps(l)[1:]
v=BINUNICODE+s+v
else: v=UNICODE+"'"+v+"'\012"
l=len(v)
s=mdumps(l)[1:]
v=BINUNICODE+s+v
return save_put(self, v, a)
def save_tuple(self, tag, data):
......@@ -939,26 +926,16 @@ def save_tuple(self, tag, data):
def save_list(self, tag, data):
L = data[2:]
if self.binary:
v = save_put(self, EMPTY_LIST, data[1])
if L:
v = v + MARK + b''.join(L) + APPENDS
else:
v = save_put(self, MARK + LIST, data[1])
if L:
v = APPEND.join(L) + APPEND
v = save_put(self, EMPTY_LIST, data[1])
if L:
v = v + MARK + b''.join(L) + APPENDS
return v
def save_dict(self, tag, data):
D = data[2:]
if self.binary:
v = save_put(self, EMPTY_DICT, data[1])
if D:
v = v + MARK + b''.join(D) + SETITEMS
else:
v = save_put(self, MARK + DICT, data[1])
if D:
v = v + SETITEM.join(D) + SETITEM
v = save_put(self, EMPTY_DICT, data[1])
if D:
v = v + MARK + b''.join(D) + SETITEMS
return v
def save_reference(self, tag, data):
......@@ -967,14 +944,11 @@ def save_reference(self, tag, data):
prefix = id.rfind('.')
if prefix >= 0:
id = id[prefix + 1:]
if self.binary:
id = int(id)
if id < 256:
return BINGET + six.int2byte(id)
else:
return LONG_BINGET + struct.pack('<i', i)
id = int(id)
if id < 256:
return BINGET + six.int2byte(id)
else:
return GET + repr(id) + b'\n'
return LONG_BINGET + struct.pack('<i', id)
def save_object(self, tag, data):
if len(data)==5:
......@@ -1002,10 +976,7 @@ def save_global(self, tag, data):
def save_persis(self, tag, data):
v = data[2]
if self.binary:
return v + BINPERSID
else:
return PERSID + v
return v + BINPERSID
def save_pickle_start(self, tag, attrs):
return [tag, attrs]
......@@ -1014,7 +985,14 @@ def save_pickle(self, tag, data):
return data[2] + b'.'
def save_none(self, tag, data):
return b'N'
return NONE
def save_bool(self, tag, data):
if data[2] == b'True':
return TRUE
else:
assert data[2] == b'False', data
return FALSE
def save_long(self, tag, data):
return b'L'+data[2]+b'L\012'
......@@ -1046,6 +1024,7 @@ class xmlPickler(NoBlanks, xyap):
'none': save_none,
'int': save_int,
'long': save_long,
'bool': save_bool,
'float': save_float,
'bytes': save_bytes,
'string': save_string,
......
......@@ -24,7 +24,7 @@ import string
import xml.parsers.expat
class xyap:
class xyap(object):
start_handlers = {}
end_handlers = {}
......@@ -57,7 +57,7 @@ class xyap:
top = end[tag](self, tag, top)
append(top)
class NoBlanks:
class NoBlanks(object):
def handle_data(self, data):
if data.strip():
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment