Commit bba0673b authored by Julien Muchembled

xml_marshaller: add support for Python 3

parent b8b0a689
......@@ -33,6 +33,6 @@ setup(name=name,
package_dir={'': 'src'},
include_package_data=True,
zip_safe=False,
install_requires=['lxml',],
install_requires=['lxml','six'],
test_suite='xml_marshaller',
)
......@@ -3,7 +3,7 @@ import unittest
import xml_marshaller
from xml_marshaller.xml_marshaller import load, loads, dump, dumps, dump_ns,\
dumps_ns
from StringIO import StringIO
from io import BytesIO
from lxml import etree
import pkg_resources
......@@ -42,13 +42,10 @@ class TestXMLMarhsaller(unittest.TestCase):
# Disabled for xml with namespaces.
# Because URI is not predictable
return
if not isinstance(xml_data, str):
xml_string = xml_data.read()
xml_data.seek(0)
else:
xml_string = xml_data
if not isinstance(xml_data, bytes):
xml_data = xml_data.getvalue().decode("utf-8")
document_tree = etree.fromstring(xml_string)
document_tree = etree.fromstring(xml_data)
is_validated = xmlschema.validate(document_tree)
log = xmlschema.error_log
error = log.last_error
......@@ -57,14 +54,14 @@ class TestXMLMarhsaller(unittest.TestCase):
def test_string_serialisation(self):
"""
"""
data_list = [None, 1, pow(2, 123L), 19.72, 1+5j,
"here is a string & a <fake tag>",
data_list = [None, 1, 1<<123, 19.72, 1+5j,
b"here is a string & a <fake tag>",
(1, 2, 3),
['alpha', 'beta', 'gamma', [None, 1, pow(2, 123L), 19.72,
1+5j, "& a <fake tag>"]],
{'key': 'value', 1: 2},
'éàù^ç'.decode('utf-8'),
set(('a', 1,)),
[b'alpha', b'beta', b'gamma', [None, 1, 1<<123, 19.72,
1+5j, b"& a <fake tag>"]],
{b'key': b'value', 1: 2},
u'éàù^ç',
{b'a', 1},
True,
False,
]
......@@ -80,14 +77,14 @@ class TestXMLMarhsaller(unittest.TestCase):
def test_file_serialisation(self):
"""
"""
data_list = [None, 1, pow(2, 123L), 19.72, 1+5j,
"here is a string & a <fake tag>",
data_list = [None, 1, 1<<123, 19.72, 1+5j,
b"here is a string & a <fake tag>",
(1, 2, 3),
['alpha', 'beta', 'gamma', [None, 1, pow(2, 123L), 19.72,
1+5j, "& a <fake tag>"]],
{'key': 'value', 1: 2},
'éàù^ç'.decode('utf-8'),
set(('a', 1,)),
[b'alpha', b'beta', b'gamma', [None, 1, 1<<123, 19.72,
1+5j, b"& a <fake tag>"]],
{b'key': b'value', 1: 2},
u'éàù^ç',
{b'a', 1},
True,
False,
]
......@@ -96,7 +93,7 @@ class TestXMLMarhsaller(unittest.TestCase):
else:
dumper = dump
for item in data_list:
file_like_object = StringIO()
file_like_object = BytesIO()
dumper(item, file_like_object)
file_like_object.seek(0)
self._checkXML(file_like_object)
......@@ -107,8 +104,8 @@ class TestXMLMarhsaller(unittest.TestCase):
"""
instance = _A()
instance.subobject = _B()
instance.subobject.list_attribute=[None, 1, pow(2, 123L), 19.72, 1+5j,
"here is a string & a <fake tag>"]
instance.subobject.list_attribute=[None, 1, 1<<123, 19.72, 1+5j,
b"here is a string & a <fake tag>"]
instance.self = instance
if self.use_namespace_uri:
......@@ -123,10 +120,10 @@ class TestXMLMarhsaller(unittest.TestCase):
self.assertEquals(instance.subobject.list_attribute,
new_instance.subobject.list_attribute)
c_instance = _C('value1', attr2='value2')
c_instance.attr3 = 'value3'
nested_instance = _C('somevalue', 'someother')
nested_instance.attr3 = "stillanother"
c_instance = _C(b'value1', attr2=b'value2')
c_instance.attr3 = b'value3'
nested_instance = _C(b'somevalue', b'someother')
nested_instance.attr3 = b"stillanother"
c_instance.nested_instance = nested_instance
c_marshalled = dumps(c_instance)
self._checkXML(c_marshalled)
......
......@@ -9,14 +9,15 @@ Patched by Nicolas Delaby nicolas@nexedi.com to support namespaces
# same name, with four functions:
# dump(value, file), load(file)
# dumps(value), loads(string)
from types import ClassType, TypeType
from functools import cmp_to_key
from six import reraise
import sys
from xml.sax import make_parser
from xml.sax.saxutils import escape, unescape
from lxml.sax import ElementTreeContentHandler, saxify
from lxml import etree
from lxml.builder import ElementMaker
from cStringIO import StringIO
from io import BytesIO
# Basic marshaller class, customizable by overriding it and
# changing various attributes and methods.
......@@ -24,12 +25,19 @@ from cStringIO import StringIO
# also be a stupid hack.
MARSHAL_PREFIX = 'marshal'
try:
cmp
except NameError:
cmp = lambda x, y: (x > y) - (x < y)
@cmp_to_key
def version_independent_cmp(a, b):
ta = type(a)
tb = type(b)
if ta is not tb:
return cmp(ta.__name__, tb.__name__)
return cmp(a, b)
a = a[0]
b = b[0]
try:
return cmp(a, b)
except TypeError:
return cmp(repr(a), repr(b))
class Marshaller(object):
......@@ -103,7 +111,7 @@ class Marshaller(object):
def _marshal(self, value, kw):
t = type(value)
i = str(id(value))
if kw.has_key(i):
if i in kw:
return self.m_reference(value, kw)
else:
method_id = 'm_%s' % (type(value).__name__,)
......@@ -119,7 +127,7 @@ class Marshaller(object):
# Utility function, used for types that aren't implemented
def m_unimplemented(self, value, kw):
raise ValueError, ("Marshalling of object %r unimplemented or not supported in this DTD" % value)
raise ValueError("Marshalling of object %r unimplemented or not supported in this DTD" % value)
# The real entry point for marshalling, to handle properly
# and cleanly any root tag or tags necessary for the marshalled
......@@ -137,29 +145,34 @@ class Marshaller(object):
return self.tag_reference(id='i%s' % i)
def m_string(self, value, kw):
return self.tag_string(escape(str(value)))
return self.tag_string(escape(value))
def m_unicode(self, value, kw):
return self.tag_unicode(value)
return self.tag_unicode(escape(value))
# Since Python 2.2, the string type has a name of 'str'
# To avoid having to rewrite all classes that implement m_string
# we delegate m_str to m_string.
def m_str(self, value, kw):
return self.m_string(value, kw)
try:
unicode
except NameError:
def m_str(self, value, kw):
return self.m_unicode(value, kw)
def m_bytes(self, value, kw):
return self.m_string(value.decode('utf-8'), kw)
else:
def m_str(self, value, kw):
return self.m_string(value, kw)
def m_int(self, value, kw):
return self.tag_int(str(value))
return (self.tag_int if -1e24 < value < 1e24 else
self.tag_long)(str(value))
m_long = m_int
def m_float(self, value, kw):
return self.tag_float(str(value))
def m_long(self, value, kw):
value = str(value)
if value[-1] == 'L':
# some Python versions append and 'L'
value = value[:-1]
return self.tag_long(value)
def m_tuple(self, value, kw):
xml_tree = self.tag_tuple()
for elem in value:
......@@ -182,11 +195,10 @@ class Marshaller(object):
kw[str(id(value))] = i
kw[i] = value
xml_tree = self.tag_dictionary(id='i%s' % i)
item_list = value.items()
# Sort the items to allow reproducable results across Python
# versions
item_list.sort(version_independent_cmp)
for key, v in item_list:
for key, v in sorted(value.items(),
key=version_independent_cmp):
xml_tree.append(self._marshal(key, kw))
xml_tree.append(self._marshal(v, kw))
return xml_tree
......@@ -339,8 +351,7 @@ class Unmarshaller(ElementTreeContentHandler):
# Instantiate a new object; unmarshalling isn't thread-safe
# because it modifies attributes on the object.
m = self.__class__()
file_object = StringIO(string)
return m._load(file_object)
return m._load(BytesIO(string))
# Basic unmarshalling routine; it creates a SAX XML parser,
# registers self as the SAX handler, parses it, and returns
......@@ -394,8 +405,7 @@ class Unmarshaller(ElementTreeContentHandler):
ns_uri, local_name = ns_name
sm, em = self.unmarshal_meth[local_name]
if sm is not None:
attrib = {}
[attrib.update({k[1]: v}) for k, v in attrs.items()]
attrib = {k[1]: v for k, v in attrs.items()}
return sm(local_name, attrib)
def characters(self, data):
......@@ -420,14 +430,11 @@ class Unmarshaller(ElementTreeContentHandler):
# "empty" state.
def um_start_root(self, name, attrs):
if self.kw or self.data_stack:
raise ValueError, "root element %s found elsewhere than root" \
% repr(name)
raise ValueError("root element %r found elsewhere than root"
% name)
def um_start_reference(self, name, attrs):
assert attrs.has_key('id')
id = attrs['id']
assert self.kw.has_key(id)
self.data_stack.append(self.kw[id])
self.data_stack.append(self.kw[attrs['id']])
def um_start_generic(self, name, attrs):
self.data_stack.append([])
......@@ -441,7 +448,7 @@ class Unmarshaller(ElementTreeContentHandler):
def um_end_string(self, name):
ds = self.data_stack
value = unescape(''.join(ds[-1]))
if isinstance(value, unicode):
if not isinstance(value, bytes):
value = value.encode('utf-8')
ds[-1] = value
self.accumulating_chars = 0
......@@ -449,7 +456,7 @@ class Unmarshaller(ElementTreeContentHandler):
def um_end_unicode(self, name):
ds = self.data_stack
value = unescape(''.join(ds[-1]))
if not isinstance(value, unicode):
if isinstance(value, bytes):
value = value.decode('utf-8')
ds[-1] = value
self.accumulating_chars = 0
......@@ -463,7 +470,7 @@ class Unmarshaller(ElementTreeContentHandler):
def um_end_long(self, name):
ds = self.data_stack
ds[-1] = ''.join(ds[-1])
ds[-1] = long(ds[-1])
ds[-1] = int(ds[-1])
self.accumulating_chars = 0
def um_end_float(self, name):
......@@ -497,9 +504,8 @@ class Unmarshaller(ElementTreeContentHandler):
def um_start_list(self, name, attrs):
self.data_stack.append(LIST)
L = []
if attrs.has_key('id'):
id = attrs['id']
self.kw[id] = L
if 'id' in attrs:
self.kw[attrs['id']] = L
self.data_stack.append(L)
def um_start_set(self, name, attrs):
......@@ -553,9 +559,8 @@ class Unmarshaller(ElementTreeContentHandler):
def um_start_dictionary(self, name, attrs):
self.data_stack.append(DICT)
d = {}
if attrs.has_key('id'):
id = attrs['id']
self.kw[id] = d
if 'id' in attrs:
self.kw[attrs['id']] = d
self.data_stack.append(d)
def um_end_dictionary(self, name):
......@@ -575,9 +580,8 @@ class Unmarshaller(ElementTreeContentHandler):
module = attrs['module']
classname = attrs['class']
value = _EmptyClass()
if attrs.has_key('id'):
id = attrs['id']
self.kw[id] = value
if 'id' in attrs:
self.kw[attrs['id']] = value
self.data_stack.append(value)
self.data_stack.append(module)
self.data_stack.append(classname)
......@@ -585,15 +589,15 @@ class Unmarshaller(ElementTreeContentHandler):
def um_end_instance(self, name):
value, module, classname, initargs, kw = self.data_stack[-5:]
klass = self.find_class(module, classname)
if (not initargs and (isinstance(klass, (ClassType, TypeType))) and
if (not initargs and isinstance(klass, type) and
not hasattr(klass, '__getinitargs__')):
value = klass()
else:
try:
value = apply(klass, initargs)
except TypeError, err:
raise TypeError, 'in constructor for %s: %s' % (
klass.__name__, str(err)), sys.exc_info()[2]
value = klass(*initargs)
except TypeError as err:
reraise(TypeError, 'in constructor for %s: %s' % (
klass.__name__, err), sys.exc_info()[2])
# Now set the object's attributes from the marshalled dictionary
for k, v in kw.items():
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment