Commit bba0673b authored by Julien Muchembled

xml_marshaller: add support for Python 3

parent b8b0a689
...@@ -33,6 +33,6 @@ setup(name=name, ...@@ -33,6 +33,6 @@ setup(name=name,
package_dir={'': 'src'}, package_dir={'': 'src'},
include_package_data=True, include_package_data=True,
zip_safe=False, zip_safe=False,
install_requires=['lxml',], install_requires=['lxml','six'],
test_suite='xml_marshaller', test_suite='xml_marshaller',
) )
...@@ -3,7 +3,7 @@ import unittest ...@@ -3,7 +3,7 @@ import unittest
import xml_marshaller import xml_marshaller
from xml_marshaller.xml_marshaller import load, loads, dump, dumps, dump_ns,\ from xml_marshaller.xml_marshaller import load, loads, dump, dumps, dump_ns,\
dumps_ns dumps_ns
from StringIO import StringIO from io import BytesIO
from lxml import etree from lxml import etree
import pkg_resources import pkg_resources
...@@ -42,13 +42,10 @@ class TestXMLMarhsaller(unittest.TestCase): ...@@ -42,13 +42,10 @@ class TestXMLMarhsaller(unittest.TestCase):
# Disabled for xml with namespaces. # Disabled for xml with namespaces.
# Because URI is not predictable # Because URI is not predictable
return return
if not isinstance(xml_data, str): if not isinstance(xml_data, bytes):
xml_string = xml_data.read() xml_data = xml_data.getvalue().decode("utf-8")
xml_data.seek(0)
else:
xml_string = xml_data
document_tree = etree.fromstring(xml_string) document_tree = etree.fromstring(xml_data)
is_validated = xmlschema.validate(document_tree) is_validated = xmlschema.validate(document_tree)
log = xmlschema.error_log log = xmlschema.error_log
error = log.last_error error = log.last_error
...@@ -57,14 +54,14 @@ class TestXMLMarhsaller(unittest.TestCase): ...@@ -57,14 +54,14 @@ class TestXMLMarhsaller(unittest.TestCase):
def test_string_serialisation(self): def test_string_serialisation(self):
""" """
""" """
data_list = [None, 1, pow(2, 123L), 19.72, 1+5j, data_list = [None, 1, 1<<123, 19.72, 1+5j,
"here is a string & a <fake tag>", b"here is a string & a <fake tag>",
(1, 2, 3), (1, 2, 3),
['alpha', 'beta', 'gamma', [None, 1, pow(2, 123L), 19.72, [b'alpha', b'beta', b'gamma', [None, 1, 1<<123, 19.72,
1+5j, "& a <fake tag>"]], 1+5j, b"& a <fake tag>"]],
{'key': 'value', 1: 2}, {b'key': b'value', 1: 2},
'éàù^ç'.decode('utf-8'), u'éàù^ç',
set(('a', 1,)), {b'a', 1},
True, True,
False, False,
] ]
...@@ -80,14 +77,14 @@ class TestXMLMarhsaller(unittest.TestCase): ...@@ -80,14 +77,14 @@ class TestXMLMarhsaller(unittest.TestCase):
def test_file_serialisation(self): def test_file_serialisation(self):
""" """
""" """
data_list = [None, 1, pow(2, 123L), 19.72, 1+5j, data_list = [None, 1, 1<<123, 19.72, 1+5j,
"here is a string & a <fake tag>", b"here is a string & a <fake tag>",
(1, 2, 3), (1, 2, 3),
['alpha', 'beta', 'gamma', [None, 1, pow(2, 123L), 19.72, [b'alpha', b'beta', b'gamma', [None, 1, 1<<123, 19.72,
1+5j, "& a <fake tag>"]], 1+5j, b"& a <fake tag>"]],
{'key': 'value', 1: 2}, {b'key': b'value', 1: 2},
'éàù^ç'.decode('utf-8'), u'éàù^ç',
set(('a', 1,)), {b'a', 1},
True, True,
False, False,
] ]
...@@ -96,7 +93,7 @@ class TestXMLMarhsaller(unittest.TestCase): ...@@ -96,7 +93,7 @@ class TestXMLMarhsaller(unittest.TestCase):
else: else:
dumper = dump dumper = dump
for item in data_list: for item in data_list:
file_like_object = StringIO() file_like_object = BytesIO()
dumper(item, file_like_object) dumper(item, file_like_object)
file_like_object.seek(0) file_like_object.seek(0)
self._checkXML(file_like_object) self._checkXML(file_like_object)
...@@ -107,8 +104,8 @@ class TestXMLMarhsaller(unittest.TestCase): ...@@ -107,8 +104,8 @@ class TestXMLMarhsaller(unittest.TestCase):
""" """
instance = _A() instance = _A()
instance.subobject = _B() instance.subobject = _B()
instance.subobject.list_attribute=[None, 1, pow(2, 123L), 19.72, 1+5j, instance.subobject.list_attribute=[None, 1, 1<<123, 19.72, 1+5j,
"here is a string & a <fake tag>"] b"here is a string & a <fake tag>"]
instance.self = instance instance.self = instance
if self.use_namespace_uri: if self.use_namespace_uri:
...@@ -123,10 +120,10 @@ class TestXMLMarhsaller(unittest.TestCase): ...@@ -123,10 +120,10 @@ class TestXMLMarhsaller(unittest.TestCase):
self.assertEquals(instance.subobject.list_attribute, self.assertEquals(instance.subobject.list_attribute,
new_instance.subobject.list_attribute) new_instance.subobject.list_attribute)
c_instance = _C('value1', attr2='value2') c_instance = _C(b'value1', attr2=b'value2')
c_instance.attr3 = 'value3' c_instance.attr3 = b'value3'
nested_instance = _C('somevalue', 'someother') nested_instance = _C(b'somevalue', b'someother')
nested_instance.attr3 = "stillanother" nested_instance.attr3 = b"stillanother"
c_instance.nested_instance = nested_instance c_instance.nested_instance = nested_instance
c_marshalled = dumps(c_instance) c_marshalled = dumps(c_instance)
self._checkXML(c_marshalled) self._checkXML(c_marshalled)
......
...@@ -9,14 +9,15 @@ Patched by Nicolas Delaby nicolas@nexedi.com to support namespaces ...@@ -9,14 +9,15 @@ Patched by Nicolas Delaby nicolas@nexedi.com to support namespaces
# same name, with four functions: # same name, with four functions:
# dump(value, file), load(file) # dump(value, file), load(file)
# dumps(value), loads(string) # dumps(value), loads(string)
from types import ClassType, TypeType from functools import cmp_to_key
from six import reraise
import sys import sys
from xml.sax import make_parser from xml.sax import make_parser
from xml.sax.saxutils import escape, unescape from xml.sax.saxutils import escape, unescape
from lxml.sax import ElementTreeContentHandler, saxify from lxml.sax import ElementTreeContentHandler, saxify
from lxml import etree from lxml import etree
from lxml.builder import ElementMaker from lxml.builder import ElementMaker
from cStringIO import StringIO from io import BytesIO
# Basic marshaller class, customizable by overriding it and # Basic marshaller class, customizable by overriding it and
# changing various attributes and methods. # changing various attributes and methods.
...@@ -24,12 +25,19 @@ from cStringIO import StringIO ...@@ -24,12 +25,19 @@ from cStringIO import StringIO
# also be a stupid hack. # also be a stupid hack.
MARSHAL_PREFIX = 'marshal' MARSHAL_PREFIX = 'marshal'
try:
cmp
except NameError:
cmp = lambda x, y: (x > y) - (x < y)
@cmp_to_key
def version_independent_cmp(a, b): def version_independent_cmp(a, b):
ta = type(a) a = a[0]
tb = type(b) b = b[0]
if ta is not tb: try:
return cmp(ta.__name__, tb.__name__) return cmp(a, b)
return cmp(a, b) except TypeError:
return cmp(repr(a), repr(b))
class Marshaller(object): class Marshaller(object):
...@@ -103,7 +111,7 @@ class Marshaller(object): ...@@ -103,7 +111,7 @@ class Marshaller(object):
def _marshal(self, value, kw): def _marshal(self, value, kw):
t = type(value) t = type(value)
i = str(id(value)) i = str(id(value))
if kw.has_key(i): if i in kw:
return self.m_reference(value, kw) return self.m_reference(value, kw)
else: else:
method_id = 'm_%s' % (type(value).__name__,) method_id = 'm_%s' % (type(value).__name__,)
...@@ -119,7 +127,7 @@ class Marshaller(object): ...@@ -119,7 +127,7 @@ class Marshaller(object):
# Utility function, used for types that aren't implemented # Utility function, used for types that aren't implemented
def m_unimplemented(self, value, kw): def m_unimplemented(self, value, kw):
raise ValueError, ("Marshalling of object %r unimplemented or not supported in this DTD" % value) raise ValueError("Marshalling of object %r unimplemented or not supported in this DTD" % value)
# The real entry point for marshalling, to handle properly # The real entry point for marshalling, to handle properly
# and cleanly any root tag or tags necessary for the marshalled # and cleanly any root tag or tags necessary for the marshalled
...@@ -137,29 +145,34 @@ class Marshaller(object): ...@@ -137,29 +145,34 @@ class Marshaller(object):
return self.tag_reference(id='i%s' % i) return self.tag_reference(id='i%s' % i)
def m_string(self, value, kw): def m_string(self, value, kw):
return self.tag_string(escape(str(value))) return self.tag_string(escape(value))
def m_unicode(self, value, kw): def m_unicode(self, value, kw):
return self.tag_unicode(value) return self.tag_unicode(escape(value))
# Since Python 2.2, the string type has a name of 'str' # Since Python 2.2, the string type has a name of 'str'
# To avoid having to rewrite all classes that implement m_string # To avoid having to rewrite all classes that implement m_string
# we delegate m_str to m_string. # we delegate m_str to m_string.
def m_str(self, value, kw): try:
return self.m_string(value, kw) unicode
except NameError:
def m_str(self, value, kw):
return self.m_unicode(value, kw)
def m_bytes(self, value, kw):
return self.m_string(value.decode('utf-8'), kw)
else:
def m_str(self, value, kw):
return self.m_string(value, kw)
def m_int(self, value, kw): def m_int(self, value, kw):
return self.tag_int(str(value)) return (self.tag_int if -1e24 < value < 1e24 else
self.tag_long)(str(value))
m_long = m_int
def m_float(self, value, kw): def m_float(self, value, kw):
return self.tag_float(str(value)) return self.tag_float(str(value))
def m_long(self, value, kw):
value = str(value)
if value[-1] == 'L':
# some Python versions append and 'L'
value = value[:-1]
return self.tag_long(value)
def m_tuple(self, value, kw): def m_tuple(self, value, kw):
xml_tree = self.tag_tuple() xml_tree = self.tag_tuple()
for elem in value: for elem in value:
...@@ -182,11 +195,10 @@ class Marshaller(object): ...@@ -182,11 +195,10 @@ class Marshaller(object):
kw[str(id(value))] = i kw[str(id(value))] = i
kw[i] = value kw[i] = value
xml_tree = self.tag_dictionary(id='i%s' % i) xml_tree = self.tag_dictionary(id='i%s' % i)
item_list = value.items()
# Sort the items to allow reproducible results across Python # Sort the items to allow reproducible results across Python
# versions # versions
item_list.sort(version_independent_cmp) for key, v in sorted(value.items(),
for key, v in item_list: key=version_independent_cmp):
xml_tree.append(self._marshal(key, kw)) xml_tree.append(self._marshal(key, kw))
xml_tree.append(self._marshal(v, kw)) xml_tree.append(self._marshal(v, kw))
return xml_tree return xml_tree
...@@ -339,8 +351,7 @@ class Unmarshaller(ElementTreeContentHandler): ...@@ -339,8 +351,7 @@ class Unmarshaller(ElementTreeContentHandler):
# Instantiate a new object; unmarshalling isn't thread-safe # Instantiate a new object; unmarshalling isn't thread-safe
# because it modifies attributes on the object. # because it modifies attributes on the object.
m = self.__class__() m = self.__class__()
file_object = StringIO(string) return m._load(BytesIO(string))
return m._load(file_object)
# Basic unmarshalling routine; it creates a SAX XML parser, # Basic unmarshalling routine; it creates a SAX XML parser,
# registers self as the SAX handler, parses it, and returns # registers self as the SAX handler, parses it, and returns
...@@ -394,8 +405,7 @@ class Unmarshaller(ElementTreeContentHandler): ...@@ -394,8 +405,7 @@ class Unmarshaller(ElementTreeContentHandler):
ns_uri, local_name = ns_name ns_uri, local_name = ns_name
sm, em = self.unmarshal_meth[local_name] sm, em = self.unmarshal_meth[local_name]
if sm is not None: if sm is not None:
attrib = {} attrib = {k[1]: v for k, v in attrs.items()}
[attrib.update({k[1]: v}) for k, v in attrs.items()]
return sm(local_name, attrib) return sm(local_name, attrib)
def characters(self, data): def characters(self, data):
...@@ -420,14 +430,11 @@ class Unmarshaller(ElementTreeContentHandler): ...@@ -420,14 +430,11 @@ class Unmarshaller(ElementTreeContentHandler):
# "empty" state. # "empty" state.
def um_start_root(self, name, attrs): def um_start_root(self, name, attrs):
if self.kw or self.data_stack: if self.kw or self.data_stack:
raise ValueError, "root element %s found elsewhere than root" \ raise ValueError("root element %r found elsewhere than root"
% repr(name) % name)
def um_start_reference(self, name, attrs): def um_start_reference(self, name, attrs):
assert attrs.has_key('id') self.data_stack.append(self.kw[attrs['id']])
id = attrs['id']
assert self.kw.has_key(id)
self.data_stack.append(self.kw[id])
def um_start_generic(self, name, attrs): def um_start_generic(self, name, attrs):
self.data_stack.append([]) self.data_stack.append([])
...@@ -441,7 +448,7 @@ class Unmarshaller(ElementTreeContentHandler): ...@@ -441,7 +448,7 @@ class Unmarshaller(ElementTreeContentHandler):
def um_end_string(self, name): def um_end_string(self, name):
ds = self.data_stack ds = self.data_stack
value = unescape(''.join(ds[-1])) value = unescape(''.join(ds[-1]))
if isinstance(value, unicode): if not isinstance(value, bytes):
value = value.encode('utf-8') value = value.encode('utf-8')
ds[-1] = value ds[-1] = value
self.accumulating_chars = 0 self.accumulating_chars = 0
...@@ -449,7 +456,7 @@ class Unmarshaller(ElementTreeContentHandler): ...@@ -449,7 +456,7 @@ class Unmarshaller(ElementTreeContentHandler):
def um_end_unicode(self, name): def um_end_unicode(self, name):
ds = self.data_stack ds = self.data_stack
value = unescape(''.join(ds[-1])) value = unescape(''.join(ds[-1]))
if not isinstance(value, unicode): if isinstance(value, bytes):
value = value.decode('utf-8') value = value.decode('utf-8')
ds[-1] = value ds[-1] = value
self.accumulating_chars = 0 self.accumulating_chars = 0
...@@ -463,7 +470,7 @@ class Unmarshaller(ElementTreeContentHandler): ...@@ -463,7 +470,7 @@ class Unmarshaller(ElementTreeContentHandler):
def um_end_long(self, name): def um_end_long(self, name):
ds = self.data_stack ds = self.data_stack
ds[-1] = ''.join(ds[-1]) ds[-1] = ''.join(ds[-1])
ds[-1] = long(ds[-1]) ds[-1] = int(ds[-1])
self.accumulating_chars = 0 self.accumulating_chars = 0
def um_end_float(self, name): def um_end_float(self, name):
...@@ -497,9 +504,8 @@ class Unmarshaller(ElementTreeContentHandler): ...@@ -497,9 +504,8 @@ class Unmarshaller(ElementTreeContentHandler):
def um_start_list(self, name, attrs): def um_start_list(self, name, attrs):
self.data_stack.append(LIST) self.data_stack.append(LIST)
L = [] L = []
if attrs.has_key('id'): if 'id' in attrs:
id = attrs['id'] self.kw[attrs['id']] = L
self.kw[id] = L
self.data_stack.append(L) self.data_stack.append(L)
def um_start_set(self, name, attrs): def um_start_set(self, name, attrs):
...@@ -553,9 +559,8 @@ class Unmarshaller(ElementTreeContentHandler): ...@@ -553,9 +559,8 @@ class Unmarshaller(ElementTreeContentHandler):
def um_start_dictionary(self, name, attrs): def um_start_dictionary(self, name, attrs):
self.data_stack.append(DICT) self.data_stack.append(DICT)
d = {} d = {}
if attrs.has_key('id'): if 'id' in attrs:
id = attrs['id'] self.kw[attrs['id']] = d
self.kw[id] = d
self.data_stack.append(d) self.data_stack.append(d)
def um_end_dictionary(self, name): def um_end_dictionary(self, name):
...@@ -575,9 +580,8 @@ class Unmarshaller(ElementTreeContentHandler): ...@@ -575,9 +580,8 @@ class Unmarshaller(ElementTreeContentHandler):
module = attrs['module'] module = attrs['module']
classname = attrs['class'] classname = attrs['class']
value = _EmptyClass() value = _EmptyClass()
if attrs.has_key('id'): if 'id' in attrs:
id = attrs['id'] self.kw[attrs['id']] = value
self.kw[id] = value
self.data_stack.append(value) self.data_stack.append(value)
self.data_stack.append(module) self.data_stack.append(module)
self.data_stack.append(classname) self.data_stack.append(classname)
...@@ -585,15 +589,15 @@ class Unmarshaller(ElementTreeContentHandler): ...@@ -585,15 +589,15 @@ class Unmarshaller(ElementTreeContentHandler):
def um_end_instance(self, name): def um_end_instance(self, name):
value, module, classname, initargs, kw = self.data_stack[-5:] value, module, classname, initargs, kw = self.data_stack[-5:]
klass = self.find_class(module, classname) klass = self.find_class(module, classname)
if (not initargs and (isinstance(klass, (ClassType, TypeType))) and if (not initargs and isinstance(klass, type) and
not hasattr(klass, '__getinitargs__')): not hasattr(klass, '__getinitargs__')):
value = klass() value = klass()
else: else:
try: try:
value = apply(klass, initargs) value = klass(*initargs)
except TypeError, err: except TypeError as err:
raise TypeError, 'in constructor for %s: %s' % ( reraise(TypeError, 'in constructor for %s: %s' % (
klass.__name__, str(err)), sys.exc_info()[2] klass.__name__, err), sys.exc_info()[2])
# Now set the object's attributes from the marshalled dictionary # Now set the object's attributes from the marshalled dictionary
for k, v in kw.items(): for k, v in kw.items():
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment