Commit 87cfb189 authored by Julien Muchembled

xml_marshaller: stop distinguishing between unicode and bytes and always return 'str'

This will simplify porting software that uses this library to Python 3;
in any case, non-text data could not be stored as-is in XML.
parent f91f3e48
......@@ -55,13 +55,13 @@ class TestXMLMarhsaller(unittest.TestCase):
"""
"""
data_list = [None, 1, 1<<123, 19.72, 1+5j,
b"here is a string & a <fake tag>",
"here is a string & a <fake tag>",
(1, 2, 3),
[b'alpha', b'beta', b'gamma', [None, 1, 1<<123, 19.72,
1+5j, b"& a <fake tag>"]],
{b'key': b'value', 1: 2},
['alpha', 'beta', 'gamma', [None, 1, 1<<123, 19.72,
1+5j, "& a <fake tag>"]],
{'key': 'value', 1: 2},
u'éàù^ç',
{b'a', 1},
{'a', 1},
True,
False,
]
......@@ -78,13 +78,13 @@ class TestXMLMarhsaller(unittest.TestCase):
"""
"""
data_list = [None, 1, 1<<123, 19.72, 1+5j,
b"here is a string & a <fake tag>",
"here is a string & a <fake tag>",
(1, 2, 3),
[b'alpha', b'beta', b'gamma', [None, 1, 1<<123, 19.72,
1+5j, b"& a <fake tag>"]],
{b'key': b'value', 1: 2},
['alpha', 'beta', 'gamma', [None, 1, 1<<123, 19.72,
1+5j, "& a <fake tag>"]],
{'key': 'value', 1: 2},
u'éàù^ç',
{b'a', 1},
{'a', 1},
True,
False,
]
......@@ -105,7 +105,7 @@ class TestXMLMarhsaller(unittest.TestCase):
instance = _A()
instance.subobject = _B()
instance.subobject.list_attribute=[None, 1, 1<<123, 19.72, 1+5j,
b"here is a string & a <fake tag>"]
"here is a string & a <fake tag>"]
instance.self = instance
if self.use_namespace_uri:
......@@ -120,10 +120,10 @@ class TestXMLMarhsaller(unittest.TestCase):
self.assertEquals(instance.subobject.list_attribute,
new_instance.subobject.list_attribute)
c_instance = _C(b'value1', attr2=b'value2')
c_instance.attr3 = b'value3'
nested_instance = _C(b'somevalue', b'someother')
nested_instance.attr3 = b"stillanother"
c_instance = _C('value1', attr2='value2')
c_instance.attr3 = 'value3'
nested_instance = _C('somevalue', 'someother')
nested_instance.attr3 = "stillanother"
c_instance.nested_instance = nested_instance
c_marshalled = dumps(c_instance)
self._checkXML(c_marshalled)
......
......@@ -147,22 +147,16 @@ class Marshaller(object):
def m_string(self, value, kw):
return self.tag_string(escape(value))
def m_unicode(self, value, kw):
return self.tag_unicode(escape(value))
# Since Python 2.2, the string type has a name of 'str'
# To avoid having to rewrite all classes that implement m_string
# we delegate m_str to m_string.
try:
unicode
except NameError:
def m_str(self, value, kw):
return self.m_unicode(value, kw)
def m_bytes(self, value, kw):
return self.m_string(value.decode('utf-8'), kw)
else:
def m_str(self, value, kw):
return self.m_string(value, kw)
def m_str(self, value, kw):
return self.m_string(value, kw)
def m_bytes(self, value, kw):
return self.m_string(value.decode('utf-8'), kw)
m_unicode = m_str
def m_int(self, value, kw):
return (self.tag_int if -1e24 < value < 1e24 else
......@@ -290,7 +284,6 @@ class Unmarshaller(ElementTreeContentHandler):
'float': ('um_start_float', 'um_end_float'),
'long': ('um_start_long', 'um_end_long'),
'string': ('um_start_string', 'um_end_string'),
'unicode': ('um_start_unicode', 'um_end_unicode'),
'tuple': ('um_start_tuple', 'um_end_tuple'),
'list': ('um_start_list', 'um_end_list'),
'dictionary': ('um_start_dictionary', 'um_end_dictionary'),
......@@ -302,6 +295,7 @@ class Unmarshaller(ElementTreeContentHandler):
'set': ('um_start_set', 'um_end_set'),
'bool': ('um_start_bool', 'um_end_bool'),
}
unmarshal_meth['unicode'] = unmarshal_meth['string'] # BBB
def __init__(self):
# Find the named methods, and convert them to the actual
......@@ -440,25 +434,14 @@ class Unmarshaller(ElementTreeContentHandler):
self.data_stack.append([])
self.accumulating_chars = 1
um_start_float = um_start_long = um_start_string = um_start_unicode = um_start_generic
um_start_float = um_start_long = um_start_string = um_start_generic
um_start_complex = um_start_code = um_start_none = um_start_generic
um_start_int = um_start_generic
um_start_bool = um_start_generic
def um_end_string(self, name):
ds = self.data_stack
value = unescape(''.join(ds[-1]))
if not isinstance(value, bytes):
value = value.encode('utf-8')
ds[-1] = value
self.accumulating_chars = 0
def um_end_unicode(self, name):
ds = self.data_stack
value = unescape(''.join(ds[-1]))
if isinstance(value, bytes):
value = value.decode('utf-8')
ds[-1] = value
ds[-1] = unescape(''.join(ds[-1]))
self.accumulating_chars = 0
def um_end_int(self, name):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment