Commit 87cfb189 authored by Julien Muchembled

xml_marshaller: stop distinguishing unicode and bytes and always return 'str'

This will simplify porting software that uses this library to Python 3,
and in any case, non-text data could not be stored as-is in XML.
parent f91f3e48
...@@ -55,13 +55,13 @@ class TestXMLMarhsaller(unittest.TestCase): ...@@ -55,13 +55,13 @@ class TestXMLMarhsaller(unittest.TestCase):
""" """
""" """
data_list = [None, 1, 1<<123, 19.72, 1+5j, data_list = [None, 1, 1<<123, 19.72, 1+5j,
b"here is a string & a <fake tag>", "here is a string & a <fake tag>",
(1, 2, 3), (1, 2, 3),
[b'alpha', b'beta', b'gamma', [None, 1, 1<<123, 19.72, ['alpha', 'beta', 'gamma', [None, 1, 1<<123, 19.72,
1+5j, b"& a <fake tag>"]], 1+5j, "& a <fake tag>"]],
{b'key': b'value', 1: 2}, {'key': 'value', 1: 2},
u'éàù^ç', u'éàù^ç',
{b'a', 1}, {'a', 1},
True, True,
False, False,
] ]
...@@ -78,13 +78,13 @@ class TestXMLMarhsaller(unittest.TestCase): ...@@ -78,13 +78,13 @@ class TestXMLMarhsaller(unittest.TestCase):
""" """
""" """
data_list = [None, 1, 1<<123, 19.72, 1+5j, data_list = [None, 1, 1<<123, 19.72, 1+5j,
b"here is a string & a <fake tag>", "here is a string & a <fake tag>",
(1, 2, 3), (1, 2, 3),
[b'alpha', b'beta', b'gamma', [None, 1, 1<<123, 19.72, ['alpha', 'beta', 'gamma', [None, 1, 1<<123, 19.72,
1+5j, b"& a <fake tag>"]], 1+5j, "& a <fake tag>"]],
{b'key': b'value', 1: 2}, {'key': 'value', 1: 2},
u'éàù^ç', u'éàù^ç',
{b'a', 1}, {'a', 1},
True, True,
False, False,
] ]
...@@ -105,7 +105,7 @@ class TestXMLMarhsaller(unittest.TestCase): ...@@ -105,7 +105,7 @@ class TestXMLMarhsaller(unittest.TestCase):
instance = _A() instance = _A()
instance.subobject = _B() instance.subobject = _B()
instance.subobject.list_attribute=[None, 1, 1<<123, 19.72, 1+5j, instance.subobject.list_attribute=[None, 1, 1<<123, 19.72, 1+5j,
b"here is a string & a <fake tag>"] "here is a string & a <fake tag>"]
instance.self = instance instance.self = instance
if self.use_namespace_uri: if self.use_namespace_uri:
...@@ -120,10 +120,10 @@ class TestXMLMarhsaller(unittest.TestCase): ...@@ -120,10 +120,10 @@ class TestXMLMarhsaller(unittest.TestCase):
self.assertEquals(instance.subobject.list_attribute, self.assertEquals(instance.subobject.list_attribute,
new_instance.subobject.list_attribute) new_instance.subobject.list_attribute)
c_instance = _C(b'value1', attr2=b'value2') c_instance = _C('value1', attr2='value2')
c_instance.attr3 = b'value3' c_instance.attr3 = 'value3'
nested_instance = _C(b'somevalue', b'someother') nested_instance = _C('somevalue', 'someother')
nested_instance.attr3 = b"stillanother" nested_instance.attr3 = "stillanother"
c_instance.nested_instance = nested_instance c_instance.nested_instance = nested_instance
c_marshalled = dumps(c_instance) c_marshalled = dumps(c_instance)
self._checkXML(c_marshalled) self._checkXML(c_marshalled)
......
...@@ -147,22 +147,16 @@ class Marshaller(object): ...@@ -147,22 +147,16 @@ class Marshaller(object):
def m_string(self, value, kw): def m_string(self, value, kw):
return self.tag_string(escape(value)) return self.tag_string(escape(value))
def m_unicode(self, value, kw):
return self.tag_unicode(escape(value))
# Since Python 2.2, the string type has a name of 'str' # Since Python 2.2, the string type has a name of 'str'
# To avoid having to rewrite all classes that implement m_string # To avoid having to rewrite all classes that implement m_string
# we delegate m_str to m_string. # we delegate m_str to m_string.
try: def m_str(self, value, kw):
unicode return self.m_string(value, kw)
except NameError:
def m_str(self, value, kw): def m_bytes(self, value, kw):
return self.m_unicode(value, kw) return self.m_string(value.decode('utf-8'), kw)
def m_bytes(self, value, kw):
return self.m_string(value.decode('utf-8'), kw) m_unicode = m_str
else:
def m_str(self, value, kw):
return self.m_string(value, kw)
def m_int(self, value, kw): def m_int(self, value, kw):
return (self.tag_int if -1e24 < value < 1e24 else return (self.tag_int if -1e24 < value < 1e24 else
...@@ -290,7 +284,6 @@ class Unmarshaller(ElementTreeContentHandler): ...@@ -290,7 +284,6 @@ class Unmarshaller(ElementTreeContentHandler):
'float': ('um_start_float', 'um_end_float'), 'float': ('um_start_float', 'um_end_float'),
'long': ('um_start_long', 'um_end_long'), 'long': ('um_start_long', 'um_end_long'),
'string': ('um_start_string', 'um_end_string'), 'string': ('um_start_string', 'um_end_string'),
'unicode': ('um_start_unicode', 'um_end_unicode'),
'tuple': ('um_start_tuple', 'um_end_tuple'), 'tuple': ('um_start_tuple', 'um_end_tuple'),
'list': ('um_start_list', 'um_end_list'), 'list': ('um_start_list', 'um_end_list'),
'dictionary': ('um_start_dictionary', 'um_end_dictionary'), 'dictionary': ('um_start_dictionary', 'um_end_dictionary'),
...@@ -302,6 +295,7 @@ class Unmarshaller(ElementTreeContentHandler): ...@@ -302,6 +295,7 @@ class Unmarshaller(ElementTreeContentHandler):
'set': ('um_start_set', 'um_end_set'), 'set': ('um_start_set', 'um_end_set'),
'bool': ('um_start_bool', 'um_end_bool'), 'bool': ('um_start_bool', 'um_end_bool'),
} }
unmarshal_meth['unicode'] = unmarshal_meth['string'] # BBB
def __init__(self): def __init__(self):
# Find the named methods, and convert them to the actual # Find the named methods, and convert them to the actual
...@@ -440,25 +434,14 @@ class Unmarshaller(ElementTreeContentHandler): ...@@ -440,25 +434,14 @@ class Unmarshaller(ElementTreeContentHandler):
self.data_stack.append([]) self.data_stack.append([])
self.accumulating_chars = 1 self.accumulating_chars = 1
um_start_float = um_start_long = um_start_string = um_start_unicode = um_start_generic um_start_float = um_start_long = um_start_string = um_start_generic
um_start_complex = um_start_code = um_start_none = um_start_generic um_start_complex = um_start_code = um_start_none = um_start_generic
um_start_int = um_start_generic um_start_int = um_start_generic
um_start_bool = um_start_generic um_start_bool = um_start_generic
def um_end_string(self, name): def um_end_string(self, name):
ds = self.data_stack ds = self.data_stack
value = unescape(''.join(ds[-1])) ds[-1] = unescape(''.join(ds[-1]))
if not isinstance(value, bytes):
value = value.encode('utf-8')
ds[-1] = value
self.accumulating_chars = 0
def um_end_unicode(self, name):
ds = self.data_stack
value = unescape(''.join(ds[-1]))
if isinstance(value, bytes):
value = value.decode('utf-8')
ds[-1] = value
self.accumulating_chars = 0 self.accumulating_chars = 0
def um_end_int(self, name): def um_end_int(self, name):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment