Commit f30bb0eb authored by Martin v. Löwis

Patch #1762412: Fix test case for struni branch.

parent 3e1f85eb
...@@ -634,16 +634,16 @@ class ElementTree: ...@@ -634,16 +634,16 @@ class ElementTree:
if not encoding: if not encoding:
encoding = "us-ascii" encoding = "us-ascii"
elif encoding != "utf-8" and encoding != "us-ascii": elif encoding != "utf-8" and encoding != "us-ascii":
file.write("<?xml version='1.0' encoding='%s'?>\n" % encoding) file.write(_encode("<?xml version='1.0' encoding='%s'?>\n" % encoding, encoding))
self._write(file, self._root, encoding, {}) self._write(file, self._root, encoding, {})
def _write(self, file, node, encoding, namespaces): def _write(self, file, node, encoding, namespaces):
# write XML to file # write XML to file
tag = node.tag tag = node.tag
if tag is Comment: if tag is Comment:
file.write("<!-- %s -->" % _escape_cdata(node.text, encoding)) file.write(_encode("<!-- %s -->" % _escape_cdata(node.text), encoding))
elif tag is ProcessingInstruction: elif tag is ProcessingInstruction:
file.write("<?%s?>" % _escape_cdata(node.text, encoding)) file.write(_encode("<?%s?>" % _escape_cdata(node.text), encoding))
else: else:
items = list(node.items()) items = list(node.items())
xmlns_items = [] # new namespaces in this scope xmlns_items = [] # new namespaces in this scope
...@@ -653,7 +653,7 @@ class ElementTree: ...@@ -653,7 +653,7 @@ class ElementTree:
if xmlns: xmlns_items.append(xmlns) if xmlns: xmlns_items.append(xmlns)
except TypeError: except TypeError:
_raise_serialization_error(tag) _raise_serialization_error(tag)
file.write("<" + _encode(tag, encoding)) file.write(_encode("<" + tag, encoding))
if items or xmlns_items: if items or xmlns_items:
items.sort() # lexical order items.sort() # lexical order
for k, v in items: for k, v in items:
...@@ -669,24 +669,22 @@ class ElementTree: ...@@ -669,24 +669,22 @@ class ElementTree:
if xmlns: xmlns_items.append(xmlns) if xmlns: xmlns_items.append(xmlns)
except TypeError: except TypeError:
_raise_serialization_error(v) _raise_serialization_error(v)
file.write(" %s=\"%s\"" % (_encode(k, encoding), file.write(_encode(" %s=\"%s\"" % (k, _escape_attrib(v)), encoding))
_escape_attrib(v, encoding)))
for k, v in xmlns_items: for k, v in xmlns_items:
file.write(" %s=\"%s\"" % (_encode(k, encoding), file.write(_encode(" %s=\"%s\"" % (k, _escape_attrib(v)), encoding))
_escape_attrib(v, encoding)))
if node.text or len(node): if node.text or len(node):
file.write(">") file.write(_encode(">", encoding))
if node.text: if node.text:
file.write(_escape_cdata(node.text, encoding)) file.write(_encode(_escape_cdata(node.text), encoding))
for n in node: for n in node:
self._write(file, n, encoding, namespaces) self._write(file, n, encoding, namespaces)
file.write("</" + _encode(tag, encoding) + ">") file.write(_encode("</" + tag + ">", encoding))
else: else:
file.write(" />") file.write(_encode(" />", encoding))
for k, v in xmlns_items: for k, v in xmlns_items:
del namespaces[v] del namespaces[v]
if node.tail: if node.tail:
file.write(_escape_cdata(node.tail, encoding)) file.write(_encode(_escape_cdata(node.tail), encoding))
# -------------------------------------------------------------------- # --------------------------------------------------------------------
# helpers # helpers
...@@ -727,10 +725,7 @@ def _encode(s, encoding): ...@@ -727,10 +725,7 @@ def _encode(s, encoding):
except AttributeError: except AttributeError:
return s # 1.5.2: assume the string uses the right encoding return s # 1.5.2: assume the string uses the right encoding
if sys.version[:3] == "1.5": _escape = re.compile(r"[&<>\"\u0080-\uffff]+")
_escape = re.compile(r"[&<>\"\x80-\xff]+") # 1.5.2
else:
_escape = re.compile(eval(r'u"[&<>\"\u0080-\uffff]+"'))
_escape_map = { _escape_map = {
"&": "&amp;", "&": "&amp;",
...@@ -772,14 +767,9 @@ def _encode_entity(text, pattern=_escape): ...@@ -772,14 +767,9 @@ def _encode_entity(text, pattern=_escape):
# the following functions assume an ascii-compatible encoding # the following functions assume an ascii-compatible encoding
# (or "utf-16") # (or "utf-16")
def _escape_cdata(text, encoding=None): def _escape_cdata(text):
# escape character data # escape character data
try: try:
if encoding:
try:
text = _encode(text, encoding)
except UnicodeError:
return _encode_entity(text)
text = text.replace("&", "&amp;") text = text.replace("&", "&amp;")
text = text.replace("<", "&lt;") text = text.replace("<", "&lt;")
text = text.replace(">", "&gt;") text = text.replace(">", "&gt;")
...@@ -787,14 +777,9 @@ def _escape_cdata(text, encoding=None): ...@@ -787,14 +777,9 @@ def _escape_cdata(text, encoding=None):
except (TypeError, AttributeError): except (TypeError, AttributeError):
_raise_serialization_error(text) _raise_serialization_error(text)
def _escape_attrib(text, encoding=None): def _escape_attrib(text):
# escape attribute value # escape attribute value
try: try:
if encoding:
try:
text = _encode(text, encoding)
except UnicodeError:
return _encode_entity(text)
text = text.replace("&", "&amp;") text = text.replace("&", "&amp;")
text = text.replace("'", "&apos;") # FIXME: overkill text = text.replace("'", "&apos;") # FIXME: overkill
text = text.replace("\"", "&quot;") text = text.replace("\"", "&quot;")
...@@ -982,7 +967,7 @@ def tostring(element, encoding=None): ...@@ -982,7 +967,7 @@ def tostring(element, encoding=None):
file = dummy() file = dummy()
file.write = data.append file.write = data.append
ElementTree(element).write(file, encoding) ElementTree(element).write(file, encoding)
return "".join(data) return b"".join(data)
## ##
# Generic element structure builder. This builder converts a sequence # Generic element structure builder. This builder converts a sequence
...@@ -1114,20 +1099,11 @@ class XMLTreeBuilder: ...@@ -1114,20 +1099,11 @@ class XMLTreeBuilder:
parser.StartElementHandler = self._start_list parser.StartElementHandler = self._start_list
except AttributeError: except AttributeError:
pass pass
encoding = None encoding = "utf-8"
if not parser.returns_unicode:
encoding = "utf-8"
# target.xml(encoding, None) # target.xml(encoding, None)
self._doctype = None self._doctype = None
self.entity = {} self.entity = {}
def _fixtext(self, text):
# convert text string to ascii, if possible
try:
return _encode(text, "ascii")
except UnicodeError:
return text
def _fixname(self, key): def _fixname(self, key):
# expand qname, and convert name string to ascii, if possible # expand qname, and convert name string to ascii, if possible
try: try:
...@@ -1136,7 +1112,7 @@ class XMLTreeBuilder: ...@@ -1136,7 +1112,7 @@ class XMLTreeBuilder:
name = key name = key
if "}" in name: if "}" in name:
name = "{" + name name = "{" + name
self._names[key] = name = self._fixtext(name) self._names[key] = name
return name return name
def _start(self, tag, attrib_in): def _start(self, tag, attrib_in):
...@@ -1144,7 +1120,7 @@ class XMLTreeBuilder: ...@@ -1144,7 +1120,7 @@ class XMLTreeBuilder:
tag = fixname(tag) tag = fixname(tag)
attrib = {} attrib = {}
for key, value in attrib_in.items(): for key, value in attrib_in.items():
attrib[fixname(key)] = self._fixtext(value) attrib[fixname(key)] = value
return self._target.start(tag, attrib) return self._target.start(tag, attrib)
def _start_list(self, tag, attrib_in): def _start_list(self, tag, attrib_in):
...@@ -1153,11 +1129,11 @@ class XMLTreeBuilder: ...@@ -1153,11 +1129,11 @@ class XMLTreeBuilder:
attrib = {} attrib = {}
if attrib_in: if attrib_in:
for i in range(0, len(attrib_in), 2): for i in range(0, len(attrib_in), 2):
attrib[fixname(attrib_in[i])] = self._fixtext(attrib_in[i+1]) attrib[fixname(attrib_in[i])] = attrib_in[i+1]
return self._target.start(tag, attrib) return self._target.start(tag, attrib)
def _data(self, text): def _data(self, text):
return self._target.data(self._fixtext(text)) return self._target.data(text)
def _end(self, tag): def _end(self, tag):
return self._target.end(self._fixname(tag)) return self._target.end(self._fixname(tag))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment