Commit fa1c85a1 authored by Eli Bendersky

Issue #15296: Fix minidom.toxml/toprettyxml for non-unicode encodings. Patch...

Issue #15296: Fix minidom.toxml/toprettyxml for non-unicode encodings.  Patch by Serhiy Storchaka, with some minor style adjustments by me.
parent c9cda53e
...@@ -147,12 +147,7 @@ module documentation. This section lists the differences between the API and ...@@ -147,12 +147,7 @@ module documentation. This section lists the differences between the API and
the DOM node. the DOM node.
With an explicit *encoding* [1]_ argument, the result is a byte With an explicit *encoding* [1]_ argument, the result is a byte
string in the specified encoding. It is recommended that you string in the specified encoding.
always specify an encoding; you may use any encoding you like, but
an argument of "utf-8" is the most common choice, avoiding
:exc:`UnicodeError` exceptions in case of unrepresentable text
data.
With no *encoding* argument, the result is a Unicode string, and the With no *encoding* argument, the result is a Unicode string, and the
XML declaration in the resulting string does not specify an XML declaration in the resulting string does not specify an
encoding. Encoding this string in an encoding other than UTF-8 is encoding. Encoding this string in an encoding other than UTF-8 is
......
...@@ -1067,6 +1067,11 @@ class MinidomTest(unittest.TestCase): ...@@ -1067,6 +1067,11 @@ class MinidomTest(unittest.TestCase):
b'<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>') b'<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>')
self.assertEqual(doc.toxml('iso-8859-15'), self.assertEqual(doc.toxml('iso-8859-15'),
b'<?xml version="1.0" encoding="iso-8859-15"?><foo>\xa4</foo>') b'<?xml version="1.0" encoding="iso-8859-15"?><foo>\xa4</foo>')
self.assertEqual(doc.toxml('us-ascii'),
b'<?xml version="1.0" encoding="us-ascii"?><foo>&#8364;</foo>')
self.assertEqual(doc.toxml('utf-16'),
'<?xml version="1.0" encoding="utf-16"?>'
'<foo>\u20ac</foo>'.encode('utf-16'))
# Verify that character decoding errors throw exceptions instead # Verify that character decoding errors throw exceptions instead
# of crashing # of crashing
......
...@@ -14,7 +14,6 @@ Todo: ...@@ -14,7 +14,6 @@ Todo:
* SAX 2 namespaces * SAX 2 namespaces
""" """
import codecs
import io import io
import xml.dom import xml.dom
...@@ -47,19 +46,22 @@ class Node(xml.dom.Node): ...@@ -47,19 +46,22 @@ class Node(xml.dom.Node):
return self.toprettyxml("", "", encoding) return self.toprettyxml("", "", encoding)
def toprettyxml(self, indent="\t", newl="\n", encoding=None): def toprettyxml(self, indent="\t", newl="\n", encoding=None):
# indent = the indentation string to prepend, per level if encoding is None:
# newl = the newline string to append writer = io.StringIO()
use_encoding = "utf-8" if encoding is None else encoding else:
writer = codecs.getwriter(use_encoding)(io.BytesIO()) writer = io.TextIOWrapper(io.BytesIO(),
encoding=encoding,
errors="xmlcharrefreplace",
newline='\n')
if self.nodeType == Node.DOCUMENT_NODE: if self.nodeType == Node.DOCUMENT_NODE:
# Can pass encoding only to document, to put it into XML header # Can pass encoding only to document, to put it into XML header
self.writexml(writer, "", indent, newl, encoding) self.writexml(writer, "", indent, newl, encoding)
else: else:
self.writexml(writer, "", indent, newl) self.writexml(writer, "", indent, newl)
if encoding is None: if encoding is None:
return writer.stream.getvalue().decode(use_encoding) return writer.getvalue()
else: else:
return writer.stream.getvalue() return writer.detach().getvalue()
def hasChildNodes(self): def hasChildNodes(self):
return bool(self.childNodes) return bool(self.childNodes)
...@@ -1788,12 +1790,12 @@ class Document(Node, DocumentLS): ...@@ -1788,12 +1790,12 @@ class Document(Node, DocumentLS):
raise xml.dom.NotSupportedErr("cannot import document type nodes") raise xml.dom.NotSupportedErr("cannot import document type nodes")
return _clone_node(node, deep, self) return _clone_node(node, deep, self)
def writexml(self, writer, indent="", addindent="", newl="", def writexml(self, writer, indent="", addindent="", newl="", encoding=None):
encoding = None):
if encoding is None: if encoding is None:
writer.write('<?xml version="1.0" ?>'+newl) writer.write('<?xml version="1.0" ?>'+newl)
else: else:
writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl)) writer.write('<?xml version="1.0" encoding="%s"?>%s' % (
encoding, newl))
for node in self.childNodes: for node in self.childNodes:
node.writexml(writer, indent, addindent, newl) node.writexml(writer, indent, addindent, newl)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment