Patch #1309009, Fix segfault in pyexpat when the XML document is

in latin_1, but Python incorrectly assumes it is in UTF-8 format. Will backport.

Patch #1309009, Fix segfault in pyexpat when the XML document is
in latin_1, but Python incorrectly assumes it is in UTF-8 format. Will backport.
484d9a40 · Neal Norwitz · aa93517d · 484d9a40 · 484d9a40 · 484d9a40
Commit 484d9a40 authored Sep 30, 2005 by Neal Norwitz
Show whitespace changes
Inline Side-by-side

Showing with 19 additions and 1 deletion

Lib/test/test_minidom.py Lib/test/test_minidom.py +9 -0

Misc/ACKS Misc/ACKS +1 -0

Misc/NEWS Misc/NEWS +3 -0

Modules/pyexpat.c Modules/pyexpat.c +6 -1

No files found.
--- a/Lib/test/test_minidom.py
+++ b/Lib/test/test_minidom.py
@@ -889,6 +889,15 @@ def testEncodings():
            and doc.toxml('utf-8') == '<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>'
            and doc.toxml('iso-8859-15') == '<?xml version="1.0" encoding="iso-8859-15"?><foo>\xa4</foo>',
            "testEncodings - encoding EURO SIGN")
+    # Verify that character decoding errors throw exceptions instead of crashing
+    try:
+        doc = parseString('<fran\xe7ais>Comment \xe7a va ? Tr\xe8s bien ?</fran\xe7ais>')
+    except UnicodeDecodeError:
+        pass
+    else:
+        print 'parsing with bad encoding should raise a UnicodeDecodeError'
    doc.unlink()
 class UserDataHandler:

--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -305,6 +305,7 @@ Flemming Kj
 Jiba
 Orjan Johansen
 Simon Johnston
+Evan Jones
 Richard Jones
 Irmen de Jong
 Lucas de Jonge

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -153,6 +153,9 @@ present).
 Extension Modules
 -----------------
+- Patch #1309009, Fix segfault in pyexpat when the XML document is in latin_1,
+  but Python incorrectly assumes it is in UTF-8 format
 - Fix parse errors in the readline module when compiling without threads.
 - Patch #1288833: Removed thread lock from socket.getaddrinfo on

--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -417,6 +417,9 @@ string_intern(xmlparseobject *self, const char* str)
 {
    PyObject *result = STRING_CONV_FUNC(str);
    PyObject *value;
+    /* result can be NULL if the unicode conversion failed. */
+    if (!result)
+	return result;
    if (!self->intern)
 	return result;
    value = PyDict_GetItem(self->intern, result);
@@ -572,7 +575,9 @@ my_StartElementHandler(void *userData,
                Py_DECREF(v);
            }
        }
-	args = Py_BuildValue("(NN)", string_intern(self, name), container);
+        args = string_intern(self, name);
+        if (args != NULL)
+            args = Py_BuildValue("(NN)", args, container);
        if (args == NULL) {
            Py_DECREF(container);
            return;