Commit 484d9a40 authored by Neal Norwitz

Patch #1309009, Fix segfault in pyexpat when the XML document is in latin_1, but Python incorrectly assumes it is in UTF-8 format

Will backport.
parent aa93517d
...@@ -889,6 +889,15 @@ def testEncodings(): ...@@ -889,6 +889,15 @@ def testEncodings():
and doc.toxml('utf-8') == '<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>' and doc.toxml('utf-8') == '<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>'
and doc.toxml('iso-8859-15') == '<?xml version="1.0" encoding="iso-8859-15"?><foo>\xa4</foo>', and doc.toxml('iso-8859-15') == '<?xml version="1.0" encoding="iso-8859-15"?><foo>\xa4</foo>',
"testEncodings - encoding EURO SIGN") "testEncodings - encoding EURO SIGN")
# Verify that character decoding errors throw exceptions instead of crashing
try:
doc = parseString('<fran\xe7ais>Comment \xe7a va ? Tr\xe8s bien ?</fran\xe7ais>')
except UnicodeDecodeError:
pass
else:
print 'parsing with bad encoding should raise a UnicodeDecodeError'
doc.unlink() doc.unlink()
class UserDataHandler: class UserDataHandler:
......
...@@ -305,6 +305,7 @@ Flemming Kj ...@@ -305,6 +305,7 @@ Flemming Kj
Jiba Jiba
Orjan Johansen Orjan Johansen
Simon Johnston Simon Johnston
Evan Jones
Richard Jones Richard Jones
Irmen de Jong Irmen de Jong
Lucas de Jonge Lucas de Jonge
......
...@@ -153,6 +153,9 @@ present). ...@@ -153,6 +153,9 @@ present).
Extension Modules Extension Modules
----------------- -----------------
- Patch #1309009, Fix segfault in pyexpat when the XML document is in latin_1,
but Python incorrectly assumes it is in UTF-8 format
- Fix parse errors in the readline module when compiling without threads. - Fix parse errors in the readline module when compiling without threads.
- Patch #1288833: Removed thread lock from socket.getaddrinfo on - Patch #1288833: Removed thread lock from socket.getaddrinfo on
......
...@@ -417,6 +417,9 @@ string_intern(xmlparseobject *self, const char* str) ...@@ -417,6 +417,9 @@ string_intern(xmlparseobject *self, const char* str)
{ {
PyObject *result = STRING_CONV_FUNC(str); PyObject *result = STRING_CONV_FUNC(str);
PyObject *value; PyObject *value;
/* result can be NULL if the unicode conversion failed. */
if (!result)
return result;
if (!self->intern) if (!self->intern)
return result; return result;
value = PyDict_GetItem(self->intern, result); value = PyDict_GetItem(self->intern, result);
...@@ -572,7 +575,9 @@ my_StartElementHandler(void *userData, ...@@ -572,7 +575,9 @@ my_StartElementHandler(void *userData,
Py_DECREF(v); Py_DECREF(v);
} }
} }
args = Py_BuildValue("(NN)", string_intern(self, name), container); args = string_intern(self, name);
if (args != NULL)
args = Py_BuildValue("(NN)", args, container);
if (args == NULL) { if (args == NULL) {
Py_DECREF(container); Py_DECREF(container);
return; return;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment