Commit ac93bc25 authored by Martin v. Löwis

When decoding UTF-16, don't assume that the buffer is in native endianness when checking surrogates.
parent 208efe56
......@@ -1065,16 +1065,16 @@ PyObject *PyUnicode_DecodeUTF16(const char *s,
errmsg = "unexpected end of data";
goto utf16Error;
}
if (0xDC00 <= *q && *q <= 0xDFFF) {
if (0xD800 <= ch && ch <= 0xDBFF) {
Py_UCS2 ch2 = *q++;
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
if (bo == 1)
ch = (ch >> 8) | (ch << 8);
ch2 = (ch2 >> 8) | (ch2 << 8);
#else
if (bo == -1)
ch = (ch >> 8) | (ch << 8);
ch2 = (ch2 >> 8) | (ch2 << 8);
#endif
if (0xD800 <= ch && ch <= 0xDBFF) {
if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
#if Py_UNICODE_SIZE == 2
/* This is valid data (a UTF-16 surrogate pair), but
we are not able to store this information since our
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment