Commit 0290c7a8 authored by Antoine Pitrou

Fix regression on 2-byte wchar_t systems (Windows)

parent 28a08205
...@@ -6252,15 +6252,18 @@ _PyUnicode_DecodeUnicodeInternal(const char *s, ...@@ -6252,15 +6252,18 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
end = s + size; end = s + size;
while (s < end) { while (s < end) {
Py_UNICODE uch;
Py_UCS4 ch; Py_UCS4 ch;
/* We copy the raw representation one byte at a time because the /* We copy the raw representation one byte at a time because the
pointer may be unaligned (see test_codeccallbacks). */ pointer may be unaligned (see test_codeccallbacks). */
((char *) &ch)[0] = s[0]; ((char *) &uch)[0] = s[0];
((char *) &ch)[1] = s[1]; ((char *) &uch)[1] = s[1];
#ifdef Py_UNICODE_WIDE #ifdef Py_UNICODE_WIDE
((char *) &ch)[2] = s[2]; ((char *) &uch)[2] = s[2];
((char *) &ch)[3] = s[3]; ((char *) &uch)[3] = s[3];
#endif #endif
ch = uch;
/* We have to sanity check the raw data, otherwise doom looms for /* We have to sanity check the raw data, otherwise doom looms for
some malformed UCS-4 data. */ some malformed UCS-4 data. */
if ( if (
...@@ -6292,10 +6295,12 @@ _PyUnicode_DecodeUnicodeInternal(const char *s, ...@@ -6292,10 +6295,12 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
#ifndef Py_UNICODE_WIDE #ifndef Py_UNICODE_WIDE
if (ch >= 0xD800 && ch <= 0xDBFF && s < end) if (ch >= 0xD800 && ch <= 0xDBFF && s < end)
{ {
Py_UCS4 ch2 = *(Py_UNICODE*)s; Py_UNICODE uch2;
if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) ((char *) &uch2)[0] = s[0];
((char *) &uch2)[1] = s[1];
if (uch2 >= 0xDC00 && uch2 <= 0xDFFF)
{ {
ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000; ch = (((uch & 0x3FF)<<10) | (uch2 & 0x3FF)) + 0x10000;
s += Py_UNICODE_SIZE; s += Py_UNICODE_SIZE;
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment