Fix regression on 2-byte wchar_t systems (Windows)

0290c7a8 · Antoine Pitrou · 28a08205 · 0290c7a8
Commit 0290c7a8 authored Nov 11, 2011 by Antoine Pitrou
Hide whitespace changes
Inline Side-by-side

Showing with 12 additions and 7 deletions

Objects/unicodeobject.c +12 -7

No files found.
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6252,15 +6252,18 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
    end = s + size;
    while (s < end) {
+        Py_UNICODE uch;
        Py_UCS4 ch;
        /* We copy the raw representation one byte at a time because the
           pointer may be unaligned (see test_codeccallbacks). */
-        ((char *) &ch)[0] = s[0];
+        ((char *) &uch)[0] = s[0];
-        ((char *) &ch)[1] = s[1];
+        ((char *) &uch)[1] = s[1];
 #ifdef Py_UNICODE_WIDE
-        ((char *) &ch)[2] = s[2];
+        ((char *) &uch)[2] = s[2];
-        ((char *) &ch)[3] = s[3];
+        ((char *) &uch)[3] = s[3];
 #endif
+        ch = uch;
        /* We have to sanity check the raw data, otherwise doom looms for
           some malformed UCS-4 data. */
        if (
@@ -6292,10 +6295,12 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
 #ifndef Py_UNICODE_WIDE
        if (ch >= 0xD800 && ch <= 0xDBFF && s < end)
        {
-            Py_UCS4 ch2 = *(Py_UNICODE*)s;
+            Py_UNICODE uch2;
-            if (ch2 >= 0xDC00 && ch2 <= 0xDFFF)
+            ((char *) &uch2)[0] = s[0];
+            ((char *) &uch2)[1] = s[1];
+            if (uch2 >= 0xDC00 && uch2 <= 0xDFFF)
            {
-                ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
+                ch = (((uch & 0x3FF)<<10) | (uch2 & 0x3FF)) + 0x10000;
                s += Py_UNICODE_SIZE;
            }
        }