Issue #17043: The unicode-internal decoder no longer reads past the end of
the input buffer.

Issue #17043: The unicode-internal decoder no longer reads past the end of
the input buffer.
3fd4ab35 · Serhiy Storchaka · df4bb464 · 3fd4ab35 · 3fd4ab35
Commit 3fd4ab35 authored Feb 07, 2013 by Serhiy Storchaka
Show whitespace changes
Inline Side-by-side

Showing with 27 additions and 27 deletions

Misc/NEWS Misc/NEWS +3 -0

Objects/unicodeobject.c Objects/unicodeobject.c +24 -27

No files found.
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.2.4
 Core and Builtins
 -----------------

+- Issue #17043: The unicode-internal decoder no longer read past the end of
+  input buffer.
+
 - Issue #16979: Fix error handling bugs in the unicode-escape-decode decoder.

 - Issue #10156: In the interpreter's initialization phase, unicode globals

--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4392,25 +4392,27 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s,
    end = s + size;

    while (s < end) {
-        memcpy(p, s, sizeof(Py_UNICODE));
-        /* We have to sanity check the raw data, otherwise doom looms for
-           some malformed UCS-4 data. */
-        if (
-#ifdef Py_UNICODE_WIDE
-            *p > unimax || *p < 0 ||
-#endif
-            end-s < Py_UNICODE_SIZE
-            )
-        {
-            startinpos = s - starts;
        if (end-s < Py_UNICODE_SIZE) {
            endinpos = end-starts;
            reason = "truncated input";
+            goto error;
        }
-            else {
+        memcpy(p, s, sizeof(Py_UNICODE));
+#ifdef Py_UNICODE_WIDE
+        /* We have to sanity check the raw data, otherwise doom looms for
+           some malformed UCS-4 data. */
+        if (*p > unimax || *p < 0) {
            endinpos = s - starts + Py_UNICODE_SIZE;
            reason = "illegal code point (> 0x10FFFF)";
+            goto error;
        }
+#endif
+        p++;
+        s += Py_UNICODE_SIZE;
+        continue;
+
+  error:
+        startinpos = s - starts;
        outpos = p - PyUnicode_AS_UNICODE(v);
        if (unicode_decode_call_errorhandler(
                errors, &errorHandler,
@@ -4420,11 +4422,6 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s,
            goto onError;
        }
    }
-        else {
-            p++;
-            s += Py_UNICODE_SIZE;
-        }
-    }

    if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
        goto onError;