Issue #17043: The unicode-internal decoder no longer reads past the end of the input buffer.

Issue #17043: The unicode-internal decoder no longer reads past the end of
the input buffer.
3fd4ab35 · Serhiy Storchaka · df4bb464 · 3fd4ab35 · 3fd4ab35
Commit 3fd4ab35 authored Feb 07, 2013 by Serhiy Storchaka
Hide whitespace changes
Inline Side-by-side

Showing with 27 additions and 27 deletions

Misc/NEWS Misc/NEWS +3 -0

Objects/unicodeobject.c Objects/unicodeobject.c +24 -27

No files found.
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.2.4
 Core and Builtins
 -----------------
+- Issue #17043: The unicode-internal decoder no longer read past the end of
+  input buffer.
 - Issue #16979: Fix error handling bugs in the unicode-escape-decode decoder.
 - Issue #10156: In the interpreter's initialization phase, unicode globals

--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4392,37 +4392,34 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s,
    end = s + size;
    while (s < end) {
+        if (end-s < Py_UNICODE_SIZE) {
+            endinpos = end-starts;
+            reason = "truncated input";
+            goto error;
+        }
        memcpy(p, s, sizeof(Py_UNICODE));
+#ifdef Py_UNICODE_WIDE
        /* We have to sanity check the raw data, otherwise doom looms for
           some malformed UCS-4 data. */
-        if (
+        if (*p > unimax || *p < 0) {
-#ifdef Py_UNICODE_WIDE
+            endinpos = s - starts + Py_UNICODE_SIZE;
-            *p > unimax || *p < 0 ||
+            reason = "illegal code point (> 0x10FFFF)";
-#endif
+            goto error;
-            end-s < Py_UNICODE_SIZE
-            )
-        {
-            startinpos = s - starts;
-            if (end-s < Py_UNICODE_SIZE) {
-                endinpos = end-starts;
-                reason = "truncated input";
-            }
-            else {
-                endinpos = s - starts + Py_UNICODE_SIZE;
-                reason = "illegal code point (> 0x10FFFF)";
-            }
-            outpos = p - PyUnicode_AS_UNICODE(v);
-            if (unicode_decode_call_errorhandler(
-                    errors, &errorHandler,
-                    "unicode_internal", reason,
-                    &starts, &end, &startinpos, &endinpos, &exc, &s,
-                    &v, &outpos, &p)) {
-                goto onError;
-            }
        }
-        else {
+#endif
-            p++;
+        p++;
-            s += Py_UNICODE_SIZE;
+        s += Py_UNICODE_SIZE;
+        continue;
+  error:
+        startinpos = s - starts;
+        outpos = p - PyUnicode_AS_UNICODE(v);
+        if (unicode_decode_call_errorhandler(
+                errors, &errorHandler,
+                "unicode_internal", reason,
+                &starts, &end, &startinpos, &endinpos, &exc, &s,
+                &v, &outpos, &p)) {
+            goto onError;
        }
    }