Bounds check for bad data (thanks, Amaury)

45c41494 · Philip Jenvey · a20879ff · 45c41494 · 45c41494
Commit 45c41494 authored Oct 26, 2012 by Philip Jenvey
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 3 deletions

Lib/test/test_codecs.py +2 -0

Python/codecs.c +4 -3

No files found.
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -645,6 +645,8 @@ class UTF8Test(ReadTest):
        self.assertEqual(b"abc\xed\xa0\x80def".decode("utf-8", "surrogatepass"),
                         "abc\ud800def")
        self.assertTrue(codecs.lookup_error("surrogatepass"))
+        with self.assertRaises(UnicodeDecodeError):
+            b"abc\xed\xa0".decode("utf-8", "surrogatepass")
 class UTF7Test(ReadTest):
    encoding = "utf-7"

--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -821,9 +821,10 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
        /* Try decoding a single surrogate character. If
           there are more, let the codec call us again. */
        p += start;
-        if ((p[0] & 0xf0) == 0xe0 ||
+        if (strlen(p) > 2 &&
-            (p[1] & 0xc0) == 0x80 ||
+            ((p[0] & 0xf0) == 0xe0 ||
-            (p[2] & 0xc0) == 0x80) {
+             (p[1] & 0xc0) == 0x80 ||
+             (p[2] & 0xc0) == 0x80)) {
            /* it's a three-byte code */
            ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f);
            if (ch < 0xd800 || ch > 0xdfff)