Merged revisions 75931 via svnmerge from

  svn+ssh://pythondev@svn.python.org/python/trunk

........
  r75931 | benjamin.peterson | 2009-10-28 20:49:07 -0500 (Wed, 28 Oct 2009) | 5 lines

  do a backport of r75928

  The added test does not fail without the patch, but we still fix the issue of
  surrogates being used in wide builds where they should not be.
........

Merged revisions 75931 via svnmerge from
  svn+ssh://pythondev@svn.python.org/python/trunk

........
  r75931 | benjamin.peterson | 2009-10-28 20:49:07 -0500 (Wed, 28 Oct 2009) | 5 lines

  do a backport of r75928

  The added test does not fail without the patch, but we still fix the issue of
  surrogates being used in wide builds where they should not be.
........
c717aec7 · Benjamin Peterson · e7228d3e · c717aec7 · c717aec7
Commit c717aec7 authored Oct 29, 2009 by Benjamin Peterson
Hide whitespace changes
Inline Side-by-side

Showing with 49 additions and 39 deletions

Lib/test/test_pep263.py Lib/test/test_pep263.py +37 -30

Python/ast.c Python/ast.c +12 -9

No files found.
--- a/Lib/test/test_pep263.py
+++ b/Lib/test/test_pep263.py
-#! -*- coding: koi8-r -*-
-
-import unittest
-from test import test_support
-
-class PEP263Test(unittest.TestCase):
-
-    def test_pep263(self):
-        self.assertEqual(
-            u"Питон".encode("utf-8"),
-            '\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd'
-        )
-        self.assertEqual(
-            u"\П".encode("utf-8"),
-            '\\\xd0\x9f'
-        )
-
-    def test_compilestring(self):
-        # see #1882
-        c = compile("\n# coding: utf-8\nu = u'\xc3\xb3'\n", "dummy", "exec")
-        d = {}
-        exec c in d
-        self.assertEqual(d['u'], u'\xf3')
-
-
-def test_main():
-    test_support.run_unittest(PEP263Test)
-
-if __name__=="__main__":
-    test_main()
+# -*- coding: koi8-r -*-
+
+import unittest
+from test import test_support
+
+class PEP263Test(unittest.TestCase):
+
+    def test_pep263(self):
+        self.assertEqual(
+            u"Питон".encode("utf-8"),
+            '\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd'
+        )
+        self.assertEqual(
+            u"\П".encode("utf-8"),
+            '\\\xd0\x9f'
+        )
+
+    def test_compilestring(self):
+        # see #1882
+        c = compile("\n# coding: utf-8\nu = u'\xc3\xb3'\n", "dummy", "exec")
+        d = {}
+        exec c in d
+        self.assertEqual(d['u'], u'\xf3')
+
+
+    def test_issue3297(self):
+        c = compile("a, b = '\U0001010F', '\\U0001010F'", "dummy", "exec")
+        d = {}
+        exec(c, d)
+        self.assertEqual(d['a'], d['b'])
+        self.assertEqual(len(d['a']), len(d['b']))
+
+def test_main():
+    test_support.run_unittest(PEP263Test)
+
+if __name__=="__main__":
+    test_main()
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -3248,10 +3248,11 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
                u = NULL;
        } else {
                /* check for integer overflow */
-                if (len > PY_SIZE_MAX / 4)
+                if (len > PY_SIZE_MAX / 6)
                        return NULL;
-                /* "\XX" may become "\u005c\uHHLL" (12 bytes) */
-                u = PyString_FromStringAndSize((char *)NULL, len * 4);
+		/* "<C3><A4>" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
+		   "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
+                u = PyString_FromStringAndSize((char *)NULL, len * 6);
                if (u == NULL)
                        return NULL;
                p = buf = PyString_AsString(u);
@@ -3268,19 +3269,21 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
                                PyObject *w;
                                char *r;
                                Py_ssize_t rn, i;
-                                w = decode_utf8(c, &s, end, "utf-16-be");
+                                w = decode_utf8(c, &s, end, "utf-32-be");
                                if (w == NULL) {
                                        Py_DECREF(u);
                                        return NULL;
                                }
                                r = PyString_AsString(w);
                                rn = PyString_Size(w);
-                                assert(rn % 2 == 0);
-                                for (i = 0; i < rn; i += 2) {
-                                        sprintf(p, "\\u%02x%02x",
+                                assert(rn % 4 == 0);
+                                for (i = 0; i < rn; i += 4) {
+                                        sprintf(p, "\\U%02x%02x%02x%02x",
                                                r[i + 0] & 0xFF,
-                                                r[i + 1] & 0xFF);
-                                        p += 6;
+                                                r[i + 1] & 0xFF,
+						r[i + 2] & 0xFF,
+						r[i + 3] & 0xFF);
+                                        p += 10;
                                }
                                Py_DECREF(w);
                        } else {