Commit ea0e3b0d authored by Benjamin Peterson

do a backport of r75928

The added test does not fail without the patch, but we still fix the issue of
surrogates being used in wide builds where they should not be.
parent a7d44001
......@@ -23,6 +23,13 @@ class PEP263Test(unittest.TestCase):
self.assertEqual(d['u'], u'\xf3')
def test_issue3297(self):
c = compile("a, b = '\U0001010F', '\\U0001010F'", "dummy", "exec")
d = {}
exec(c, d)
self.assertEqual(d['a'], d['b'])
self.assertEqual(len(d['a']), len(d['b']))
def test_main():
test_support.run_unittest(PEP263Test)
......
......@@ -3289,10 +3289,11 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
u = NULL;
} else {
/* check for integer overflow */
if (len > PY_SIZE_MAX / 4)
if (len > PY_SIZE_MAX / 6)
return NULL;
/* "\XX" may become "\u005c\uHHLL" (12 bytes) */
u = PyString_FromStringAndSize((char *)NULL, len * 4);
/* "<C3><A4>" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
"\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
u = PyString_FromStringAndSize((char *)NULL, len * 6);
if (u == NULL)
return NULL;
p = buf = PyString_AsString(u);
......@@ -3309,19 +3310,21 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
PyObject *w;
char *r;
Py_ssize_t rn, i;
w = decode_utf8(c, &s, end, "utf-16-be");
w = decode_utf8(c, &s, end, "utf-32-be");
if (w == NULL) {
Py_DECREF(u);
return NULL;
}
r = PyString_AsString(w);
rn = PyString_Size(w);
assert(rn % 2 == 0);
for (i = 0; i < rn; i += 2) {
sprintf(p, "\\u%02x%02x",
assert(rn % 4 == 0);
for (i = 0; i < rn; i += 4) {
sprintf(p, "\\U%02x%02x%02x%02x",
r[i + 0] & 0xFF,
r[i + 1] & 0xFF);
p += 6;
r[i + 1] & 0xFF,
r[i + 2] & 0xFF,
r[i + 3] & 0xFF);
p += 10;
}
Py_DECREF(w);
} else {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment