Commit 30402549 authored by Antoine Pitrou

Issue #13333: The UTF-7 decoder now accepts lone surrogates

(the encoder already accepts them).
parent 16ed8683
......@@ -771,10 +771,18 @@ class UnicodeTest(
for (x, y) in utfTests:
self.assertEqual(x.encode('utf-7'), y)
# Unpaired surrogates not supported
self.assertRaises(UnicodeError, unicode, '+3ADYAA-', 'utf-7')
self.assertEqual(unicode('+3ADYAA-', 'utf-7', 'replace'), u'\ufffd\ufffd')
# Unpaired surrogates are passed through
self.assertEqual(u'\uD801'.encode('utf-7'), '+2AE-')
self.assertEqual(u'\uD801x'.encode('utf-7'), '+2AE-x')
self.assertEqual(u'\uDC01'.encode('utf-7'), '+3AE-')
self.assertEqual(u'\uDC01x'.encode('utf-7'), '+3AE-x')
self.assertEqual('+2AE-'.decode('utf-7'), u'\uD801')
self.assertEqual('+2AE-x'.decode('utf-7'), u'\uD801x')
self.assertEqual('+3AE-'.decode('utf-7'), u'\uDC01')
self.assertEqual('+3AE-x'.decode('utf-7'), u'\uDC01x')
self.assertEqual(u'\uD801\U000abcde'.encode('utf-7'), '+2AHab9ze-')
self.assertEqual('+2AHab9ze-'.decode('utf-7'), u'\uD801\U000abcde')
# Direct encoded characters
set_d = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?"
......@@ -9,6 +9,9 @@ What's New in Python 2.7.3?
Core and Builtins
- Issue #13333: The UTF-7 decoder now accepts lone surrogates (the encoder
already accepts them).
- Remove Py3k warning for callable.
- Issue #10519: Avoid unnecessary recursive function calls in
......@@ -1628,21 +1628,17 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
*p++ = outCh;
surrogate = 0;
else {
*p++ = surrogate;
surrogate = 0;
errmsg = "second surrogate missing";
goto utf7Error;
else if (outCh >= 0xD800 && outCh <= 0xDBFF) {
if (outCh >= 0xD800 && outCh <= 0xDBFF) {
/* first surrogate */
surrogate = outCh;
else if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
errmsg = "unexpected second surrogate";
goto utf7Error;
else {
*p++ = outCh;
......@@ -1652,8 +1648,8 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
inShift = 0;
if (surrogate) {
errmsg = "second surrogate missing at end of shift sequence";
goto utf7Error;
*p++ = surrogate;
surrogate = 0;
if (base64bits > 0) { /* left-over bits */
if (base64bits >= 6) {
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment