Commit 5418ee0b authored by Antoine Pitrou

Issue #13333: The UTF-7 decoder now accepts lone surrogates

(the encoder already accepts them).
parent c2fe5776
...@@ -1091,10 +1091,18 @@ class UnicodeTest(string_tests.CommonTest, ...@@ -1091,10 +1091,18 @@ class UnicodeTest(string_tests.CommonTest,
for (x, y) in utfTests: for (x, y) in utfTests:
self.assertEqual(x.encode('utf-7'), y) self.assertEqual(x.encode('utf-7'), y)
# Unpaired surrogates not supported # Unpaired surrogates are passed through
self.assertRaises(UnicodeError, str, b'+3ADYAA-', 'utf-7') self.assertEqual('\uD801'.encode('utf-7'), b'+2AE-')
self.assertEqual('\uD801x'.encode('utf-7'), b'+2AE-x')
self.assertEqual(str(b'+3ADYAA-', 'utf-7', 'replace'), '\ufffd\ufffd') self.assertEqual('\uDC01'.encode('utf-7'), b'+3AE-')
self.assertEqual('\uDC01x'.encode('utf-7'), b'+3AE-x')
self.assertEqual(b'+2AE-'.decode('utf-7'), '\uD801')
self.assertEqual(b'+2AE-x'.decode('utf-7'), '\uD801x')
self.assertEqual(b'+3AE-'.decode('utf-7'), '\uDC01')
self.assertEqual(b'+3AE-x'.decode('utf-7'), '\uDC01x')
self.assertEqual('\uD801\U000abcde'.encode('utf-7'), b'+2AHab9ze-')
self.assertEqual(b'+2AHab9ze-'.decode('utf-7'), '\uD801\U000abcde')
# Issue #2242: crash on some Windows/MSVC versions # Issue #2242: crash on some Windows/MSVC versions
self.assertEqual(b'+\xc1'.decode('utf-7'), '\xc1') self.assertEqual(b'+\xc1'.decode('utf-7'), '\xc1')
......
...@@ -10,6 +10,9 @@ What's New in Python 3.2.3? ...@@ -10,6 +10,9 @@ What's New in Python 3.2.3?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #13333: The UTF-7 decoder now accepts lone surrogates (the encoder
already accepts them).
- Issue #13342: input() used to ignore sys.stdin's and sys.stdout's unicode - Issue #13342: input() used to ignore sys.stdin's and sys.stdout's unicode
error handler in interactive mode (when calling into PyOS_Readline()). error handler in interactive mode (when calling into PyOS_Readline()).
......
...@@ -2282,21 +2282,17 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s, ...@@ -2282,21 +2282,17 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
*p++ = outCh; *p++ = outCh;
#endif #endif
surrogate = 0; surrogate = 0;
continue;
} }
else { else {
*p++ = surrogate;
surrogate = 0; surrogate = 0;
errmsg = "second surrogate missing";
goto utf7Error;
} }
} }
else if (outCh >= 0xD800 && outCh <= 0xDBFF) { if (outCh >= 0xD800 && outCh <= 0xDBFF) {
/* first surrogate */ /* first surrogate */
surrogate = outCh; surrogate = outCh;
} }
else if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
errmsg = "unexpected second surrogate";
goto utf7Error;
}
else { else {
*p++ = outCh; *p++ = outCh;
} }
...@@ -2306,8 +2302,8 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s, ...@@ -2306,8 +2302,8 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
inShift = 0; inShift = 0;
s++; s++;
if (surrogate) { if (surrogate) {
errmsg = "second surrogate missing at end of shift sequence"; *p++ = surrogate;
goto utf7Error; surrogate = 0;
} }
if (base64bits > 0) { /* left-over bits */ if (base64bits > 0) { /* left-over bits */
if (base64bits >= 6) { if (base64bits >= 6) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment