Commit 78edf757 authored by Antoine Pitrou

Issue #13333: The UTF-7 decoder now accepts lone surrogates

(the encoder already accepts them).
parents 9a812cbc 5418ee0b
...@@ -1108,10 +1108,18 @@ class UnicodeTest(string_tests.CommonTest, ...@@ -1108,10 +1108,18 @@ class UnicodeTest(string_tests.CommonTest,
for (x, y) in utfTests: for (x, y) in utfTests:
self.assertEqual(x.encode('utf-7'), y) self.assertEqual(x.encode('utf-7'), y)
# Unpaired surrogates not supported # Unpaired surrogates are passed through
self.assertRaises(UnicodeError, str, b'+3ADYAA-', 'utf-7') self.assertEqual('\uD801'.encode('utf-7'), b'+2AE-')
self.assertEqual('\uD801x'.encode('utf-7'), b'+2AE-x')
self.assertEqual(str(b'+3ADYAA-', 'utf-7', 'replace'), '\ufffd\ufffd') self.assertEqual('\uDC01'.encode('utf-7'), b'+3AE-')
self.assertEqual('\uDC01x'.encode('utf-7'), b'+3AE-x')
self.assertEqual(b'+2AE-'.decode('utf-7'), '\uD801')
self.assertEqual(b'+2AE-x'.decode('utf-7'), '\uD801x')
self.assertEqual(b'+3AE-'.decode('utf-7'), '\uDC01')
self.assertEqual(b'+3AE-x'.decode('utf-7'), '\uDC01x')
self.assertEqual('\uD801\U000abcde'.encode('utf-7'), b'+2AHab9ze-')
self.assertEqual(b'+2AHab9ze-'.decode('utf-7'), '\uD801\U000abcde')
# Issue #2242: crash on some Windows/MSVC versions # Issue #2242: crash on some Windows/MSVC versions
self.assertEqual(b'+\xc1'.decode('utf-7'), '\xc1') self.assertEqual(b'+\xc1'.decode('utf-7'), '\xc1')
......
...@@ -10,6 +10,9 @@ What's New in Python 3.3 Alpha 1? ...@@ -10,6 +10,9 @@ What's New in Python 3.3 Alpha 1?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #13333: The UTF-7 decoder now accepts lone surrogates (the encoder
already accepts them).
- Issue #13389: Full garbage collection passes now clear the freelists for - Issue #13389: Full garbage collection passes now clear the freelists for
list and dict objects. They already cleared other freelists in the list and dict objects. They already cleared other freelists in the
interpreter. interpreter.
......
...@@ -3884,21 +3884,18 @@ PyUnicode_DecodeUTF7Stateful(const char *s, ...@@ -3884,21 +3884,18 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
if (unicode_putchar(&unicode, &outpos, ch2) < 0) if (unicode_putchar(&unicode, &outpos, ch2) < 0)
goto onError; goto onError;
surrogate = 0; surrogate = 0;
continue;
} }
else { else {
if (unicode_putchar(&unicode, &outpos, surrogate) < 0)
goto onError;
surrogate = 0; surrogate = 0;
errmsg = "second surrogate missing";
goto utf7Error;
} }
} }
else if (outCh >= 0xD800 && outCh <= 0xDBFF) { if (outCh >= 0xD800 && outCh <= 0xDBFF) {
/* first surrogate */ /* first surrogate */
surrogate = outCh; surrogate = outCh;
} }
else if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
errmsg = "unexpected second surrogate";
goto utf7Error;
}
else { else {
if (unicode_putchar(&unicode, &outpos, outCh) < 0) if (unicode_putchar(&unicode, &outpos, outCh) < 0)
goto onError; goto onError;
...@@ -3909,8 +3906,9 @@ PyUnicode_DecodeUTF7Stateful(const char *s, ...@@ -3909,8 +3906,9 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
inShift = 0; inShift = 0;
s++; s++;
if (surrogate) { if (surrogate) {
errmsg = "second surrogate missing at end of shift sequence"; if (unicode_putchar(&unicode, &outpos, surrogate) < 0)
goto utf7Error; goto onError;
surrogate = 0;
} }
if (base64bits > 0) { /* left-over bits */ if (base64bits > 0) { /* left-over bits */
if (base64bits >= 6) { if (base64bits >= 6) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment