Commit 35804e4c authored by Serhiy Storchaka

Issue #19279: UTF-7 decoder no longer produces illegal strings.

parent 507c591e
...@@ -820,6 +820,36 @@ class UTF7Test(ReadTest, unittest.TestCase): ...@@ -820,6 +820,36 @@ class UTF7Test(ReadTest, unittest.TestCase):
] ]
) )
def test_errors(self):
    """Check how malformed UTF-7 input is handled.

    Every case must raise UnicodeDecodeError when passed to
    codecs.utf_7_decode() with the 'strict' handler, and must decode
    to the paired text when the 'replace' handler is used.
    """
    # (malformed bytes, text expected with errors='replace')
    cases = (
        (b'a\xffb', 'a\ufffdb'),
        (b'a+IK', 'a\ufffd'),
        (b'a+IK-b', 'a\ufffdb'),
        (b'a+IK,b', 'a\ufffdb'),
        (b'a+IKx', 'a\u20ac\ufffd'),
        (b'a+IKx-b', 'a\u20ac\ufffdb'),
        (b'a+IKwgr', 'a\u20ac\ufffd'),
        (b'a+IKwgr-b', 'a\u20ac\ufffdb'),
        (b'a+IKwgr,', 'a\u20ac\ufffd'),
        (b'a+IKwgr,-b', 'a\u20ac\ufffd-b'),
        (b'a+IKwgrB', 'a\u20ac\u20ac\ufffd'),
        (b'a+IKwgrB-b', 'a\u20ac\u20ac\ufffdb'),
        (b'a+/,+IKw-b', 'a\ufffd\u20acb'),
        (b'a+//,+IKw-b', 'a\ufffd\u20acb'),
        (b'a+///,+IKw-b', 'a\uffff\ufffd\u20acb'),
        (b'a+////,+IKw-b', 'a\uffff\ufffd\u20acb'),
    )
    for data, replaced in cases:
        # Report each input separately so one failure does not hide the rest.
        with self.subTest(raw=data):
            with self.assertRaises(UnicodeDecodeError):
                codecs.utf_7_decode(data, 'strict', True)
            decoded = data.decode('utf-7', 'replace')
            self.assertEqual(decoded, replaced)
def test_nonbmp(self):
    """Encode and decode a character outside the Basic Multilingual Plane."""
    encoded = b'+2AHcoA-'
    # The single code point and its explicit surrogate pair must both
    # produce the same encoded bytes.
    for text in ('\U000104A0', '\ud801\udca0'):
        self.assertEqual(text.encode(self.encoding), encoded)
    # Decoding yields the single non-BMP code point.
    self.assertEqual(encoded.decode(self.encoding), '\U000104A0')
class UTF16ExTest(unittest.TestCase): class UTF16ExTest(unittest.TestCase):
def test_errors(self): def test_errors(self):
......
...@@ -12,6 +12,8 @@ What's New in Python 3.3.3 release candidate 1? ...@@ -12,6 +12,8 @@ What's New in Python 3.3.3 release candidate 1?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #19279: UTF-7 decoder no longer produces illegal strings.
- Fix macro expansion of _PyErr_OCCURRED(), and make sure to use it in at - Fix macro expansion of _PyErr_OCCURRED(), and make sure to use it in at
least one place so as to avoid regressions. least one place so as to avoid regressions.
......
...@@ -4359,6 +4359,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s, ...@@ -4359,6 +4359,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
Py_UCS4 outCh = (Py_UCS4)(base64buffer >> (base64bits-16)); Py_UCS4 outCh = (Py_UCS4)(base64buffer >> (base64bits-16));
base64bits -= 16; base64bits -= 16;
base64buffer &= (1 << base64bits) - 1; /* clear high bits */ base64buffer &= (1 << base64bits) - 1; /* clear high bits */
assert(outCh <= 0xffff);
if (surrogate) { if (surrogate) {
/* expecting a second surrogate */ /* expecting a second surrogate */
if (Py_UNICODE_IS_LOW_SURROGATE(outCh)) { if (Py_UNICODE_IS_LOW_SURROGATE(outCh)) {
...@@ -4426,6 +4427,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s, ...@@ -4426,6 +4427,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
inShift = 1; inShift = 1;
shiftOutStart = outpos; shiftOutStart = outpos;
base64bits = 0; base64bits = 0;
base64buffer = 0;
} }
} }
else if (DECODE_DIRECT(ch)) { /* character decodes as itself */ else if (DECODE_DIRECT(ch)) { /* character decodes as itself */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment