Commit f1056723 authored by Serhiy Storchaka

Issue #19279: UTF-7 decoder no longer produces illegal unicode strings.

parent 6ea3c9b2
......@@ -611,6 +611,35 @@ class UTF7Test(ReadTest):
]
)
def test_errors(self):
    """Check how the UTF-7 decoder reacts to malformed input.

    Each sample must make ``codecs.utf_7_decode(sample, 'strict', True)``
    raise UnicodeDecodeError, and must decode to the paired unicode
    string when the 'replace' error handler is used instead.
    """
    # Pairs of (malformed byte string, expected text with errors='replace').
    cases = [
        ('a\xffb', u'a\ufffdb'),
        ('a+IK', u'a\ufffd'),
        ('a+IK-b', u'a\ufffdb'),
        ('a+IK,b', u'a\ufffdb'),
        ('a+IKx', u'a\u20ac\ufffd'),
        ('a+IKx-b', u'a\u20ac\ufffdb'),
        ('a+IKwgr', u'a\u20ac\ufffd'),
        ('a+IKwgr-b', u'a\u20ac\ufffdb'),
        ('a+IKwgr,', u'a\u20ac\ufffd'),
        ('a+IKwgr,-b', u'a\u20ac\ufffd-b'),
        ('a+IKwgrB', u'a\u20ac\u20ac\ufffd'),
        ('a+IKwgrB-b', u'a\u20ac\u20ac\ufffdb'),
        ('a+/,+IKw-b', u'a\ufffd\u20acb'),
        ('a+//,+IKw-b', u'a\ufffd\u20acb'),
        ('a+///,+IKw-b', u'a\uffff\ufffd\u20acb'),
        ('a+////,+IKw-b', u'a\uffff\ufffd\u20acb'),
    ]
    for encoded, replaced in cases:
        # Strict mode has to reject the sample outright ...
        with self.assertRaises(UnicodeDecodeError):
            codecs.utf_7_decode(encoded, 'strict', True)
        # ... while 'replace' has to produce exactly the expected text.
        self.assertEqual(encoded.decode('utf-7', 'replace'), replaced)
def test_nonbmp(self):
    """Round-trip U+104A0 through the codec named by ``self.encoding``.

    Both spellings of the character (a single ``\\U000104A0`` escape and
    the surrogate pair ``\\ud801\\udca0``) must encode to '+2AHcoA-', and
    that byte string must decode back to ``\\U000104A0``.
    """
    encoded = '+2AHcoA-'
    for text in (u'\U000104A0', u'\ud801\udca0'):
        self.assertEqual(text.encode(self.encoding), encoded)
    self.assertEqual(encoded.decode(self.encoding), u'\U000104A0')
class UTF16ExTest(unittest.TestCase):
def test_errors(self):
......
......@@ -9,6 +9,8 @@ What's New in Python 2.7.6?
Core and Builtins
-----------------
- Issue #19279: UTF-7 decoder no longer produces illegal unicode strings.
- Issue #18739: Fix an inconsistency between math.log(n) and math.log(long(n));
the results could be off from one another by a ulp or two.
......
......@@ -1671,6 +1671,7 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
(base64buffer >> (base64bits-16));
base64bits -= 16;
base64buffer &= (1 << base64bits) - 1; /* clear high bits */
assert(outCh <= 0xffff);
if (surrogate) {
/* expecting a second surrogate */
if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
......@@ -1737,6 +1738,7 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
inShift = 1;
shiftOutStart = p;
base64bits = 0;
base64buffer = 0;
}
}
else if (DECODE_DIRECT(ch)) { /* character decodes as itself */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment