Commit e349bf23 authored by Zackery Spytz, committed by Serhiy Storchaka

bpo-22602: Raise an exception in the UTF-7 decoder for ill-formed sequences...

bpo-22602: Raise an exception in the UTF-7 decoder for ill-formed sequences starting with "+". (GH-8741)

The UTF-7 decoder now raises UnicodeDecodeError for ill-formed
sequences starting with "+" (as specified in RFC 2152).
parent d3d3171d
......@@ -1020,6 +1020,7 @@ class UTF7Test(ReadTest, unittest.TestCase):
(b'a+////,+IKw-b', 'a\uffff\ufffd\u20acb'),
(b'a+IKw-b\xff', 'a\u20acb\ufffd'),
(b'a+IKw\xffb', 'a\u20ac\ufffdb'),
(b'a+@b', 'a\ufffdb'),
]
for raw, expected in tests:
with self.subTest(raw=raw):
......
......@@ -1630,6 +1630,10 @@ class UnicodeTest(string_tests.CommonTest,
for c in set_o:
self.assertEqual(c.encode('ascii').decode('utf7'), c)
with self.assertRaisesRegex(UnicodeDecodeError,
'ill-formed sequence'):
b'+@'.decode('utf-7')
def test_codecs_utf8(self):
self.assertEqual(''.encode('utf-8'), b'')
self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
......
The UTF-7 decoder now raises :exc:`UnicodeDecodeError` for ill-formed
sequences starting with "+" (as specified in RFC 2152). Patch by Zackery
Spytz.
......@@ -4479,6 +4479,11 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
if (_PyUnicodeWriter_WriteCharInline(&writer, '+') < 0)
goto onError;
}
else if (s < e && !IS_BASE64(*s)) {
s++;
errmsg = "ill-formed sequence";
goto utf7Error;
}
else { /* begin base64-encoded section */
inShift = 1;
surrogate = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment