Commit 653dece2 authored by Antoine Pitrou's avatar Antoine Pitrou

Issue #4426: The UTF-7 decoder was too strict and didn't accept some legal sequences.

Patch by Nick Barnes and Victor Stinner.
parent 2827709d
......@@ -740,10 +740,8 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
const Py_UNICODE *data, /* Unicode char buffer */
Py_ssize_t length, /* number of Py_UNICODE chars to encode */
int encodeSetO, /* force the encoder to encode characters in
Set O, as described in RFC2152 */
int encodeWhiteSpace, /* force the encoder to encode space, tab,
carriage return and linefeed characters */
int base64SetO, /* Encode RFC2152 Set O characters in base64 */
int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
const char *errors /* error handling */
);
......
......@@ -521,19 +521,28 @@ class UnicodeTest(
(u'+?', '+-?'),
(ur'\\?', '+AFwAXA?'),
(ur'\\\?', '+AFwAXABc?'),
(ur'++--', '+-+---')
(ur'++--', '+-+---'),
(u'\U000abcde', '+2m/c3g-'), # surrogate pairs
(u'/', '/'),
]
for (x, y) in utfTests:
self.assertEqual(x.encode('utf-7'), y)
# surrogates not supported
# Unpaired surrogates not supported
self.assertRaises(UnicodeError, unicode, '+3ADYAA-', 'utf-7')
self.assertEqual(unicode('+3ADYAA-', 'utf-7', 'replace'), u'\ufffd')
# Issue #2242: crash on some Windows/MSVC versions
self.assertRaises(UnicodeDecodeError, '+\xc1'.decode, 'utf-7')
self.assertEqual(unicode('+3ADYAA-', 'utf-7', 'replace'), u'\ufffd\ufffd')
# Direct encoded characters
set_d = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?"
# Optional direct characters
set_o = '!"#$%&*;<=>@[]^_`{|}'
for c in set_d:
self.assertEqual(c.encode('utf7'), c.encode('ascii'))
self.assertEqual(c.encode('ascii').decode('utf7'), c)
for c in set_o:
self.assertEqual(c.encode('ascii').decode('utf7'), c)
def test_codecs_utf8(self):
self.assertEqual(u''.encode('utf-8'), '')
......
......@@ -12,6 +12,9 @@ What's New in Python 2.7 alpha 1
Core and Builtins
-----------------
- Issue #4426: The UTF-7 decoder was too strict and didn't accept some legal
sequences. Patch by Nick Barnes and Victor Stinner.
- Issue #1588: Add complex.__format__. For example,
format(complex(1, 2./3), '.5') now produces a sensible result.
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment