Commit 02adc570 authored by Serhiy Storchaka

Issue #28541: Improve test coverage for encoding detection in json library.

Original patch by Eric Appelt.
parent 06d79d02
...@@ -257,7 +257,8 @@ def detect_encoding(b): ...@@ -257,7 +257,8 @@ def detect_encoding(b):
return 'utf-16-be' if b[1] else 'utf-32-be' return 'utf-16-be' if b[1] else 'utf-32-be'
if not b[1]: if not b[1]:
# XX 00 00 00 - utf-32-le # XX 00 00 00 - utf-32-le
# XX 00 XX XX - utf-16-le # XX 00 00 XX - utf-16-le
# XX 00 XX -- - utf-16-le
return 'utf-16-le' if b[2] or b[3] else 'utf-32-le' return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
elif len(b) == 2: elif len(b) == 2:
if not b[0]: if not b[0]:
......
...@@ -65,6 +65,19 @@ class TestUnicode: ...@@ -65,6 +65,19 @@ class TestUnicode:
self.assertEqual(self.loads(bom + encoded), data) self.assertEqual(self.loads(bom + encoded), data)
self.assertEqual(self.loads(encoded), data) self.assertEqual(self.loads(encoded), data)
self.assertRaises(UnicodeDecodeError, self.loads, b'["\x80"]') self.assertRaises(UnicodeDecodeError, self.loads, b'["\x80"]')
# RFC-7159 and ECMA-404 extend JSON to allow documents that
# consist of only a string, which can present a special case
# not covered by the encoding detection patterns specified in
# RFC-4627 for utf-16-le (XX 00 XX 00).
self.assertEqual(self.loads('"\u2600"'.encode('utf-16-le')),
'\u2600')
# Encoding detection for small (<4) bytes objects
# is implemented as a special case. RFC-7159 and ECMA-404
# allow single codepoint JSON documents which are only two
# bytes in utf-16 encodings w/o BOM.
self.assertEqual(self.loads(b'5\x00'), 5)
self.assertEqual(self.loads(b'\x007'), 7)
self.assertEqual(self.loads(b'57'), 57)
def test_object_pairs_hook_with_unicode(self): def test_object_pairs_hook_with_unicode(self):
s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}' s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment