Commit 6a9d1db4 authored by Stefan Behnel

Update test_unicode.py from Py3.8.

parent 97b9193a
...@@ -1666,6 +1666,11 @@ class UnicodeTest(CommonTest, ...@@ -1666,6 +1666,11 @@ class UnicodeTest(CommonTest,
for c in set_o: for c in set_o:
self.assertEqual(c.encode('ascii').decode('utf7'), c) self.assertEqual(c.encode('ascii').decode('utf7'), c)
if sys.version_info >= (3, 8):
with self.assertRaisesRegex(UnicodeDecodeError,
'ill-formed sequence'):
b'+@'.decode('utf-7')
def test_codecs_utf8(self): def test_codecs_utf8(self):
self.assertEqual(''.encode('utf-8'), b'') self.assertEqual(''.encode('utf-8'), b'')
self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac') self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
...@@ -1771,7 +1776,7 @@ class UnicodeTest(CommonTest, ...@@ -1771,7 +1776,7 @@ class UnicodeTest(CommonTest,
# Issue #8271: during the decoding of an invalid UTF-8 byte sequence, # Issue #8271: during the decoding of an invalid UTF-8 byte sequence,
# only the start byte and the continuation byte(s) are now considered # only the start byte and the continuation byte(s) are now considered
# invalid, instead of the number of bytes specified by the start byte. # invalid, instead of the number of bytes specified by the start byte.
# See https://www.unicode.org/versions/Unicode5.2.0/ch03.pdf (page 95, # See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf (page 95,
# table 3-8, Row 2) for more information about the algorithm used. # table 3-8, Row 2) for more information about the algorithm used.
FFFD = '\ufffd' FFFD = '\ufffd'
sequences = [ sequences = [
...@@ -2136,12 +2141,8 @@ class UnicodeTest(CommonTest, ...@@ -2136,12 +2141,8 @@ class UnicodeTest(CommonTest,
u = chr(c) u = chr(c)
for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
'utf-16-be', 'raw_unicode_escape', 'utf-16-be', 'raw_unicode_escape',
'unicode_escape', 'unicode_internal'): 'unicode_escape'):
with warnings.catch_warnings(): self.assertEqual(str(u.encode(encoding),encoding), u)
# unicode-internal has been deprecated
warnings.simplefilter("ignore", DeprecationWarning)
self.assertEqual(str(u.encode(encoding),encoding), u)
# Roundtrip safety for BMP (just the first 256 chars) # Roundtrip safety for BMP (just the first 256 chars)
for c in range(256): for c in range(256):
...@@ -2157,13 +2158,9 @@ class UnicodeTest(CommonTest, ...@@ -2157,13 +2158,9 @@ class UnicodeTest(CommonTest,
# Roundtrip safety for non-BMP (just a few chars) # Roundtrip safety for non-BMP (just a few chars)
with warnings.catch_warnings(): with warnings.catch_warnings():
# unicode-internal has been deprecated
warnings.simplefilter("ignore", DeprecationWarning)
u = '\U00010001\U00020002\U00030003\U00040004\U00050005' u = '\U00010001\U00020002\U00030003\U00040004\U00050005'
for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
'raw_unicode_escape', 'raw_unicode_escape', 'unicode_escape'):
'unicode_escape', 'unicode_internal'):
self.assertEqual(str(u.encode(encoding),encoding), u) self.assertEqual(str(u.encode(encoding),encoding), u)
# UTF-8 must be roundtrip safe for all code points # UTF-8 must be roundtrip safe for all code points
...@@ -2382,22 +2379,23 @@ class UnicodeTest(CommonTest, ...@@ -2382,22 +2379,23 @@ class UnicodeTest(CommonTest,
self.assertEqual(args[0], text) self.assertEqual(args[0], text)
self.assertEqual(len(args), 1) self.assertEqual(len(args), 1)
@unittest.skipIf(sys.version_info < (3, 8), 'resize test requires Py3.8+')
@support.cpython_only
def test_resize(self): def test_resize(self):
from _testcapi import getargs_u
for length in range(1, 100, 7): for length in range(1, 100, 7):
# generate a fresh string (refcount=1) # generate a fresh string (refcount=1)
text = 'a' * length + 'b' text = 'a' * length + 'b'
with support.check_warnings(('unicode_internal codec has been ' # fill wstr internal field
'deprecated', DeprecationWarning)): abc = getargs_u(text)
# fill wstr internal field self.assertEqual(abc, text)
abc = text.encode('unicode_internal')
self.assertEqual(abc.decode('unicode_internal'), text)
# resize text: wstr field must be cleared and then recomputed # resize text: wstr field must be cleared and then recomputed
text += 'c' text += 'c'
abcdef = text.encode('unicode_internal') abcdef = getargs_u(text)
self.assertNotEqual(abc, abcdef) self.assertNotEqual(abc, abcdef)
self.assertEqual(abcdef.decode('unicode_internal'), text) self.assertEqual(abcdef, text)
def test_compare(self): def test_compare(self):
# Issue #17615 # Issue #17615
...@@ -2714,6 +2712,12 @@ class CAPITest(unittest.TestCase): ...@@ -2714,6 +2712,12 @@ class CAPITest(unittest.TestCase):
check_format('%.%s', check_format('%.%s',
b'%.%s', b'abc') b'%.%s', b'abc')
# Issue #33817: empty strings
check_format('',
b'')
check_format('',
b'%s', b'')
# Test PyUnicode_AsWideChar() # Test PyUnicode_AsWideChar()
@support.cpython_only @support.cpython_only
def test_aswidechar(self): def test_aswidechar(self):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment