Commit 67e83886 authored by Walter Dörwald's avatar Walter Dörwald

test_unicode.py passes again (except for problems

with the idna codec).
parent acaa5a16
......@@ -642,17 +642,6 @@ class CommonTest(BaseTest):
self.checkequal('xyzzyhello', 'xyzzyhelloxyzzy', 'rstrip', 'xyz')
self.checkequal('hello', 'hello', 'strip', 'xyz')
# strip/lstrip/rstrip with unicode arg
if test_support.have_unicode:
self.checkequal(str('hello', 'ascii'), 'xyzzyhelloxyzzy',
'strip', str('xyz', 'ascii'))
self.checkequal(str('helloxyzzy', 'ascii'), 'xyzzyhelloxyzzy',
'lstrip', str('xyz', 'ascii'))
self.checkequal(str('xyzzyhello', 'ascii'), 'xyzzyhelloxyzzy',
'rstrip', str('xyz', 'ascii'))
self.checkequal(str('hello', 'ascii'), 'hello',
'strip', str('xyz', 'ascii'))
self.checkraises(TypeError, 'hello', 'strip', 42, 42)
self.checkraises(TypeError, 'hello', 'lstrip', 42, 42)
self.checkraises(TypeError, 'hello', 'rstrip', 42, 42)
......@@ -956,12 +945,8 @@ class MixinStrUnicodeUserStringTest:
self.checkequal('w x y z', ' ', 'join', Sequence())
self.checkequal('abc', 'a', 'join', ('abc',))
self.checkequal('z', 'a', 'join', UserList(['z']))
if test_support.have_unicode:
self.checkequal(str('a.b.c'), str('.'), 'join', ['a', 'b', 'c'])
self.checkequal(str('a.b.c'), '.', 'join', [str('a'), 'b', 'c'])
self.checkequal(str('a.b.c'), '.', 'join', ['a', str('b'), 'c'])
self.checkequal(str('a.b.c'), '.', 'join', ['a', 'b', str('c')])
self.checkraises(TypeError, '.', 'join', ['a', str('b'), 3])
self.checkequal('a.b.c', '.', 'join', ['a', 'b', 'c'])
self.checkraises(TypeError, '.', 'join', ['a', 'b', 3])
for i in [5, 25, 125]:
self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join',
['a' * i] * i)
......
......@@ -62,21 +62,21 @@ class UnicodeTest(
def test_repr(self):
if not sys.platform.startswith('java'):
# Test basic sanity of repr()
self.assertEqual(repr('abc'), "u'abc'")
self.assertEqual(repr('ab\\c'), "u'ab\\\\c'")
self.assertEqual(repr('ab\\'), "u'ab\\\\'")
self.assertEqual(repr('\\c'), "u'\\\\c'")
self.assertEqual(repr('\\'), "u'\\\\'")
self.assertEqual(repr('\n'), "u'\\n'")
self.assertEqual(repr('\r'), "u'\\r'")
self.assertEqual(repr('\t'), "u'\\t'")
self.assertEqual(repr('\b'), "u'\\x08'")
self.assertEqual(repr("'\""), """u'\\'"'""")
self.assertEqual(repr("'\""), """u'\\'"'""")
self.assertEqual(repr("'"), '''u"'"''')
self.assertEqual(repr('"'), """u'"'""")
self.assertEqual(repr('abc'), "'abc'")
self.assertEqual(repr('ab\\c'), "'ab\\\\c'")
self.assertEqual(repr('ab\\'), "'ab\\\\'")
self.assertEqual(repr('\\c'), "'\\\\c'")
self.assertEqual(repr('\\'), "'\\\\'")
self.assertEqual(repr('\n'), "'\\n'")
self.assertEqual(repr('\r'), "'\\r'")
self.assertEqual(repr('\t'), "'\\t'")
self.assertEqual(repr('\b'), "'\\x08'")
self.assertEqual(repr("'\""), """'\\'"'""")
self.assertEqual(repr("'\""), """'\\'"'""")
self.assertEqual(repr("'"), '''"'"''')
self.assertEqual(repr('"'), """'"'""")
latin1repr = (
"u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
"'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
"\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
"\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
"JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
......@@ -165,7 +165,7 @@ class UnicodeTest(
self.checkequalnofix('xyyx', 'xzx', 'translate', {ord('z'):'yy'})
self.assertRaises(TypeError, 'hello'.translate)
self.assertRaises(TypeError, 'abababc'.translate, {ord('a'):''})
self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz')
def test_split(self):
string_tests.CommonTest.test_split(self)
......@@ -187,10 +187,6 @@ class UnicodeTest(
self.checkequalnofix('abcd', '', 'join', ('a', 'b', 'c', 'd'))
self.checkequalnofix('w x y z', ' ', 'join', string_tests.Sequence('wxyz'))
def test_strip(self):
string_tests.CommonTest.test_strip(self)
self.assertRaises(UnicodeError, "hello".strip, "\xff")
def test_replace(self):
string_tests.CommonTest.test_replace(self)
......@@ -323,58 +319,27 @@ class UnicodeTest(
self.assert_('a' in 'bdab')
self.assert_('a' in 'bdaba')
self.assert_('a' in 'bdba')
self.assert_('a' in 'bdba')
self.assert_('a' in 'bdba')
self.assert_('a' not in 'bdb')
self.assert_('a' not in 'bdb')
self.assert_('a' in 'bdba')
self.assert_('a' in ('a',1,None))
self.assert_('a' in (1,None,'a'))
self.assert_('a' in (1,None,'a'))
self.assert_('a' in ('a',1,None))
self.assert_('a' in (1,None,'a'))
self.assert_('a' in (1,None,'a'))
self.assert_('a' not in ('x',1,'y'))
self.assert_('a' not in ('x',1,None))
self.assert_('abcd' not in 'abcxxxx')
self.assert_('ab' in 'abcd')
self.assert_('ab' in 'abc')
self.assert_('ab' in 'abc')
self.assert_('ab' in (1,None,'ab'))
self.assert_('' in 'abc')
self.assert_('' in 'abc')
# If the following fails either
# the contains operator does not propagate UnicodeErrors or
# someone has changed the default encoding
self.assertRaises(UnicodeError, 'g\xe2teau'.__contains__, '\xe2')
self.assert_('' in '')
self.assert_('' in '')
self.assert_('' in '')
self.assert_('' in 'abc')
self.assert_('' in 'abc')
self.assert_('' in 'abc')
self.assert_('\0' not in 'abc')
self.assert_('\0' not in 'abc')
self.assert_('\0' not in 'abc')
self.assert_('\0' in '\0abc')
self.assert_('\0' in '\0abc')
self.assert_('\0' in '\0abc')
self.assert_('\0' in 'abc\0')
self.assert_('\0' in 'abc\0')
self.assert_('\0' in 'abc\0')
self.assert_('a' in '\0abc')
self.assert_('a' in '\0abc')
self.assert_('a' in '\0abc')
self.assert_('asdf' in 'asdf')
self.assert_('asdf' in 'asdf')
self.assert_('asdf' in 'asdf')
self.assert_('asdf' not in 'asd')
self.assert_('asdf' not in 'asd')
self.assert_('asdf' not in 'asd')
self.assert_('asdf' not in '')
self.assert_('asdf' not in '')
self.assert_('asdf' not in '')
self.assertRaises(TypeError, "abc".__contains__)
......@@ -389,7 +354,7 @@ class UnicodeTest(
self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 3.57), 'abc, abc, -1, -2.000000, 3.57')
self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 1003.57), 'abc, abc, -1, -2.000000, 1003.57')
if not sys.platform.startswith('java'):
self.assertEqual("%r, %r" % ("abc", "abc"), "u'abc', 'abc'")
self.assertEqual("%r, %r" % (b"abc", "abc"), "b'abc', 'abc'")
self.assertEqual("%(x)s, %(y)s" % {'x':"abc", 'y':"def"}, 'abc, def')
self.assertEqual("%(x)s, %(\xfc)s" % {'x':"abc", '\xfc':"def"}, 'abc, def')
......@@ -495,7 +460,7 @@ class UnicodeTest(
)
self.assertEqual(
str('strings are decoded to unicode', 'utf-8', 'strict'),
str(b'strings are decoded to unicode', 'utf-8', 'strict'),
'strings are decoded to unicode'
)
......@@ -513,38 +478,38 @@ class UnicodeTest(
def test_codecs_utf7(self):
utfTests = [
('A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
('Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
('\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
('Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
('+', '+-'),
('+-', '+--'),
('+?', '+-?'),
('\?', '+AFw?'),
('+?', '+-?'),
(r'\\?', '+AFwAXA?'),
(r'\\\?', '+AFwAXABc?'),
(r'++--', '+-+---')
('A\u2262\u0391.', b'A+ImIDkQ.'), # RFC2152 example
('Hi Mom -\u263a-!', b'Hi Mom -+Jjo--!'), # RFC2152 example
('\u65E5\u672C\u8A9E', b'+ZeVnLIqe-'), # RFC2152 example
('Item 3 is \u00a31.', b'Item 3 is +AKM-1.'), # RFC2152 example
('+', b'+-'),
('+-', b'+--'),
('+?', b'+-?'),
('\?', b'+AFw?'),
('+?', b'+-?'),
(r'\\?', b'+AFwAXA?'),
(r'\\\?', b'+AFwAXABc?'),
(r'++--', b'+-+---')
]
for (x, y) in utfTests:
self.assertEqual(x.encode('utf-7'), y)
# surrogates not supported
self.assertRaises(UnicodeError, str, '+3ADYAA-', 'utf-7')
self.assertRaises(UnicodeError, str, b'+3ADYAA-', 'utf-7')
self.assertEqual(str('+3ADYAA-', 'utf-7', 'replace'), '\ufffd')
self.assertEqual(str(b'+3ADYAA-', 'utf-7', 'replace'), '\ufffd')
def test_codecs_utf8(self):
self.assertEqual(''.encode('utf-8'), '')
self.assertEqual('\u20ac'.encode('utf-8'), '\xe2\x82\xac')
self.assertEqual('\ud800\udc02'.encode('utf-8'), '\xf0\x90\x80\x82')
self.assertEqual('\ud84d\udc56'.encode('utf-8'), '\xf0\xa3\x91\x96')
self.assertEqual('\ud800'.encode('utf-8'), '\xed\xa0\x80')
self.assertEqual('\udc00'.encode('utf-8'), '\xed\xb0\x80')
self.assertEqual(''.encode('utf-8'), b'')
self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
self.assertEqual('\ud800\udc02'.encode('utf-8'), b'\xf0\x90\x80\x82')
self.assertEqual('\ud84d\udc56'.encode('utf-8'), b'\xf0\xa3\x91\x96')
self.assertEqual('\ud800'.encode('utf-8'), b'\xed\xa0\x80')
self.assertEqual('\udc00'.encode('utf-8'), b'\xed\xb0\x80')
self.assertEqual(
('\ud800\udc02'*1000).encode('utf-8'),
'\xf0\x90\x80\x82'*1000
b'\xf0\x90\x80\x82'*1000
)
self.assertEqual(
'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
......@@ -553,22 +518,22 @@ class UnicodeTest(
'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
' Nunstuck git und'.encode('utf-8'),
'\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
'\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
'\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
'\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
'\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
'\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
'\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
'\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
'\xe3\x80\x8cWenn ist das Nunstuck git und'
b'\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
b'\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
b'\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
b'\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
b'\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
b'\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
b'\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
b'\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
b'\xe3\x80\x8cWenn ist das Nunstuck git und'
)
# UTF-8 specific decoding tests
self.assertEqual(str('\xf0\xa3\x91\x96', 'utf-8'), '\U00023456' )
self.assertEqual(str('\xf0\x90\x80\x82', 'utf-8'), '\U00010002' )
self.assertEqual(str('\xe2\x82\xac', 'utf-8'), '\u20ac' )
self.assertEqual(str(b'\xf0\xa3\x91\x96', 'utf-8'), '\U00023456' )
self.assertEqual(str(b'\xf0\x90\x80\x82', 'utf-8'), '\U00010002' )
self.assertEqual(str(b'\xe2\x82\xac', 'utf-8'), '\u20ac' )
# Other possible utf-8 test cases:
# * strict decoding testing for all of the
......@@ -582,14 +547,14 @@ class UnicodeTest(
# Error handling (encoding)
self.assertRaises(UnicodeError, 'Andr\202 x'.encode, 'ascii')
self.assertRaises(UnicodeError, 'Andr\202 x'.encode, 'ascii','strict')
self.assertEqual('Andr\202 x'.encode('ascii','ignore'), "Andr x")
self.assertEqual('Andr\202 x'.encode('ascii','replace'), "Andr? x")
self.assertEqual('Andr\202 x'.encode('ascii','ignore'), b"Andr x")
self.assertEqual('Andr\202 x'.encode('ascii','replace'), b"Andr? x")
# Error handling (decoding)
self.assertRaises(UnicodeError, str, 'Andr\202 x', 'ascii')
self.assertRaises(UnicodeError, str, 'Andr\202 x', 'ascii','strict')
self.assertEqual(str('Andr\202 x','ascii','ignore'), "Andr x")
self.assertEqual(str('Andr\202 x','ascii','replace'), 'Andr\uFFFD x')
self.assertRaises(UnicodeError, str, b'Andr\202 x', 'ascii')
self.assertRaises(UnicodeError, str, b'Andr\202 x', 'ascii', 'strict')
self.assertEqual(str(b'Andr\202 x', 'ascii', 'ignore'), "Andr x")
self.assertEqual(str(b'Andr\202 x', 'ascii', 'replace'), 'Andr\uFFFD x')
# Error handling (unknown character names)
self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), "xx")
......@@ -618,13 +583,13 @@ class UnicodeTest(
def test_codecs(self):
# Encoding
self.assertEqual('hello'.encode('ascii'), 'hello')
self.assertEqual('hello'.encode('utf-7'), 'hello')
self.assertEqual('hello'.encode('utf-8'), 'hello')
self.assertEqual('hello'.encode('utf8'), 'hello')
self.assertEqual('hello'.encode('utf-16-le'), 'h\000e\000l\000l\000o\000')
self.assertEqual('hello'.encode('utf-16-be'), '\000h\000e\000l\000l\000o')
self.assertEqual('hello'.encode('latin-1'), 'hello')
self.assertEqual('hello'.encode('ascii'), b'hello')
self.assertEqual('hello'.encode('utf-7'), b'hello')
self.assertEqual('hello'.encode('utf-8'), b'hello')
self.assertEqual('hello'.encode('utf8'), b'hello')
self.assertEqual('hello'.encode('utf-16-le'), b'h\000e\000l\000l\000o\000')
self.assertEqual('hello'.encode('utf-16-be'), b'\000h\000e\000l\000l\000o')
self.assertEqual('hello'.encode('latin-1'), b'hello')
# Roundtrip safety for BMP (just the first 1024 chars)
for c in xrange(1024):
......@@ -663,7 +628,7 @@ class UnicodeTest(
def test_codecs_charmap(self):
# 0-127
s = ''.join(map(chr, xrange(128)))
s = bytes(xrange(128))
for encoding in (
'cp037', 'cp1026',
'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
......@@ -691,7 +656,7 @@ class UnicodeTest(
self.assertEqual(str(s, encoding).encode(encoding), s)
# 128-255
s = ''.join(map(chr, xrange(128, 256)))
s = bytes(xrange(128, 256))
for encoding in (
'cp037', 'cp1026',
'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
......@@ -805,7 +770,6 @@ class UnicodeTest(
self.assertEqual(str(Foo6("bar")), "foou")
self.assertEqual(str(Foo7("bar")), "foou")
self.assertEqual(str(Foo8("foo")), "foofoo")
self.assertEqual(str(Foo9("foo")), "string")
self.assertEqual(str(Foo9("foo")), "not unicode")
def test_unicode_repr(self):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment