Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
67e83886
Commit
67e83886
authored
May 05, 2007
by
Walter Dörwald
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
test_unicode.py passes again (except for problems
with the idna codec).
parent
acaa5a16
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
68 additions
and
119 deletions
+68
-119
Lib/test/string_tests.py
Lib/test/string_tests.py
+2
-17
Lib/test/test_unicode.py
Lib/test/test_unicode.py
+66
-102
No files found.
Lib/test/string_tests.py
View file @
67e83886
...
...
@@ -642,17 +642,6 @@ class CommonTest(BaseTest):
self
.
checkequal
(
'xyzzyhello'
,
'xyzzyhelloxyzzy'
,
'rstrip'
,
'xyz'
)
self
.
checkequal
(
'hello'
,
'hello'
,
'strip'
,
'xyz'
)
# strip/lstrip/rstrip with unicode arg
if
test_support
.
have_unicode
:
self
.
checkequal
(
str
(
'hello'
,
'ascii'
),
'xyzzyhelloxyzzy'
,
'strip'
,
str
(
'xyz'
,
'ascii'
))
self
.
checkequal
(
str
(
'helloxyzzy'
,
'ascii'
),
'xyzzyhelloxyzzy'
,
'lstrip'
,
str
(
'xyz'
,
'ascii'
))
self
.
checkequal
(
str
(
'xyzzyhello'
,
'ascii'
),
'xyzzyhelloxyzzy'
,
'rstrip'
,
str
(
'xyz'
,
'ascii'
))
self
.
checkequal
(
str
(
'hello'
,
'ascii'
),
'hello'
,
'strip'
,
str
(
'xyz'
,
'ascii'
))
self
.
checkraises
(
TypeError
,
'hello'
,
'strip'
,
42
,
42
)
self
.
checkraises
(
TypeError
,
'hello'
,
'lstrip'
,
42
,
42
)
self
.
checkraises
(
TypeError
,
'hello'
,
'rstrip'
,
42
,
42
)
...
...
@@ -956,12 +945,8 @@ class MixinStrUnicodeUserStringTest:
self
.
checkequal
(
'w x y z'
,
' '
,
'join'
,
Sequence
())
self
.
checkequal
(
'abc'
,
'a'
,
'join'
,
(
'abc'
,))
self
.
checkequal
(
'z'
,
'a'
,
'join'
,
UserList
([
'z'
]))
if
test_support
.
have_unicode
:
self
.
checkequal
(
str
(
'a.b.c'
),
str
(
'.'
),
'join'
,
[
'a'
,
'b'
,
'c'
])
self
.
checkequal
(
str
(
'a.b.c'
),
'.'
,
'join'
,
[
str
(
'a'
),
'b'
,
'c'
])
self
.
checkequal
(
str
(
'a.b.c'
),
'.'
,
'join'
,
[
'a'
,
str
(
'b'
),
'c'
])
self
.
checkequal
(
str
(
'a.b.c'
),
'.'
,
'join'
,
[
'a'
,
'b'
,
str
(
'c'
)])
self
.
checkraises
(
TypeError
,
'.'
,
'join'
,
[
'a'
,
str
(
'b'
),
3
])
self
.
checkequal
(
'a.b.c'
,
'.'
,
'join'
,
[
'a'
,
'b'
,
'c'
])
self
.
checkraises
(
TypeError
,
'.'
,
'join'
,
[
'a'
,
'b'
,
3
])
for
i
in
[
5
,
25
,
125
]:
self
.
checkequal
((((
'a'
*
i
)
+
'-'
)
*
i
)[:
-
1
],
'-'
,
'join'
,
[
'a'
*
i
]
*
i
)
...
...
Lib/test/test_unicode.py
View file @
67e83886
...
...
@@ -62,21 +62,21 @@ class UnicodeTest(
def
test_repr
(
self
):
if
not
sys
.
platform
.
startswith
(
'java'
):
# Test basic sanity of repr()
self
.
assertEqual
(
repr
(
'abc'
),
"
u
'abc'"
)
self
.
assertEqual
(
repr
(
'ab
\
\
c'
),
"
u
'ab
\
\
\
\
c'"
)
self
.
assertEqual
(
repr
(
'ab
\
\
'
),
"
u
'ab
\
\
\
\
'"
)
self
.
assertEqual
(
repr
(
'
\
\
c'
),
"
u
'
\
\
\
\
c'"
)
self
.
assertEqual
(
repr
(
'
\
\
'
),
"
u
'
\
\
\
\
'"
)
self
.
assertEqual
(
repr
(
'
\
n
'
),
"
u
'
\
\
n'"
)
self
.
assertEqual
(
repr
(
'
\
r
'
),
"
u
'
\
\
r'"
)
self
.
assertEqual
(
repr
(
'
\
t
'
),
"
u
'
\
\
t'"
)
self
.
assertEqual
(
repr
(
'
\
b
'
),
"
u
'
\
\
x08'"
)
self
.
assertEqual
(
repr
(
"'
\
"
"
),
"""
u
'
\
\
'"'"""
)
self
.
assertEqual
(
repr
(
"'
\
"
"
),
"""
u
'
\
\
'"'"""
)
self
.
assertEqual
(
repr
(
"'"
),
'''
u
"'"'''
)
self
.
assertEqual
(
repr
(
'"'
),
"""
u
'"'"""
)
self
.
assertEqual
(
repr
(
'abc'
),
"'abc'"
)
self
.
assertEqual
(
repr
(
'ab
\
\
c'
),
"'ab
\
\
\
\
c'"
)
self
.
assertEqual
(
repr
(
'ab
\
\
'
),
"'ab
\
\
\
\
'"
)
self
.
assertEqual
(
repr
(
'
\
\
c'
),
"'
\
\
\
\
c'"
)
self
.
assertEqual
(
repr
(
'
\
\
'
),
"'
\
\
\
\
'"
)
self
.
assertEqual
(
repr
(
'
\
n
'
),
"'
\
\
n'"
)
self
.
assertEqual
(
repr
(
'
\
r
'
),
"'
\
\
r'"
)
self
.
assertEqual
(
repr
(
'
\
t
'
),
"'
\
\
t'"
)
self
.
assertEqual
(
repr
(
'
\
b
'
),
"'
\
\
x08'"
)
self
.
assertEqual
(
repr
(
"'
\
"
"
),
"""'
\
\
'"'"""
)
self
.
assertEqual
(
repr
(
"'
\
"
"
),
"""'
\
\
'"'"""
)
self
.
assertEqual
(
repr
(
"'"
),
'''"'"'''
)
self
.
assertEqual
(
repr
(
'"'
),
"""'"'"""
)
latin1repr
=
(
"
u
'
\
\
x00
\
\
x01
\
\
x02
\
\
x03
\
\
x04
\
\
x05
\
\
x06
\
\
x07
\
\
x08
\
\
t
\
\
n
\
\
x0b
\
\
x0c
\
\
r"
"'
\
\
x00
\
\
x01
\
\
x02
\
\
x03
\
\
x04
\
\
x05
\
\
x06
\
\
x07
\
\
x08
\
\
t
\
\
n
\
\
x0b
\
\
x0c
\
\
r"
"
\
\
x0e
\
\
x0f
\
\
x10
\
\
x11
\
\
x12
\
\
x13
\
\
x14
\
\
x15
\
\
x16
\
\
x17
\
\
x18
\
\
x19
\
\
x1a"
"
\
\
x1b
\
\
x1c
\
\
x1d
\
\
x1e
\
\
x1f !
\
"
#$%&
\
\
'()*+,-./0123456789:;<=>?@ABCDEFGHI"
"JKLMNOPQRSTUVWXYZ[
\
\
\
\
]^_`abcdefghijklmnopqrstuvwxyz{|}~
\
\
x7f"
...
...
@@ -165,7 +165,7 @@ class UnicodeTest(
self
.
checkequalnofix
(
'xyyx'
,
'xzx'
,
'translate'
,
{
ord
(
'z'
):
'yy'
})
self
.
assertRaises
(
TypeError
,
'hello'
.
translate
)
self
.
assertRaises
(
TypeError
,
'abababc'
.
translate
,
{
ord
(
'a'
):
''
}
)
self
.
assertRaises
(
TypeError
,
'abababc'
.
translate
,
'abc'
,
'xyz'
)
def
test_split
(
self
):
string_tests
.
CommonTest
.
test_split
(
self
)
...
...
@@ -187,10 +187,6 @@ class UnicodeTest(
self
.
checkequalnofix
(
'abcd'
,
''
,
'join'
,
(
'a'
,
'b'
,
'c'
,
'd'
))
self
.
checkequalnofix
(
'w x y z'
,
' '
,
'join'
,
string_tests
.
Sequence
(
'wxyz'
))
def
test_strip
(
self
):
string_tests
.
CommonTest
.
test_strip
(
self
)
self
.
assertRaises
(
UnicodeError
,
"hello"
.
strip
,
"
\
xff
"
)
def
test_replace
(
self
):
string_tests
.
CommonTest
.
test_replace
(
self
)
...
...
@@ -323,58 +319,27 @@ class UnicodeTest(
self
.
assert_
(
'a'
in
'bdab'
)
self
.
assert_
(
'a'
in
'bdaba'
)
self
.
assert_
(
'a'
in
'bdba'
)
self
.
assert_
(
'a'
in
'bdba'
)
self
.
assert_
(
'a'
in
'bdba'
)
self
.
assert_
(
'a'
not
in
'bdb'
)
self
.
assert_
(
'a'
not
in
'bdb'
)
self
.
assert_
(
'a'
in
'bdba'
)
self
.
assert_
(
'a'
in
(
'a'
,
1
,
None
))
self
.
assert_
(
'a'
in
(
1
,
None
,
'a'
))
self
.
assert_
(
'a'
in
(
1
,
None
,
'a'
))
self
.
assert_
(
'a'
in
(
'a'
,
1
,
None
))
self
.
assert_
(
'a'
in
(
1
,
None
,
'a'
))
self
.
assert_
(
'a'
in
(
1
,
None
,
'a'
))
self
.
assert_
(
'a'
not
in
(
'x'
,
1
,
'y'
))
self
.
assert_
(
'a'
not
in
(
'x'
,
1
,
None
))
self
.
assert_
(
'abcd'
not
in
'abcxxxx'
)
self
.
assert_
(
'ab'
in
'abcd'
)
self
.
assert_
(
'ab'
in
'abc'
)
self
.
assert_
(
'ab'
in
'abc'
)
self
.
assert_
(
'ab'
in
(
1
,
None
,
'ab'
))
self
.
assert_
(
''
in
'abc'
)
self
.
assert_
(
''
in
'abc'
)
# If the following fails either
# the contains operator does not propagate UnicodeErrors or
# someone has changed the default encoding
self
.
assertRaises
(
UnicodeError
,
'g
\
xe2
teau'
.
__contains__
,
'
\
xe2
'
)
self
.
assert_
(
''
in
''
)
self
.
assert_
(
''
in
''
)
self
.
assert_
(
''
in
''
)
self
.
assert_
(
''
in
'abc'
)
self
.
assert_
(
''
in
'abc'
)
self
.
assert_
(
''
in
'abc'
)
self
.
assert_
(
'
\
0
'
not
in
'abc'
)
self
.
assert_
(
'
\
0
'
not
in
'abc'
)
self
.
assert_
(
'
\
0
'
not
in
'abc'
)
self
.
assert_
(
'
\
0
'
in
'
\
0
abc'
)
self
.
assert_
(
'
\
0
'
in
'
\
0
abc'
)
self
.
assert_
(
'
\
0
'
in
'
\
0
abc'
)
self
.
assert_
(
'
\
0
'
in
'abc
\
0
'
)
self
.
assert_
(
'
\
0
'
in
'abc
\
0
'
)
self
.
assert_
(
'
\
0
'
in
'abc
\
0
'
)
self
.
assert_
(
'a'
in
'
\
0
abc'
)
self
.
assert_
(
'a'
in
'
\
0
abc'
)
self
.
assert_
(
'a'
in
'
\
0
abc'
)
self
.
assert_
(
'asdf'
in
'asdf'
)
self
.
assert_
(
'asdf'
in
'asdf'
)
self
.
assert_
(
'asdf'
in
'asdf'
)
self
.
assert_
(
'asdf'
not
in
'asd'
)
self
.
assert_
(
'asdf'
not
in
'asd'
)
self
.
assert_
(
'asdf'
not
in
'asd'
)
self
.
assert_
(
'asdf'
not
in
''
)
self
.
assert_
(
'asdf'
not
in
''
)
self
.
assert_
(
'asdf'
not
in
''
)
self
.
assertRaises
(
TypeError
,
"abc"
.
__contains__
)
...
...
@@ -389,7 +354,7 @@ class UnicodeTest(
self
.
assertEqual
(
"%s, %s, %i, %f, %5.2f"
%
(
"abc"
,
"abc"
,
-
1
,
-
2
,
3.57
),
'abc, abc, -1, -2.000000, 3.57'
)
self
.
assertEqual
(
"%s, %s, %i, %f, %5.2f"
%
(
"abc"
,
"abc"
,
-
1
,
-
2
,
1003.57
),
'abc, abc, -1, -2.000000, 1003.57'
)
if
not
sys
.
platform
.
startswith
(
'java'
):
self
.
assertEqual
(
"%r, %r"
%
(
"abc"
,
"abc"
),
"u
'abc', 'abc'"
)
self
.
assertEqual
(
"%r, %r"
%
(
b"abc"
,
"abc"
),
"b
'abc', 'abc'"
)
self
.
assertEqual
(
"%(x)s, %(y)s"
%
{
'x'
:
"abc"
,
'y'
:
"def"
},
'abc, def'
)
self
.
assertEqual
(
"%(x)s, %(
\
xfc
)s"
%
{
'x'
:
"abc"
,
'
\
xfc
'
:
"def"
},
'abc, def'
)
...
...
@@ -495,7 +460,7 @@ class UnicodeTest(
)
self
.
assertEqual
(
str
(
'strings are decoded to unicode'
,
'utf-8'
,
'strict'
),
str
(
b
'strings are decoded to unicode'
,
'utf-8'
,
'strict'
),
'strings are decoded to unicode'
)
...
...
@@ -513,38 +478,38 @@ class UnicodeTest(
def
test_codecs_utf7
(
self
):
utfTests
=
[
(
'A
\
u2262
\
u0391
.'
,
'A+ImIDkQ.'
),
# RFC2152 example
(
'Hi Mom -
\
u263a
-!'
,
'Hi Mom -+Jjo--!'
),
# RFC2152 example
(
'
\
u65E5
\
u672C
\
u8A9E
'
,
'+ZeVnLIqe-'
),
# RFC2152 example
(
'Item 3 is
\
u00a3
1.'
,
'Item 3 is +AKM-1.'
),
# RFC2152 example
(
'+'
,
'+-'
),
(
'+-'
,
'+--'
),
(
'+?'
,
'+-?'
),
(
'
\
?
'
, '
+
AFw
?
'),
('
+
?
', '
+-
?
'),
(r'
\\
?
', '
+
AFwAXA
?
'),
(r'
\\\
?
', '
+
AFwAXABc
?
'),
(r'
++--
', '
+-+---
')
(
'A
\
u2262
\
u0391
.'
,
b
'A+ImIDkQ.'
),
# RFC2152 example
(
'Hi Mom -
\
u263a
-!'
,
b
'Hi Mom -+Jjo--!'
),
# RFC2152 example
(
'
\
u65E5
\
u672C
\
u8A9E
'
,
b
'+ZeVnLIqe-'
),
# RFC2152 example
(
'Item 3 is
\
u00a3
1.'
,
b
'Item 3 is +AKM-1.'
),
# RFC2152 example
(
'+'
,
b
'+-'
),
(
'+-'
,
b
'+--'
),
(
'+?'
,
b
'+-?'
),
(
'
\
?
'
,
b
'
+
AFw
?
'),
('
+
?
',
b
'
+-
?
'),
(r'
\\
?
',
b
'
+
AFwAXA
?
'),
(r'
\\\
?
',
b
'
+
AFwAXABc
?
'),
(r'
++--
',
b
'
+-+---
')
]
for (x, y) in utfTests:
self.assertEqual(x.encode('
utf
-
7
'), y)
# surrogates not supported
self.assertRaises(UnicodeError, str, '
+
3
ADYAA
-
', '
utf
-
7
')
self.assertRaises(UnicodeError, str,
b
'
+
3
ADYAA
-
', '
utf
-
7
')
self.assertEqual(str('
+
3
ADYAA
-
', '
utf
-
7
', '
replace
'), '
\
ufffd
')
self.assertEqual(str(
b
'
+
3
ADYAA
-
', '
utf
-
7
', '
replace
'), '
\
ufffd
')
def test_codecs_utf8(self):
self.assertEqual(''.encode('
utf
-
8
'), '')
self.assertEqual('
\
u20ac
'.encode('
utf
-
8
'), '
\
xe2
\
x82
\
xac
')
self.assertEqual('
\
ud800
\
udc02
'.encode('
utf
-
8
'), '
\
xf0
\
x90
\
x80
\
x82
')
self.assertEqual('
\
ud84d
\
udc56
'.encode('
utf
-
8
'), '
\
xf0
\
xa3
\
x91
\
x96
')
self.assertEqual('
\
ud800
'.encode('
utf
-
8
'), '
\
xed
\
xa0
\
x80
')
self.assertEqual('
\
udc00
'.encode('
utf
-
8
'), '
\
xed
\
xb0
\
x80
')
self.assertEqual(''.encode('
utf
-
8
'),
b
'')
self.assertEqual('
\
u20ac
'.encode('
utf
-
8
'),
b
'
\
xe2
\
x82
\
xac
')
self.assertEqual('
\
ud800
\
udc02
'.encode('
utf
-
8
'),
b
'
\
xf0
\
x90
\
x80
\
x82
')
self.assertEqual('
\
ud84d
\
udc56
'.encode('
utf
-
8
'),
b
'
\
xf0
\
xa3
\
x91
\
x96
')
self.assertEqual('
\
ud800
'.encode('
utf
-
8
'),
b
'
\
xed
\
xa0
\
x80
')
self.assertEqual('
\
udc00
'.encode('
utf
-
8
'),
b
'
\
xed
\
xb0
\
x80
')
self.assertEqual(
('
\
ud800
\
udc02
'*1000).encode('
utf
-
8
'),
'
\
xf0
\
x90
\
x80
\
x82
'*1000
b
'
\
xf0
\
x90
\
x80
\
x82
'*1000
)
self.assertEqual(
'
\
u6b63
\
u78ba
\
u306b
\
u8a00
\
u3046
\
u3068
\
u7ffb
\
u8a33
\
u306f
'
...
...
@@ -553,22 +518,22 @@ class UnicodeTest(
'
\
u3001
\
u3042
\
u3068
\
u306f
\
u3067
\
u305f
\
u3089
\
u3081
\
u3067
'
'
\
u3059
\
u3002
\
u5b9f
\
u969b
\
u306b
\
u306f
\
u300cWenn
ist
das
'
'
Nunstuck
git
und
'.encode('
utf
-
8
'),
'
\
xe6
\
xad
\
xa3
\
xe7
\
xa2
\
xba
\
xe3
\
x81
\
xab
\
xe8
\
xa8
\
x80
\
xe3
\
x81
'
'
\
x86
\
xe3
\
x81
\
xa8
\
xe7
\
xbf
\
xbb
\
xe8
\
xa8
\
xb3
\
xe3
\
x81
\
xaf
\
xe3
'
'
\
x81
\
x95
\
xe3
\
x82
\
x8c
\
xe3
\
x81
\
xa6
\
xe3
\
x81
\
x84
\
xe3
\
x81
\
xbe
'
'
\
xe3
\
x81
\
x9b
\
xe3
\
x82
\
x93
\
xe3
\
x80
\
x82
\
xe4
\
xb8
\
x80
\
xe9
\
x83
'
'
\
xa8
\
xe3
\
x81
\
xaf
\
xe3
\
x83
\
x89
\
xe3
\
x82
\
xa4
\
xe3
\
x83
\
x84
\
xe8
'
'
\
xaa
\
x9e
\
xe3
\
x81
\
xa7
\
xe3
\
x81
\
x99
\
xe3
\
x81
\
x8c
\
xe3
\
x80
\
x81
'
'
\
xe3
\
x81
\
x82
\
xe3
\
x81
\
xa8
\
xe3
\
x81
\
xaf
\
xe3
\
x81
\
xa7
\
xe3
\
x81
'
'
\
x9f
\
xe3
\
x82
\
x89
\
xe3
\
x82
\
x81
\
xe3
\
x81
\
xa7
\
xe3
\
x81
\
x99
\
xe3
'
'
\
x80
\
x82
\
xe5
\
xae
\
x9f
\
xe9
\
x9a
\
x9b
\
xe3
\
x81
\
xab
\
xe3
\
x81
\
xaf
'
'
\
xe3
\
x80
\
x8cWenn
ist
das
Nunstuck
git
und
'
b
'
\
xe6
\
xad
\
xa3
\
xe7
\
xa2
\
xba
\
xe3
\
x81
\
xab
\
xe8
\
xa8
\
x80
\
xe3
\
x81
'
b
'
\
x86
\
xe3
\
x81
\
xa8
\
xe7
\
xbf
\
xbb
\
xe8
\
xa8
\
xb3
\
xe3
\
x81
\
xaf
\
xe3
'
b
'
\
x81
\
x95
\
xe3
\
x82
\
x8c
\
xe3
\
x81
\
xa6
\
xe3
\
x81
\
x84
\
xe3
\
x81
\
xbe
'
b
'
\
xe3
\
x81
\
x9b
\
xe3
\
x82
\
x93
\
xe3
\
x80
\
x82
\
xe4
\
xb8
\
x80
\
xe9
\
x83
'
b
'
\
xa8
\
xe3
\
x81
\
xaf
\
xe3
\
x83
\
x89
\
xe3
\
x82
\
xa4
\
xe3
\
x83
\
x84
\
xe8
'
b
'
\
xaa
\
x9e
\
xe3
\
x81
\
xa7
\
xe3
\
x81
\
x99
\
xe3
\
x81
\
x8c
\
xe3
\
x80
\
x81
'
b
'
\
xe3
\
x81
\
x82
\
xe3
\
x81
\
xa8
\
xe3
\
x81
\
xaf
\
xe3
\
x81
\
xa7
\
xe3
\
x81
'
b
'
\
x9f
\
xe3
\
x82
\
x89
\
xe3
\
x82
\
x81
\
xe3
\
x81
\
xa7
\
xe3
\
x81
\
x99
\
xe3
'
b
'
\
x80
\
x82
\
xe5
\
xae
\
x9f
\
xe9
\
x9a
\
x9b
\
xe3
\
x81
\
xab
\
xe3
\
x81
\
xaf
'
b
'
\
xe3
\
x80
\
x8cWenn
ist
das
Nunstuck
git
und
'
)
# UTF-8 specific decoding tests
self.assertEqual(str('
\
xf0
\
xa3
\
x91
\
x96
', '
utf
-
8
'), '
\
U00023456
' )
self.assertEqual(str('
\
xf0
\
x90
\
x80
\
x82
', '
utf
-
8
'), '
\
U00010002
' )
self.assertEqual(str('
\
xe2
\
x82
\
xac
', '
utf
-
8
'), '
\
u20ac
' )
self.assertEqual(str(
b
'
\
xf0
\
xa3
\
x91
\
x96
', '
utf
-
8
'), '
\
U00023456
' )
self.assertEqual(str(
b
'
\
xf0
\
x90
\
x80
\
x82
', '
utf
-
8
'), '
\
U00010002
' )
self.assertEqual(str(
b
'
\
xe2
\
x82
\
xac
', '
utf
-
8
'), '
\
u20ac
' )
# Other possible utf-8 test cases:
# * strict decoding testing for all of the
...
...
@@ -582,14 +547,14 @@ class UnicodeTest(
# Error handling (encoding)
self.assertRaises(UnicodeError, '
Andr
\
202
x
'.encode, '
ascii
')
self.assertRaises(UnicodeError, '
Andr
\
202
x
'.encode, '
ascii
','
strict
')
self.assertEqual('
Andr
\
202
x
'.encode('
ascii
','
ignore
'), "Andr x")
self.assertEqual('
Andr
\
202
x
'.encode('
ascii
','
replace
'), "Andr? x")
self.assertEqual('
Andr
\
202
x
'.encode('
ascii
','
ignore
'),
b
"Andr x")
self.assertEqual('
Andr
\
202
x
'.encode('
ascii
','
replace
'),
b
"Andr? x")
# Error handling (decoding)
self.assertRaises(UnicodeError, str, '
Andr
\
202
x
', '
ascii
')
self.assertRaises(UnicodeError, str,
'
Andr
\
202
x
', '
ascii
',
'
strict
')
self.assertEqual(str(
'
Andr
\
202
x
','
ascii
',
'
ignore
'), "Andr x")
self.assertEqual(str(
'
Andr
\
202
x
','
ascii
',
'
replace
'), '
Andr
\
uFFFD
x
')
self.assertRaises(UnicodeError, str,
b
'
Andr
\
202
x
', '
ascii
')
self.assertRaises(UnicodeError, str,
b'
Andr
\
202
x
', '
ascii
',
'
strict
')
self.assertEqual(str(
b'
Andr
\
202
x
', '
ascii
',
'
ignore
'), "Andr x")
self.assertEqual(str(
b'
Andr
\
202
x
', '
ascii
',
'
replace
'), '
Andr
\
uFFFD
x
')
# Error handling (unknown character names)
self.assertEqual("
\
\
N{foo}xx".decode("unicode-escape", "ignore"), "xx")
...
...
@@ -618,13 +583,13 @@ class UnicodeTest(
def test_codecs(self):
# Encoding
self.assertEqual('
hello
'.encode('
ascii
'), '
hello
')
self.assertEqual('
hello
'.encode('
utf
-
7
'), '
hello
')
self.assertEqual('
hello
'.encode('
utf
-
8
'), '
hello
')
self.assertEqual('
hello
'.encode('
utf8
'), '
hello
')
self.assertEqual('
hello
'.encode('
utf
-
16
-
le
'), '
h
\
000
e
\
000
l
\
000
l
\
000
o
\
000
')
self.assertEqual('
hello
'.encode('
utf
-
16
-
be
'), '
\
000
h
\
000
e
\
000
l
\
000
l
\
000
o
')
self.assertEqual('
hello
'.encode('
latin
-
1
'), '
hello
')
self.assertEqual('
hello
'.encode('
ascii
'),
b
'
hello
')
self.assertEqual('
hello
'.encode('
utf
-
7
'),
b
'
hello
')
self.assertEqual('
hello
'.encode('
utf
-
8
'),
b
'
hello
')
self.assertEqual('
hello
'.encode('
utf8
'),
b
'
hello
')
self.assertEqual('
hello
'.encode('
utf
-
16
-
le
'),
b
'
h
\
000
e
\
000
l
\
000
l
\
000
o
\
000
')
self.assertEqual('
hello
'.encode('
utf
-
16
-
be
'),
b
'
\
000
h
\
000
e
\
000
l
\
000
l
\
000
o
')
self.assertEqual('
hello
'.encode('
latin
-
1
'),
b
'
hello
')
# Roundtrip safety for BMP (just the first 1024 chars)
for c in xrange(1024):
...
...
@@ -663,7 +628,7 @@ class UnicodeTest(
def test_codecs_charmap(self):
# 0-127
s =
''.join(map(chr, xrange(128)
))
s =
bytes(xrange(128
))
for encoding in (
'
cp037
', '
cp1026
',
'
cp437
', '
cp500
', '
cp737
', '
cp775
', '
cp850
',
...
...
@@ -691,7 +656,7 @@ class UnicodeTest(
self.assertEqual(str(s, encoding).encode(encoding), s)
# 128-255
s =
''.join(map(chr, xrange(128, 256)
))
s =
bytes(xrange(128, 256
))
for encoding in (
'
cp037
', '
cp1026
',
'
cp437
', '
cp500
', '
cp737
', '
cp775
', '
cp850
',
...
...
@@ -805,7 +770,6 @@ class UnicodeTest(
self.assertEqual(str(Foo6("bar")), "foou")
self.assertEqual(str(Foo7("bar")), "foou")
self.assertEqual(str(Foo8("foo")), "foofoo")
self.assertEqual(str(Foo9("foo")), "string")
self.assertEqual(str(Foo9("foo")), "not unicode")
def test_unicode_repr(self):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment