Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
0ac30f82
Commit
0ac30f82
authored
May 11, 2007
by
Walter Dörwald
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Enhance the punycode decoder so that it can decode
unicode objects. Fix the idna codec and the tests.
parent
1f05a3b7
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
136 additions
and
130 deletions
+136
-130
Lib/encodings/idna.py
Lib/encodings/idna.py
+23
-20
Lib/encodings/punycode.py
Lib/encodings/punycode.py
+4
-2
Lib/test/test_codecs.py
Lib/test/test_codecs.py
+109
-108
No files found.
Lib/encodings/idna.py
View file @
0ac30f82
...
@@ -7,7 +7,8 @@ from unicodedata import ucd_3_2_0 as unicodedata
...
@@ -7,7 +7,8 @@ from unicodedata import ucd_3_2_0 as unicodedata
dots
=
re
.
compile
(
"[
\
u002E
\
u3002
\
uFF0E
\
uFF61
]"
)
dots
=
re
.
compile
(
"[
\
u002E
\
u3002
\
uFF0E
\
uFF61
]"
)
# IDNA section 5
# IDNA section 5
ace_prefix
=
"xn--"
ace_prefix
=
b"xn--"
sace_prefix
=
"xn--"
# This assumes query strings, so AllowUnassigned is true
# This assumes query strings, so AllowUnassigned is true
def
nameprep
(
label
):
def
nameprep
(
label
):
...
@@ -87,7 +88,7 @@ def ToASCII(label):
...
@@ -87,7 +88,7 @@ def ToASCII(label):
raise
UnicodeError
(
"label empty or too long"
)
raise
UnicodeError
(
"label empty or too long"
)
# Step 5: Check ACE prefix
# Step 5: Check ACE prefix
if
label
.
startswith
(
ace_prefix
):
if
label
.
startswith
(
s
ace_prefix
):
raise
UnicodeError
(
"Label starts with ACE prefix"
)
raise
UnicodeError
(
"Label starts with ACE prefix"
)
# Step 6: Encode with PUNYCODE
# Step 6: Encode with PUNYCODE
...
@@ -134,7 +135,7 @@ def ToUnicode(label):
...
@@ -134,7 +135,7 @@ def ToUnicode(label):
# Step 7: Compare the result of step 6 with the one of step 3
# Step 7: Compare the result of step 6 with the one of step 3
# label2 will already be in lower case.
# label2 will already be in lower case.
if
label
.
lower
()
!=
label2
:
if
str
(
label
,
"ascii"
).
lower
()
!=
str
(
label2
,
"ascii"
)
:
raise
UnicodeError
(
"IDNA does not round-trip"
,
label
,
label2
)
raise
UnicodeError
(
"IDNA does not round-trip"
,
label
,
label2
)
# Step 8: return the result of step 5
# Step 8: return the result of step 5
...
@@ -143,7 +144,7 @@ def ToUnicode(label):
...
@@ -143,7 +144,7 @@ def ToUnicode(label):
### Codec APIs
### Codec APIs
class
Codec
(
codecs
.
Codec
):
class
Codec
(
codecs
.
Codec
):
def
encode
(
self
,
input
,
errors
=
'strict'
):
def
encode
(
self
,
input
,
errors
=
'strict'
):
if
errors
!=
'strict'
:
if
errors
!=
'strict'
:
# IDNA is quite clear that implementations must be strict
# IDNA is quite clear that implementations must be strict
...
@@ -152,19 +153,21 @@ class Codec(codecs.Codec):
...
@@ -152,19 +153,21 @@ class Codec(codecs.Codec):
if
not
input
:
if
not
input
:
return
b""
,
0
return
b""
,
0
result
=
[]
result
=
b""
labels
=
dots
.
split
(
input
)
labels
=
dots
.
split
(
input
)
if
labels
and
len
(
labels
[
-
1
])
==
0
:
if
labels
and
not
labels
[
-
1
]
:
trailing_dot
=
b'.'
trailing_dot
=
b'.'
del
labels
[
-
1
]
del
labels
[
-
1
]
else
:
else
:
trailing_dot
=
b''
trailing_dot
=
b''
for
label
in
labels
:
for
label
in
labels
:
result
.
append
(
ToASCII
(
label
))
if
result
:
# Join with U+002E
# Join with U+002E
return
b"."
.
join
(
result
)
+
trailing_dot
,
len
(
input
)
result
.
extend
(
b'.'
)
result
.
extend
(
ToASCII
(
label
))
return
result
+
trailing_dot
,
len
(
input
)
def
decode
(
self
,
input
,
errors
=
'strict'
):
def
decode
(
self
,
input
,
errors
=
'strict'
):
if
errors
!=
'strict'
:
if
errors
!=
'strict'
:
raise
UnicodeError
(
"Unsupported error handling "
+
errors
)
raise
UnicodeError
(
"Unsupported error handling "
+
errors
)
...
@@ -199,30 +202,31 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
...
@@ -199,30 +202,31 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
raise
UnicodeError
(
"unsupported error handling "
+
errors
)
raise
UnicodeError
(
"unsupported error handling "
+
errors
)
if
not
input
:
if
not
input
:
return
(
""
,
0
)
return
(
b''
,
0
)
labels
=
dots
.
split
(
input
)
labels
=
dots
.
split
(
input
)
trailing_dot
=
''
trailing_dot
=
b
''
if
labels
:
if
labels
:
if
not
labels
[
-
1
]:
if
not
labels
[
-
1
]:
trailing_dot
=
'.'
trailing_dot
=
b
'.'
del
labels
[
-
1
]
del
labels
[
-
1
]
elif
not
final
:
elif
not
final
:
# Keep potentially unfinished label until the next call
# Keep potentially unfinished label until the next call
del
labels
[
-
1
]
del
labels
[
-
1
]
if
labels
:
if
labels
:
trailing_dot
=
'.'
trailing_dot
=
b
'.'
result
=
[]
result
=
b""
size
=
0
size
=
0
for
label
in
labels
:
for
label
in
labels
:
result
.
append
(
ToASCII
(
label
))
if
size
:
if
size
:
# Join with U+002E
result
.
extend
(
b'.'
)
size
+=
1
size
+=
1
result
.
extend
(
ToASCII
(
label
))
size
+=
len
(
label
)
size
+=
len
(
label
)
# Join with U+002E
result
+=
trailing_dot
result
=
"."
.
join
(
result
)
+
trailing_dot
size
+=
len
(
trailing_dot
)
size
+=
len
(
trailing_dot
)
return
(
result
,
size
)
return
(
result
,
size
)
...
@@ -239,8 +243,7 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
...
@@ -239,8 +243,7 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
labels
=
dots
.
split
(
input
)
labels
=
dots
.
split
(
input
)
else
:
else
:
# Must be ASCII string
# Must be ASCII string
input
=
str
(
input
)
input
=
str
(
input
,
"ascii"
)
str
(
input
,
"ascii"
)
labels
=
input
.
split
(
"."
)
labels
=
input
.
split
(
"."
)
trailing_dot
=
''
trailing_dot
=
''
...
...
Lib/encodings/punycode.py
View file @
0ac30f82
...
@@ -181,6 +181,8 @@ def insertion_sort(base, extended, errors):
...
@@ -181,6 +181,8 @@ def insertion_sort(base, extended, errors):
return
base
return
base
def
punycode_decode
(
text
,
errors
):
def
punycode_decode
(
text
,
errors
):
if
isinstance
(
text
,
str
):
text
=
text
.
encode
(
"ascii"
)
pos
=
text
.
rfind
(
b"-"
)
pos
=
text
.
rfind
(
b"-"
)
if
pos
==
-
1
:
if
pos
==
-
1
:
base
=
""
base
=
""
...
@@ -194,11 +196,11 @@ def punycode_decode(text, errors):
...
@@ -194,11 +196,11 @@ def punycode_decode(text, errors):
class
Codec
(
codecs
.
Codec
):
class
Codec
(
codecs
.
Codec
):
def
encode
(
self
,
input
,
errors
=
'strict'
):
def
encode
(
self
,
input
,
errors
=
'strict'
):
res
=
punycode_encode
(
input
)
res
=
punycode_encode
(
input
)
return
res
,
len
(
input
)
return
res
,
len
(
input
)
def
decode
(
self
,
input
,
errors
=
'strict'
):
def
decode
(
self
,
input
,
errors
=
'strict'
):
if
errors
not
in
(
'strict'
,
'replace'
,
'ignore'
):
if
errors
not
in
(
'strict'
,
'replace'
,
'ignore'
):
raise
UnicodeError
,
"Unsupported error handling "
+
errors
raise
UnicodeError
,
"Unsupported error handling "
+
errors
res
=
punycode_decode
(
input
,
errors
)
res
=
punycode_decode
(
input
,
errors
)
...
...
Lib/test/test_codecs.py
View file @
0ac30f82
...
@@ -624,6 +624,7 @@ class PunycodeTest(unittest.TestCase):
...
@@ -624,6 +624,7 @@ class PunycodeTest(unittest.TestCase):
def
test_decode
(
self
):
def
test_decode
(
self
):
for
uni
,
puny
in
punycode_testcases
:
for
uni
,
puny
in
punycode_testcases
:
self
.
assertEquals
(
uni
,
puny
.
decode
(
"punycode"
))
self
.
assertEquals
(
uni
,
puny
.
decode
(
"punycode"
))
self
.
assertEquals
(
uni
,
puny
.
decode
(
"ascii"
).
decode
(
"punycode"
))
class
UnicodeInternalTest
(
unittest
.
TestCase
):
class
UnicodeInternalTest
(
unittest
.
TestCase
):
def
test_bug1251300
(
self
):
def
test_bug1251300
(
self
):
...
@@ -676,154 +677,154 @@ class UnicodeInternalTest(unittest.TestCase):
...
@@ -676,154 +677,154 @@ class UnicodeInternalTest(unittest.TestCase):
# From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
# From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
nameprep_tests
=
[
nameprep_tests
=
[
# 3.1 Map to nothing.
# 3.1 Map to nothing.
(
'foo
\
xc2
\
xad
\
xcd
\
x8f
\
xe1
\
xa0
\
x86
\
xe1
\
xa0
\
x8b
bar'
(
b
'foo
\
xc2
\
xad
\
xcd
\
x8f
\
xe1
\
xa0
\
x86
\
xe1
\
xa0
\
x8b
bar'
'
\
xe2
\
x80
\
x8b
\
xe2
\
x81
\
xa0
baz
\
xef
\
xb8
\
x80
\
xef
\
xb8
\
x88
\
xef
'
b
'
\
xe2
\
x80
\
x8b
\
xe2
\
x81
\
xa0
baz
\
xef
\
xb8
\
x80
\
xef
\
xb8
\
x88
\
xef
'
'
\
xb8
\
x8f
\
xef
\
xbb
\
xbf
'
,
b
'
\
xb8
\
x8f
\
xef
\
xbb
\
xbf
'
,
'foobarbaz'
),
b
'foobarbaz'
),
# 3.2 Case folding ASCII U+0043 U+0041 U+0046 U+0045.
# 3.2 Case folding ASCII U+0043 U+0041 U+0046 U+0045.
(
'CAFE'
,
(
b
'CAFE'
,
'cafe'
),
b
'cafe'
),
# 3.3 Case folding 8bit U+00DF (german sharp s).
# 3.3 Case folding 8bit U+00DF (german sharp s).
# The original test case is bogus; it says \xc3\xdf
# The original test case is bogus; it says \xc3\xdf
(
'
\
xc3
\
x9f
'
,
(
b
'
\
xc3
\
x9f
'
,
'ss'
),
b
'ss'
),
# 3.4 Case folding U+0130 (turkish capital I with dot).
# 3.4 Case folding U+0130 (turkish capital I with dot).
(
'
\
xc4
\
xb0
'
,
(
b
'
\
xc4
\
xb0
'
,
'i
\
xcc
\
x87
'
),
b
'i
\
xcc
\
x87
'
),
# 3.5 Case folding multibyte U+0143 U+037A.
# 3.5 Case folding multibyte U+0143 U+037A.
(
'
\
xc5
\
x83
\
xcd
\
xba
'
,
(
b
'
\
xc5
\
x83
\
xcd
\
xba
'
,
'
\
xc5
\
x84
\
xce
\
xb9
'
),
b
'
\
xc5
\
x84
\
xce
\
xb9
'
),
# 3.6 Case folding U+2121 U+33C6 U+1D7BB.
# 3.6 Case folding U+2121 U+33C6 U+1D7BB.
# XXX: skip this as it fails in UCS-2 mode
# XXX: skip this as it fails in UCS-2 mode
#('\xe2\x84\xa1\xe3\x8f\x86\xf0\x9d\x9e\xbb',
#('\xe2\x84\xa1\xe3\x8f\x86\xf0\x9d\x9e\xbb',
# 'telc\xe2\x88\x95kg\xcf\x83'),
# 'telc\xe2\x88\x95kg\xcf\x83'),
(
None
,
None
),
(
None
,
None
),
# 3.7 Normalization of U+006a U+030c U+00A0 U+00AA.
# 3.7 Normalization of U+006a U+030c U+00A0 U+00AA.
(
'j
\
xcc
\
x8c
\
xc2
\
xa0
\
xc2
\
xaa
'
,
(
b
'j
\
xcc
\
x8c
\
xc2
\
xa0
\
xc2
\
xaa
'
,
'
\
xc7
\
xb0
a'
),
b
'
\
xc7
\
xb0
a'
),
# 3.8 Case folding U+1FB7 and normalization.
# 3.8 Case folding U+1FB7 and normalization.
(
'
\
xe1
\
xbe
\
xb7
'
,
(
b
'
\
xe1
\
xbe
\
xb7
'
,
'
\
xe1
\
xbe
\
xb6
\
xce
\
xb9
'
),
b
'
\
xe1
\
xbe
\
xb6
\
xce
\
xb9
'
),
# 3.9 Self-reverting case folding U+01F0 and normalization.
# 3.9 Self-reverting case folding U+01F0 and normalization.
# The original test case is bogus, it says `\xc7\xf0'
# The original test case is bogus, it says `\xc7\xf0'
(
'
\
xc7
\
xb0
'
,
(
b
'
\
xc7
\
xb0
'
,
'
\
xc7
\
xb0
'
),
b
'
\
xc7
\
xb0
'
),
# 3.10 Self-reverting case folding U+0390 and normalization.
# 3.10 Self-reverting case folding U+0390 and normalization.
(
'
\
xce
\
x90
'
,
(
b
'
\
xce
\
x90
'
,
'
\
xce
\
x90
'
),
b
'
\
xce
\
x90
'
),
# 3.11 Self-reverting case folding U+03B0 and normalization.
# 3.11 Self-reverting case folding U+03B0 and normalization.
(
'
\
xce
\
xb0
'
,
(
b
'
\
xce
\
xb0
'
,
'
\
xce
\
xb0
'
),
b
'
\
xce
\
xb0
'
),
# 3.12 Self-reverting case folding U+1E96 and normalization.
# 3.12 Self-reverting case folding U+1E96 and normalization.
(
'
\
xe1
\
xba
\
x96
'
,
(
b
'
\
xe1
\
xba
\
x96
'
,
'
\
xe1
\
xba
\
x96
'
),
b
'
\
xe1
\
xba
\
x96
'
),
# 3.13 Self-reverting case folding U+1F56 and normalization.
# 3.13 Self-reverting case folding U+1F56 and normalization.
(
'
\
xe1
\
xbd
\
x96
'
,
(
b
'
\
xe1
\
xbd
\
x96
'
,
'
\
xe1
\
xbd
\
x96
'
),
b
'
\
xe1
\
xbd
\
x96
'
),
# 3.14 ASCII space character U+0020.
# 3.14 ASCII space character U+0020.
(
' '
,
(
b
' '
,
' '
),
b
' '
),
# 3.15 Non-ASCII 8bit space character U+00A0.
# 3.15 Non-ASCII 8bit space character U+00A0.
(
'
\
xc2
\
xa0
'
,
(
b
'
\
xc2
\
xa0
'
,
' '
),
b
' '
),
# 3.16 Non-ASCII multibyte space character U+1680.
# 3.16 Non-ASCII multibyte space character U+1680.
(
'
\
xe1
\
x9a
\
x80
'
,
(
b
'
\
xe1
\
x9a
\
x80
'
,
None
),
None
),
# 3.17 Non-ASCII multibyte space character U+2000.
# 3.17 Non-ASCII multibyte space character U+2000.
(
'
\
xe2
\
x80
\
x80
'
,
(
b
'
\
xe2
\
x80
\
x80
'
,
' '
),
b
' '
),
# 3.18 Zero Width Space U+200b.
# 3.18 Zero Width Space U+200b.
(
'
\
xe2
\
x80
\
x8b
'
,
(
b
'
\
xe2
\
x80
\
x8b
'
,
''
),
b
''
),
# 3.19 Non-ASCII multibyte space character U+3000.
# 3.19 Non-ASCII multibyte space character U+3000.
(
'
\
xe3
\
x80
\
x80
'
,
(
b
'
\
xe3
\
x80
\
x80
'
,
' '
),
b
' '
),
# 3.20 ASCII control characters U+0010 U+007F.
# 3.20 ASCII control characters U+0010 U+007F.
(
'
\
x10
\
x7f
'
,
(
b
'
\
x10
\
x7f
'
,
'
\
x10
\
x7f
'
),
b
'
\
x10
\
x7f
'
),
# 3.21 Non-ASCII 8bit control character U+0085.
# 3.21 Non-ASCII 8bit control character U+0085.
(
'
\
xc2
\
x85
'
,
(
b
'
\
xc2
\
x85
'
,
None
),
None
),
# 3.22 Non-ASCII multibyte control character U+180E.
# 3.22 Non-ASCII multibyte control character U+180E.
(
'
\
xe1
\
xa0
\
x8e
'
,
(
b
'
\
xe1
\
xa0
\
x8e
'
,
None
),
None
),
# 3.23 Zero Width No-Break Space U+FEFF.
# 3.23 Zero Width No-Break Space U+FEFF.
(
'
\
xef
\
xbb
\
xbf
'
,
(
b
'
\
xef
\
xbb
\
xbf
'
,
''
),
b
''
),
# 3.24 Non-ASCII control character U+1D175.
# 3.24 Non-ASCII control character U+1D175.
(
'
\
xf0
\
x9d
\
x85
\
xb5
'
,
(
b
'
\
xf0
\
x9d
\
x85
\
xb5
'
,
None
),
None
),
# 3.25 Plane 0 private use character U+F123.
# 3.25 Plane 0 private use character U+F123.
(
'
\
xef
\
x84
\
xa3
'
,
(
b
'
\
xef
\
x84
\
xa3
'
,
None
),
None
),
# 3.26 Plane 15 private use character U+F1234.
# 3.26 Plane 15 private use character U+F1234.
(
'
\
xf3
\
xb1
\
x88
\
xb4
'
,
(
b
'
\
xf3
\
xb1
\
x88
\
xb4
'
,
None
),
None
),
# 3.27 Plane 16 private use character U+10F234.
# 3.27 Plane 16 private use character U+10F234.
(
'
\
xf4
\
x8f
\
x88
\
xb4
'
,
(
b
'
\
xf4
\
x8f
\
x88
\
xb4
'
,
None
),
None
),
# 3.28 Non-character code point U+8FFFE.
# 3.28 Non-character code point U+8FFFE.
(
'
\
xf2
\
x8f
\
xbf
\
xbe
'
,
(
b
'
\
xf2
\
x8f
\
xbf
\
xbe
'
,
None
),
None
),
# 3.29 Non-character code point U+10FFFF.
# 3.29 Non-character code point U+10FFFF.
(
'
\
xf4
\
x8f
\
xbf
\
xbf
'
,
(
b
'
\
xf4
\
x8f
\
xbf
\
xbf
'
,
None
),
None
),
# 3.30 Surrogate code U+DF42.
# 3.30 Surrogate code U+DF42.
(
'
\
xed
\
xbd
\
x82
'
,
(
b
'
\
xed
\
xbd
\
x82
'
,
None
),
None
),
# 3.31 Non-plain text character U+FFFD.
# 3.31 Non-plain text character U+FFFD.
(
'
\
xef
\
xbf
\
xbd
'
,
(
b
'
\
xef
\
xbf
\
xbd
'
,
None
),
None
),
# 3.32 Ideographic description character U+2FF5.
# 3.32 Ideographic description character U+2FF5.
(
'
\
xe2
\
xbf
\
xb5
'
,
(
b
'
\
xe2
\
xbf
\
xb5
'
,
None
),
None
),
# 3.33 Display property character U+0341.
# 3.33 Display property character U+0341.
(
'
\
xcd
\
x81
'
,
(
b
'
\
xcd
\
x81
'
,
'
\
xcc
\
x81
'
),
b
'
\
xcc
\
x81
'
),
# 3.34 Left-to-right mark U+200E.
# 3.34 Left-to-right mark U+200E.
(
'
\
xe2
\
x80
\
x8e
'
,
(
b
'
\
xe2
\
x80
\
x8e
'
,
None
),
None
),
# 3.35 Deprecated U+202A.
# 3.35 Deprecated U+202A.
(
'
\
xe2
\
x80
\
xaa
'
,
(
b
'
\
xe2
\
x80
\
xaa
'
,
None
),
None
),
# 3.36 Language tagging character U+E0001.
# 3.36 Language tagging character U+E0001.
(
'
\
xf3
\
xa0
\
x80
\
x81
'
,
(
b
'
\
xf3
\
xa0
\
x80
\
x81
'
,
None
),
None
),
# 3.37 Language tagging character U+E0042.
# 3.37 Language tagging character U+E0042.
(
'
\
xf3
\
xa0
\
x81
\
x82
'
,
(
b
'
\
xf3
\
xa0
\
x81
\
x82
'
,
None
),
None
),
# 3.38 Bidi: RandALCat character U+05BE and LCat characters.
# 3.38 Bidi: RandALCat character U+05BE and LCat characters.
(
'foo
\
xd6
\
xbe
bar'
,
(
b
'foo
\
xd6
\
xbe
bar'
,
None
),
None
),
# 3.39 Bidi: RandALCat character U+FD50 and LCat characters.
# 3.39 Bidi: RandALCat character U+FD50 and LCat characters.
(
'foo
\
xef
\
xb5
\
x90
bar'
,
(
b
'foo
\
xef
\
xb5
\
x90
bar'
,
None
),
None
),
# 3.40 Bidi: RandALCat character U+FB38 and LCat characters.
# 3.40 Bidi: RandALCat character U+FB38 and LCat characters.
(
'foo
\
xef
\
xb9
\
xb6
bar'
,
(
b
'foo
\
xef
\
xb9
\
xb6
bar'
,
'foo
\
xd9
\
x8e
bar'
),
b
'foo
\
xd9
\
x8e
bar'
),
# 3.41 Bidi: RandALCat without trailing RandALCat U+0627 U+0031.
# 3.41 Bidi: RandALCat without trailing RandALCat U+0627 U+0031.
(
'
\
xd8
\
xa7
1'
,
(
b
'
\
xd8
\
xa7
1'
,
None
),
None
),
# 3.42 Bidi: RandALCat character U+0627 U+0031 U+0628.
# 3.42 Bidi: RandALCat character U+0627 U+0031 U+0628.
(
'
\
xd8
\
xa7
1
\
xd8
\
xa8
'
,
(
b
'
\
xd8
\
xa7
1
\
xd8
\
xa8
'
,
'
\
xd8
\
xa7
1
\
xd8
\
xa8
'
),
b
'
\
xd8
\
xa7
1
\
xd8
\
xa8
'
),
# 3.43 Unassigned code point U+E0002.
# 3.43 Unassigned code point U+E0002.
# Skip this test as we allow unassigned
# Skip this test as we allow unassigned
#('\xf3\xa0\x80\x82',
#(
b
'\xf3\xa0\x80\x82',
# None),
# None),
(
None
,
None
),
(
None
,
None
),
# 3.44 Larger test (shrinking).
# 3.44 Larger test (shrinking).
# Original test case reads \xc3\xdf
# Original test case reads \xc3\xdf
(
'X
\
xc2
\
xad
\
xc3
\
x9f
\
xc4
\
xb0
\
xe2
\
x84
\
xa1
j
\
xcc
\
x8c
\
xc2
\
xa0
\
xc2
'
(
b
'X
\
xc2
\
xad
\
xc3
\
x9f
\
xc4
\
xb0
\
xe2
\
x84
\
xa1
j
\
xcc
\
x8c
\
xc2
\
xa0
\
xc2
'
'
\
xaa
\
xce
\
xb0
\
xe2
\
x80
\
x80
'
,
b
'
\
xaa
\
xce
\
xb0
\
xe2
\
x80
\
x80
'
,
'xssi
\
xcc
\
x87
tel
\
xc7
\
xb0
a
\
xce
\
xb0
'
),
b
'xssi
\
xcc
\
x87
tel
\
xc7
\
xb0
a
\
xce
\
xb0
'
),
# 3.45 Larger test (expanding).
# 3.45 Larger test (expanding).
# Original test case reads \xc3\x9f
# Original test case reads \xc3\x9f
(
'X
\
xc3
\
x9f
\
xe3
\
x8c
\
x96
\
xc4
\
xb0
\
xe2
\
x84
\
xa1
\
xe2
\
x92
\
x9f
\
xe3
\
x8c
'
(
b
'X
\
xc3
\
x9f
\
xe3
\
x8c
\
x96
\
xc4
\
xb0
\
xe2
\
x84
\
xa1
\
xe2
\
x92
\
x9f
\
xe3
\
x8c
'
'
\
x80
'
,
b
'
\
x80
'
,
'xss
\
xe3
\
x82
\
xad
\
xe3
\
x83
\
xad
\
xe3
\
x83
\
xa1
\
xe3
\
x83
\
xbc
\
xe3
'
b
'xss
\
xe3
\
x82
\
xad
\
xe3
\
x83
\
xad
\
xe3
\
x83
\
xa1
\
xe3
\
x83
\
xbc
\
xe3
'
'
\
x83
\
x88
\
xe3
\
x83
\
xab
i
\
xcc
\
x87
tel
\
x28
d
\
x29
\
xe3
\
x82
'
b
'
\
x83
\
x88
\
xe3
\
x83
\
xab
i
\
xcc
\
x87
tel
\
x28
d
\
x29
\
xe3
\
x82
'
'
\
xa2
\
xe3
\
x83
\
x91
\
xe3
\
x83
\
xbc
\
xe3
\
x83
\
x88
'
)
b
'
\
xa2
\
xe3
\
x83
\
x91
\
xe3
\
x83
\
xbc
\
xe3
\
x83
\
x88
'
)
]
]
...
@@ -848,16 +849,16 @@ class NameprepTest(unittest.TestCase):
...
@@ -848,16 +849,16 @@ class NameprepTest(unittest.TestCase):
class
IDNACodecTest
(
unittest
.
TestCase
):
class
IDNACodecTest
(
unittest
.
TestCase
):
def
test_builtin_decode
(
self
):
def
test_builtin_decode
(
self
):
self
.
assertEquals
(
str
(
"python.org"
,
"idna"
),
"python.org"
)
self
.
assertEquals
(
str
(
b
"python.org"
,
"idna"
),
"python.org"
)
self
.
assertEquals
(
str
(
"python.org."
,
"idna"
),
"python.org."
)
self
.
assertEquals
(
str
(
b
"python.org."
,
"idna"
),
"python.org."
)
self
.
assertEquals
(
str
(
"xn--pythn-mua.org"
,
"idna"
),
"pyth
\
xf6
n.org"
)
self
.
assertEquals
(
str
(
b
"xn--pythn-mua.org"
,
"idna"
),
"pyth
\
xf6
n.org"
)
self
.
assertEquals
(
str
(
"xn--pythn-mua.org."
,
"idna"
),
"pyth
\
xf6
n.org."
)
self
.
assertEquals
(
str
(
b
"xn--pythn-mua.org."
,
"idna"
),
"pyth
\
xf6
n.org."
)
def
test_builtin_encode
(
self
):
def
test_builtin_encode
(
self
):
self
.
assertEquals
(
"python.org"
.
encode
(
"idna"
),
"python.org"
)
self
.
assertEquals
(
"python.org"
.
encode
(
"idna"
),
b
"python.org"
)
self
.
assertEquals
(
"python.org."
.
encode
(
"idna"
),
"python.org."
)
self
.
assertEquals
(
"python.org."
.
encode
(
"idna"
),
b
"python.org."
)
self
.
assertEquals
(
"pyth
\
xf6
n.org"
.
encode
(
"idna"
),
"xn--pythn-mua.org"
)
self
.
assertEquals
(
"pyth
\
xf6
n.org"
.
encode
(
"idna"
),
b
"xn--pythn-mua.org"
)
self
.
assertEquals
(
"pyth
\
xf6
n.org."
.
encode
(
"idna"
),
"xn--pythn-mua.org."
)
self
.
assertEquals
(
"pyth
\
xf6
n.org."
.
encode
(
"idna"
),
b
"xn--pythn-mua.org."
)
def
test_stream
(
self
):
def
test_stream
(
self
):
r
=
codecs
.
getreader
(
"idna"
)(
io
.
BytesIO
(
b"abc"
))
r
=
codecs
.
getreader
(
"idna"
)(
io
.
BytesIO
(
b"abc"
))
...
@@ -866,61 +867,61 @@ class IDNACodecTest(unittest.TestCase):
...
@@ -866,61 +867,61 @@ class IDNACodecTest(unittest.TestCase):
def
test_incremental_decode
(
self
):
def
test_incremental_decode
(
self
):
self
.
assertEquals
(
self
.
assertEquals
(
""
.
join
(
codecs
.
iterdecode
(
"python.org"
,
"idna"
)),
""
.
join
(
codecs
.
iterdecode
(
(
bytes
(
chr
(
c
))
for
c
in
b"python.org"
)
,
"idna"
)),
"python.org"
"python.org"
)
)
self
.
assertEquals
(
self
.
assertEquals
(
""
.
join
(
codecs
.
iterdecode
(
"python.org."
,
"idna"
)),
""
.
join
(
codecs
.
iterdecode
(
(
bytes
(
chr
(
c
))
for
c
in
b"python.org."
)
,
"idna"
)),
"python.org."
"python.org."
)
)
self
.
assertEquals
(
self
.
assertEquals
(
""
.
join
(
codecs
.
iterdecode
(
"xn--pythn-mua.org."
,
"idna"
)),
""
.
join
(
codecs
.
iterdecode
(
(
bytes
(
chr
(
c
))
for
c
in
b"xn--pythn-mua.org."
)
,
"idna"
)),
"pyth
\
xf6
n.org."
"pyth
\
xf6
n.org."
)
)
self
.
assertEquals
(
self
.
assertEquals
(
""
.
join
(
codecs
.
iterdecode
(
"xn--pythn-mua.org."
,
"idna"
)),
""
.
join
(
codecs
.
iterdecode
(
(
bytes
(
chr
(
c
))
for
c
in
b"xn--pythn-mua.org."
)
,
"idna"
)),
"pyth
\
xf6
n.org."
"pyth
\
xf6
n.org."
)
)
decoder
=
codecs
.
getincrementaldecoder
(
"idna"
)()
decoder
=
codecs
.
getincrementaldecoder
(
"idna"
)()
self
.
assertEquals
(
decoder
.
decode
(
"xn--xam"
,
),
""
)
self
.
assertEquals
(
decoder
.
decode
(
b
"xn--xam"
,
),
""
)
self
.
assertEquals
(
decoder
.
decode
(
"ple-9ta.o"
,
),
"
\
xe4
xample."
)
self
.
assertEquals
(
decoder
.
decode
(
b
"ple-9ta.o"
,
),
"
\
xe4
xample."
)
self
.
assertEquals
(
decoder
.
decode
(
"rg"
),
""
)
self
.
assertEquals
(
decoder
.
decode
(
b
"rg"
),
""
)
self
.
assertEquals
(
decoder
.
decode
(
""
,
True
),
"org"
)
self
.
assertEquals
(
decoder
.
decode
(
b
""
,
True
),
"org"
)
decoder
.
reset
()
decoder
.
reset
()
self
.
assertEquals
(
decoder
.
decode
(
"xn--xam"
,
),
""
)
self
.
assertEquals
(
decoder
.
decode
(
b
"xn--xam"
,
),
""
)
self
.
assertEquals
(
decoder
.
decode
(
"ple-9ta.o"
,
),
"
\
xe4
xample."
)
self
.
assertEquals
(
decoder
.
decode
(
b
"ple-9ta.o"
,
),
"
\
xe4
xample."
)
self
.
assertEquals
(
decoder
.
decode
(
"rg."
),
"org."
)
self
.
assertEquals
(
decoder
.
decode
(
b
"rg."
),
"org."
)
self
.
assertEquals
(
decoder
.
decode
(
""
,
True
),
""
)
self
.
assertEquals
(
decoder
.
decode
(
b
""
,
True
),
""
)
def
test_incremental_encode
(
self
):
def
test_incremental_encode
(
self
):
self
.
assertEquals
(
self
.
assertEquals
(
""
.
join
(
codecs
.
iterencode
(
"python.org"
,
"idna"
)),
b
""
.
join
(
codecs
.
iterencode
(
"python.org"
,
"idna"
)),
"python.org"
b
"python.org"
)
)
self
.
assertEquals
(
self
.
assertEquals
(
""
.
join
(
codecs
.
iterencode
(
"python.org."
,
"idna"
)),
b
""
.
join
(
codecs
.
iterencode
(
"python.org."
,
"idna"
)),
"python.org."
b
"python.org."
)
)
self
.
assertEquals
(
self
.
assertEquals
(
""
.
join
(
codecs
.
iterencode
(
"pyth
\
xf6
n.org."
,
"idna"
)),
b
""
.
join
(
codecs
.
iterencode
(
"pyth
\
xf6
n.org."
,
"idna"
)),
"xn--pythn-mua.org."
b
"xn--pythn-mua.org."
)
)
self
.
assertEquals
(
self
.
assertEquals
(
""
.
join
(
codecs
.
iterencode
(
"pyth
\
xf6
n.org."
,
"idna"
)),
b
""
.
join
(
codecs
.
iterencode
(
"pyth
\
xf6
n.org."
,
"idna"
)),
"xn--pythn-mua.org."
b
"xn--pythn-mua.org."
)
)
encoder
=
codecs
.
getincrementalencoder
(
"idna"
)()
encoder
=
codecs
.
getincrementalencoder
(
"idna"
)()
self
.
assertEquals
(
encoder
.
encode
(
"
\
xe4
x"
),
""
)
self
.
assertEquals
(
encoder
.
encode
(
"
\
xe4
x"
),
b
""
)
self
.
assertEquals
(
encoder
.
encode
(
"ample.org"
),
"xn--xample-9ta."
)
self
.
assertEquals
(
encoder
.
encode
(
"ample.org"
),
b
"xn--xample-9ta."
)
self
.
assertEquals
(
encoder
.
encode
(
""
,
True
),
"org"
)
self
.
assertEquals
(
encoder
.
encode
(
""
,
True
),
b
"org"
)
encoder
.
reset
()
encoder
.
reset
()
self
.
assertEquals
(
encoder
.
encode
(
"
\
xe4
x"
),
""
)
self
.
assertEquals
(
encoder
.
encode
(
"
\
xe4
x"
),
b
""
)
self
.
assertEquals
(
encoder
.
encode
(
"ample.org."
),
"xn--xample-9ta.org."
)
self
.
assertEquals
(
encoder
.
encode
(
"ample.org."
),
b
"xn--xample-9ta.org."
)
self
.
assertEquals
(
encoder
.
encode
(
""
,
True
),
""
)
self
.
assertEquals
(
encoder
.
encode
(
""
,
True
),
b
""
)
class
CodecsModuleTest
(
unittest
.
TestCase
):
class
CodecsModuleTest
(
unittest
.
TestCase
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment