Commit 0ac30f82 authored by Walter Dörwald

Enhance the punycode decoder so that it can decode unicode objects.

Fix the idna codec and the tests.
parent 1f05a3b7
...@@ -7,7 +7,8 @@ from unicodedata import ucd_3_2_0 as unicodedata ...@@ -7,7 +7,8 @@ from unicodedata import ucd_3_2_0 as unicodedata
dots = re.compile("[\u002E\u3002\uFF0E\uFF61]") dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")
# IDNA section 5 # IDNA section 5
ace_prefix = "xn--" ace_prefix = b"xn--"
sace_prefix = "xn--"
# This assumes query strings, so AllowUnassigned is true # This assumes query strings, so AllowUnassigned is true
def nameprep(label): def nameprep(label):
...@@ -87,7 +88,7 @@ def ToASCII(label): ...@@ -87,7 +88,7 @@ def ToASCII(label):
raise UnicodeError("label empty or too long") raise UnicodeError("label empty or too long")
# Step 5: Check ACE prefix # Step 5: Check ACE prefix
if label.startswith(ace_prefix): if label.startswith(sace_prefix):
raise UnicodeError("Label starts with ACE prefix") raise UnicodeError("Label starts with ACE prefix")
# Step 6: Encode with PUNYCODE # Step 6: Encode with PUNYCODE
...@@ -134,7 +135,7 @@ def ToUnicode(label): ...@@ -134,7 +135,7 @@ def ToUnicode(label):
# Step 7: Compare the result of step 6 with the one of step 3 # Step 7: Compare the result of step 6 with the one of step 3
# label2 will already be in lower case. # label2 will already be in lower case.
if label.lower() != label2: if str(label, "ascii").lower() != str(label2, "ascii"):
raise UnicodeError("IDNA does not round-trip", label, label2) raise UnicodeError("IDNA does not round-trip", label, label2)
# Step 8: return the result of step 5 # Step 8: return the result of step 5
...@@ -143,7 +144,7 @@ def ToUnicode(label): ...@@ -143,7 +144,7 @@ def ToUnicode(label):
### Codec APIs ### Codec APIs
class Codec(codecs.Codec): class Codec(codecs.Codec):
def encode(self,input,errors='strict'): def encode(self, input, errors='strict'):
if errors != 'strict': if errors != 'strict':
# IDNA is quite clear that implementations must be strict # IDNA is quite clear that implementations must be strict
...@@ -152,19 +153,21 @@ class Codec(codecs.Codec): ...@@ -152,19 +153,21 @@ class Codec(codecs.Codec):
if not input: if not input:
return b"", 0 return b"", 0
result = [] result = b""
labels = dots.split(input) labels = dots.split(input)
if labels and len(labels[-1])==0: if labels and not labels[-1]:
trailing_dot = b'.' trailing_dot = b'.'
del labels[-1] del labels[-1]
else: else:
trailing_dot = b'' trailing_dot = b''
for label in labels: for label in labels:
result.append(ToASCII(label)) if result:
# Join with U+002E # Join with U+002E
return b".".join(result)+trailing_dot, len(input) result.extend(b'.')
result.extend(ToASCII(label))
return result+trailing_dot, len(input)
def decode(self,input,errors='strict'): def decode(self, input, errors='strict'):
if errors != 'strict': if errors != 'strict':
raise UnicodeError("Unsupported error handling "+errors) raise UnicodeError("Unsupported error handling "+errors)
...@@ -199,30 +202,31 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder): ...@@ -199,30 +202,31 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
raise UnicodeError("unsupported error handling "+errors) raise UnicodeError("unsupported error handling "+errors)
if not input: if not input:
return ("", 0) return (b'', 0)
labels = dots.split(input) labels = dots.split(input)
trailing_dot = '' trailing_dot = b''
if labels: if labels:
if not labels[-1]: if not labels[-1]:
trailing_dot = '.' trailing_dot = b'.'
del labels[-1] del labels[-1]
elif not final: elif not final:
# Keep potentially unfinished label until the next call # Keep potentially unfinished label until the next call
del labels[-1] del labels[-1]
if labels: if labels:
trailing_dot = '.' trailing_dot = b'.'
result = [] result = b""
size = 0 size = 0
for label in labels: for label in labels:
result.append(ToASCII(label))
if size: if size:
# Join with U+002E
result.extend(b'.')
size += 1 size += 1
result.extend(ToASCII(label))
size += len(label) size += len(label)
# Join with U+002E result += trailing_dot
result = ".".join(result) + trailing_dot
size += len(trailing_dot) size += len(trailing_dot)
return (result, size) return (result, size)
...@@ -239,8 +243,7 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder): ...@@ -239,8 +243,7 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
labels = dots.split(input) labels = dots.split(input)
else: else:
# Must be ASCII string # Must be ASCII string
input = str(input) input = str(input, "ascii")
str(input, "ascii")
labels = input.split(".") labels = input.split(".")
trailing_dot = '' trailing_dot = ''
......
...@@ -181,6 +181,8 @@ def insertion_sort(base, extended, errors): ...@@ -181,6 +181,8 @@ def insertion_sort(base, extended, errors):
return base return base
def punycode_decode(text, errors): def punycode_decode(text, errors):
if isinstance(text, str):
text = text.encode("ascii")
pos = text.rfind(b"-") pos = text.rfind(b"-")
if pos == -1: if pos == -1:
base = "" base = ""
...@@ -194,11 +196,11 @@ def punycode_decode(text, errors): ...@@ -194,11 +196,11 @@ def punycode_decode(text, errors):
class Codec(codecs.Codec): class Codec(codecs.Codec):
def encode(self,input,errors='strict'): def encode(self, input, errors='strict'):
res = punycode_encode(input) res = punycode_encode(input)
return res, len(input) return res, len(input)
def decode(self,input,errors='strict'): def decode(self, input, errors='strict'):
if errors not in ('strict', 'replace', 'ignore'): if errors not in ('strict', 'replace', 'ignore'):
raise UnicodeError, "Unsupported error handling "+errors raise UnicodeError, "Unsupported error handling "+errors
res = punycode_decode(input, errors) res = punycode_decode(input, errors)
......
...@@ -624,6 +624,7 @@ class PunycodeTest(unittest.TestCase): ...@@ -624,6 +624,7 @@ class PunycodeTest(unittest.TestCase):
def test_decode(self): def test_decode(self):
for uni, puny in punycode_testcases: for uni, puny in punycode_testcases:
self.assertEquals(uni, puny.decode("punycode")) self.assertEquals(uni, puny.decode("punycode"))
self.assertEquals(uni, puny.decode("ascii").decode("punycode"))
class UnicodeInternalTest(unittest.TestCase): class UnicodeInternalTest(unittest.TestCase):
def test_bug1251300(self): def test_bug1251300(self):
...@@ -676,154 +677,154 @@ class UnicodeInternalTest(unittest.TestCase): ...@@ -676,154 +677,154 @@ class UnicodeInternalTest(unittest.TestCase):
# From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html # From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
nameprep_tests = [ nameprep_tests = [
# 3.1 Map to nothing. # 3.1 Map to nothing.
('foo\xc2\xad\xcd\x8f\xe1\xa0\x86\xe1\xa0\x8bbar' (b'foo\xc2\xad\xcd\x8f\xe1\xa0\x86\xe1\xa0\x8bbar'
'\xe2\x80\x8b\xe2\x81\xa0baz\xef\xb8\x80\xef\xb8\x88\xef' b'\xe2\x80\x8b\xe2\x81\xa0baz\xef\xb8\x80\xef\xb8\x88\xef'
'\xb8\x8f\xef\xbb\xbf', b'\xb8\x8f\xef\xbb\xbf',
'foobarbaz'), b'foobarbaz'),
# 3.2 Case folding ASCII U+0043 U+0041 U+0046 U+0045. # 3.2 Case folding ASCII U+0043 U+0041 U+0046 U+0045.
('CAFE', (b'CAFE',
'cafe'), b'cafe'),
# 3.3 Case folding 8bit U+00DF (german sharp s). # 3.3 Case folding 8bit U+00DF (german sharp s).
# The original test case is bogus; it says \xc3\xdf # The original test case is bogus; it says \xc3\xdf
('\xc3\x9f', (b'\xc3\x9f',
'ss'), b'ss'),
# 3.4 Case folding U+0130 (turkish capital I with dot). # 3.4 Case folding U+0130 (turkish capital I with dot).
('\xc4\xb0', (b'\xc4\xb0',
'i\xcc\x87'), b'i\xcc\x87'),
# 3.5 Case folding multibyte U+0143 U+037A. # 3.5 Case folding multibyte U+0143 U+037A.
('\xc5\x83\xcd\xba', (b'\xc5\x83\xcd\xba',
'\xc5\x84 \xce\xb9'), b'\xc5\x84 \xce\xb9'),
# 3.6 Case folding U+2121 U+33C6 U+1D7BB. # 3.6 Case folding U+2121 U+33C6 U+1D7BB.
# XXX: skip this as it fails in UCS-2 mode # XXX: skip this as it fails in UCS-2 mode
#('\xe2\x84\xa1\xe3\x8f\x86\xf0\x9d\x9e\xbb', #('\xe2\x84\xa1\xe3\x8f\x86\xf0\x9d\x9e\xbb',
# 'telc\xe2\x88\x95kg\xcf\x83'), # 'telc\xe2\x88\x95kg\xcf\x83'),
(None, None), (None, None),
# 3.7 Normalization of U+006a U+030c U+00A0 U+00AA. # 3.7 Normalization of U+006a U+030c U+00A0 U+00AA.
('j\xcc\x8c\xc2\xa0\xc2\xaa', (b'j\xcc\x8c\xc2\xa0\xc2\xaa',
'\xc7\xb0 a'), b'\xc7\xb0 a'),
# 3.8 Case folding U+1FB7 and normalization. # 3.8 Case folding U+1FB7 and normalization.
('\xe1\xbe\xb7', (b'\xe1\xbe\xb7',
'\xe1\xbe\xb6\xce\xb9'), b'\xe1\xbe\xb6\xce\xb9'),
# 3.9 Self-reverting case folding U+01F0 and normalization. # 3.9 Self-reverting case folding U+01F0 and normalization.
# The original test case is bogus, it says `\xc7\xf0' # The original test case is bogus, it says `\xc7\xf0'
('\xc7\xb0', (b'\xc7\xb0',
'\xc7\xb0'), b'\xc7\xb0'),
# 3.10 Self-reverting case folding U+0390 and normalization. # 3.10 Self-reverting case folding U+0390 and normalization.
('\xce\x90', (b'\xce\x90',
'\xce\x90'), b'\xce\x90'),
# 3.11 Self-reverting case folding U+03B0 and normalization. # 3.11 Self-reverting case folding U+03B0 and normalization.
('\xce\xb0', (b'\xce\xb0',
'\xce\xb0'), b'\xce\xb0'),
# 3.12 Self-reverting case folding U+1E96 and normalization. # 3.12 Self-reverting case folding U+1E96 and normalization.
('\xe1\xba\x96', (b'\xe1\xba\x96',
'\xe1\xba\x96'), b'\xe1\xba\x96'),
# 3.13 Self-reverting case folding U+1F56 and normalization. # 3.13 Self-reverting case folding U+1F56 and normalization.
('\xe1\xbd\x96', (b'\xe1\xbd\x96',
'\xe1\xbd\x96'), b'\xe1\xbd\x96'),
# 3.14 ASCII space character U+0020. # 3.14 ASCII space character U+0020.
(' ', (b' ',
' '), b' '),
# 3.15 Non-ASCII 8bit space character U+00A0. # 3.15 Non-ASCII 8bit space character U+00A0.
('\xc2\xa0', (b'\xc2\xa0',
' '), b' '),
# 3.16 Non-ASCII multibyte space character U+1680. # 3.16 Non-ASCII multibyte space character U+1680.
('\xe1\x9a\x80', (b'\xe1\x9a\x80',
None), None),
# 3.17 Non-ASCII multibyte space character U+2000. # 3.17 Non-ASCII multibyte space character U+2000.
('\xe2\x80\x80', (b'\xe2\x80\x80',
' '), b' '),
# 3.18 Zero Width Space U+200b. # 3.18 Zero Width Space U+200b.
('\xe2\x80\x8b', (b'\xe2\x80\x8b',
''), b''),
# 3.19 Non-ASCII multibyte space character U+3000. # 3.19 Non-ASCII multibyte space character U+3000.
('\xe3\x80\x80', (b'\xe3\x80\x80',
' '), b' '),
# 3.20 ASCII control characters U+0010 U+007F. # 3.20 ASCII control characters U+0010 U+007F.
('\x10\x7f', (b'\x10\x7f',
'\x10\x7f'), b'\x10\x7f'),
# 3.21 Non-ASCII 8bit control character U+0085. # 3.21 Non-ASCII 8bit control character U+0085.
('\xc2\x85', (b'\xc2\x85',
None), None),
# 3.22 Non-ASCII multibyte control character U+180E. # 3.22 Non-ASCII multibyte control character U+180E.
('\xe1\xa0\x8e', (b'\xe1\xa0\x8e',
None), None),
# 3.23 Zero Width No-Break Space U+FEFF. # 3.23 Zero Width No-Break Space U+FEFF.
('\xef\xbb\xbf', (b'\xef\xbb\xbf',
''), b''),
# 3.24 Non-ASCII control character U+1D175. # 3.24 Non-ASCII control character U+1D175.
('\xf0\x9d\x85\xb5', (b'\xf0\x9d\x85\xb5',
None), None),
# 3.25 Plane 0 private use character U+F123. # 3.25 Plane 0 private use character U+F123.
('\xef\x84\xa3', (b'\xef\x84\xa3',
None), None),
# 3.26 Plane 15 private use character U+F1234. # 3.26 Plane 15 private use character U+F1234.
('\xf3\xb1\x88\xb4', (b'\xf3\xb1\x88\xb4',
None), None),
# 3.27 Plane 16 private use character U+10F234. # 3.27 Plane 16 private use character U+10F234.
('\xf4\x8f\x88\xb4', (b'\xf4\x8f\x88\xb4',
None), None),
# 3.28 Non-character code point U+8FFFE. # 3.28 Non-character code point U+8FFFE.
('\xf2\x8f\xbf\xbe', (b'\xf2\x8f\xbf\xbe',
None), None),
# 3.29 Non-character code point U+10FFFF. # 3.29 Non-character code point U+10FFFF.
('\xf4\x8f\xbf\xbf', (b'\xf4\x8f\xbf\xbf',
None), None),
# 3.30 Surrogate code U+DF42. # 3.30 Surrogate code U+DF42.
('\xed\xbd\x82', (b'\xed\xbd\x82',
None), None),
# 3.31 Non-plain text character U+FFFD. # 3.31 Non-plain text character U+FFFD.
('\xef\xbf\xbd', (b'\xef\xbf\xbd',
None), None),
# 3.32 Ideographic description character U+2FF5. # 3.32 Ideographic description character U+2FF5.
('\xe2\xbf\xb5', (b'\xe2\xbf\xb5',
None), None),
# 3.33 Display property character U+0341. # 3.33 Display property character U+0341.
('\xcd\x81', (b'\xcd\x81',
'\xcc\x81'), b'\xcc\x81'),
# 3.34 Left-to-right mark U+200E. # 3.34 Left-to-right mark U+200E.
('\xe2\x80\x8e', (b'\xe2\x80\x8e',
None), None),
# 3.35 Deprecated U+202A. # 3.35 Deprecated U+202A.
('\xe2\x80\xaa', (b'\xe2\x80\xaa',
None), None),
# 3.36 Language tagging character U+E0001. # 3.36 Language tagging character U+E0001.
('\xf3\xa0\x80\x81', (b'\xf3\xa0\x80\x81',
None), None),
# 3.37 Language tagging character U+E0042. # 3.37 Language tagging character U+E0042.
('\xf3\xa0\x81\x82', (b'\xf3\xa0\x81\x82',
None), None),
# 3.38 Bidi: RandALCat character U+05BE and LCat characters. # 3.38 Bidi: RandALCat character U+05BE and LCat characters.
('foo\xd6\xbebar', (b'foo\xd6\xbebar',
None), None),
# 3.39 Bidi: RandALCat character U+FD50 and LCat characters. # 3.39 Bidi: RandALCat character U+FD50 and LCat characters.
('foo\xef\xb5\x90bar', (b'foo\xef\xb5\x90bar',
None), None),
# 3.40 Bidi: RandALCat character U+FB38 and LCat characters. # 3.40 Bidi: RandALCat character U+FB38 and LCat characters.
('foo\xef\xb9\xb6bar', (b'foo\xef\xb9\xb6bar',
'foo \xd9\x8ebar'), b'foo \xd9\x8ebar'),
# 3.41 Bidi: RandALCat without trailing RandALCat U+0627 U+0031. # 3.41 Bidi: RandALCat without trailing RandALCat U+0627 U+0031.
('\xd8\xa71', (b'\xd8\xa71',
None), None),
# 3.42 Bidi: RandALCat character U+0627 U+0031 U+0628. # 3.42 Bidi: RandALCat character U+0627 U+0031 U+0628.
('\xd8\xa71\xd8\xa8', (b'\xd8\xa71\xd8\xa8',
'\xd8\xa71\xd8\xa8'), b'\xd8\xa71\xd8\xa8'),
# 3.43 Unassigned code point U+E0002. # 3.43 Unassigned code point U+E0002.
# Skip this test as we allow unassigned # Skip this test as we allow unassigned
#('\xf3\xa0\x80\x82', #(b'\xf3\xa0\x80\x82',
# None), # None),
(None, None), (None, None),
# 3.44 Larger test (shrinking). # 3.44 Larger test (shrinking).
# Original test case reads \xc3\xdf # Original test case reads \xc3\xdf
('X\xc2\xad\xc3\x9f\xc4\xb0\xe2\x84\xa1j\xcc\x8c\xc2\xa0\xc2' (b'X\xc2\xad\xc3\x9f\xc4\xb0\xe2\x84\xa1j\xcc\x8c\xc2\xa0\xc2'
'\xaa\xce\xb0\xe2\x80\x80', b'\xaa\xce\xb0\xe2\x80\x80',
'xssi\xcc\x87tel\xc7\xb0 a\xce\xb0 '), b'xssi\xcc\x87tel\xc7\xb0 a\xce\xb0 '),
# 3.45 Larger test (expanding). # 3.45 Larger test (expanding).
# Original test case reads \xc3\x9f # Original test case reads \xc3\x9f
('X\xc3\x9f\xe3\x8c\x96\xc4\xb0\xe2\x84\xa1\xe2\x92\x9f\xe3\x8c' (b'X\xc3\x9f\xe3\x8c\x96\xc4\xb0\xe2\x84\xa1\xe2\x92\x9f\xe3\x8c'
'\x80', b'\x80',
'xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3' b'xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3'
'\x83\x88\xe3\x83\xabi\xcc\x87tel\x28d\x29\xe3\x82' b'\x83\x88\xe3\x83\xabi\xcc\x87tel\x28d\x29\xe3\x82'
'\xa2\xe3\x83\x91\xe3\x83\xbc\xe3\x83\x88') b'\xa2\xe3\x83\x91\xe3\x83\xbc\xe3\x83\x88')
] ]
...@@ -848,16 +849,16 @@ class NameprepTest(unittest.TestCase): ...@@ -848,16 +849,16 @@ class NameprepTest(unittest.TestCase):
class IDNACodecTest(unittest.TestCase): class IDNACodecTest(unittest.TestCase):
def test_builtin_decode(self): def test_builtin_decode(self):
self.assertEquals(str("python.org", "idna"), "python.org") self.assertEquals(str(b"python.org", "idna"), "python.org")
self.assertEquals(str("python.org.", "idna"), "python.org.") self.assertEquals(str(b"python.org.", "idna"), "python.org.")
self.assertEquals(str("xn--pythn-mua.org", "idna"), "pyth\xf6n.org") self.assertEquals(str(b"xn--pythn-mua.org", "idna"), "pyth\xf6n.org")
self.assertEquals(str("xn--pythn-mua.org.", "idna"), "pyth\xf6n.org.") self.assertEquals(str(b"xn--pythn-mua.org.", "idna"), "pyth\xf6n.org.")
def test_builtin_encode(self): def test_builtin_encode(self):
self.assertEquals("python.org".encode("idna"), "python.org") self.assertEquals("python.org".encode("idna"), b"python.org")
self.assertEquals("python.org.".encode("idna"), "python.org.") self.assertEquals("python.org.".encode("idna"), b"python.org.")
self.assertEquals("pyth\xf6n.org".encode("idna"), "xn--pythn-mua.org") self.assertEquals("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org")
self.assertEquals("pyth\xf6n.org.".encode("idna"), "xn--pythn-mua.org.") self.assertEquals("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.")
def test_stream(self): def test_stream(self):
r = codecs.getreader("idna")(io.BytesIO(b"abc")) r = codecs.getreader("idna")(io.BytesIO(b"abc"))
...@@ -866,61 +867,61 @@ class IDNACodecTest(unittest.TestCase): ...@@ -866,61 +867,61 @@ class IDNACodecTest(unittest.TestCase):
def test_incremental_decode(self): def test_incremental_decode(self):
self.assertEquals( self.assertEquals(
"".join(codecs.iterdecode("python.org", "idna")), "".join(codecs.iterdecode((bytes(chr(c)) for c in b"python.org"), "idna")),
"python.org" "python.org"
) )
self.assertEquals( self.assertEquals(
"".join(codecs.iterdecode("python.org.", "idna")), "".join(codecs.iterdecode((bytes(chr(c)) for c in b"python.org."), "idna")),
"python.org." "python.org."
) )
self.assertEquals( self.assertEquals(
"".join(codecs.iterdecode("xn--pythn-mua.org.", "idna")), "".join(codecs.iterdecode((bytes(chr(c)) for c in b"xn--pythn-mua.org."), "idna")),
"pyth\xf6n.org." "pyth\xf6n.org."
) )
self.assertEquals( self.assertEquals(
"".join(codecs.iterdecode("xn--pythn-mua.org.", "idna")), "".join(codecs.iterdecode((bytes(chr(c)) for c in b"xn--pythn-mua.org."), "idna")),
"pyth\xf6n.org." "pyth\xf6n.org."
) )
decoder = codecs.getincrementaldecoder("idna")() decoder = codecs.getincrementaldecoder("idna")()
self.assertEquals(decoder.decode("xn--xam", ), "") self.assertEquals(decoder.decode(b"xn--xam", ), "")
self.assertEquals(decoder.decode("ple-9ta.o", ), "\xe4xample.") self.assertEquals(decoder.decode(b"ple-9ta.o", ), "\xe4xample.")
self.assertEquals(decoder.decode("rg"), "") self.assertEquals(decoder.decode(b"rg"), "")
self.assertEquals(decoder.decode("", True), "org") self.assertEquals(decoder.decode(b"", True), "org")
decoder.reset() decoder.reset()
self.assertEquals(decoder.decode("xn--xam", ), "") self.assertEquals(decoder.decode(b"xn--xam", ), "")
self.assertEquals(decoder.decode("ple-9ta.o", ), "\xe4xample.") self.assertEquals(decoder.decode(b"ple-9ta.o", ), "\xe4xample.")
self.assertEquals(decoder.decode("rg."), "org.") self.assertEquals(decoder.decode(b"rg."), "org.")
self.assertEquals(decoder.decode("", True), "") self.assertEquals(decoder.decode(b"", True), "")
def test_incremental_encode(self): def test_incremental_encode(self):
self.assertEquals( self.assertEquals(
"".join(codecs.iterencode("python.org", "idna")), b"".join(codecs.iterencode("python.org", "idna")),
"python.org" b"python.org"
) )
self.assertEquals( self.assertEquals(
"".join(codecs.iterencode("python.org.", "idna")), b"".join(codecs.iterencode("python.org.", "idna")),
"python.org." b"python.org."
) )
self.assertEquals( self.assertEquals(
"".join(codecs.iterencode("pyth\xf6n.org.", "idna")), b"".join(codecs.iterencode("pyth\xf6n.org.", "idna")),
"xn--pythn-mua.org." b"xn--pythn-mua.org."
) )
self.assertEquals( self.assertEquals(
"".join(codecs.iterencode("pyth\xf6n.org.", "idna")), b"".join(codecs.iterencode("pyth\xf6n.org.", "idna")),
"xn--pythn-mua.org." b"xn--pythn-mua.org."
) )
encoder = codecs.getincrementalencoder("idna")() encoder = codecs.getincrementalencoder("idna")()
self.assertEquals(encoder.encode("\xe4x"), "") self.assertEquals(encoder.encode("\xe4x"), b"")
self.assertEquals(encoder.encode("ample.org"), "xn--xample-9ta.") self.assertEquals(encoder.encode("ample.org"), b"xn--xample-9ta.")
self.assertEquals(encoder.encode("", True), "org") self.assertEquals(encoder.encode("", True), b"org")
encoder.reset() encoder.reset()
self.assertEquals(encoder.encode("\xe4x"), "") self.assertEquals(encoder.encode("\xe4x"), b"")
self.assertEquals(encoder.encode("ample.org."), "xn--xample-9ta.org.") self.assertEquals(encoder.encode("ample.org."), b"xn--xample-9ta.org.")
self.assertEquals(encoder.encode("", True), "") self.assertEquals(encoder.encode("", True), b"")
class CodecsModuleTest(unittest.TestCase): class CodecsModuleTest(unittest.TestCase):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment