Commit 5d0a4df5 authored by Guido van Rossum

Merged revisions 59041-59055 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk

........
  r59044 | neal.norwitz | 2007-11-18 17:46:20 -0800 (Sun, 18 Nov 2007) | 1 line

  Use a slightly more recent version than 1.5.2b2.
........
  r59047 | walter.doerwald | 2007-11-19 04:14:05 -0800 (Mon, 19 Nov 2007) | 2 lines

  Fix typo in comment.
........
  r59049 | walter.doerwald | 2007-11-19 04:41:10 -0800 (Mon, 19 Nov 2007) | 4 lines

  Fix for #1444: utf_8_sig.StreamReader was (indirectly through decode())
  calling codecs.utf_8_decode() with final==True, which failed with incomplete
  byte sequences. Fix and test by James G. Sack.
........
  r59051 | nick.coghlan | 2007-11-19 05:56:27 -0800 (Mon, 19 Nov 2007) | 1 line

  Enable some test_cmd_line_script debugging output to investigate failure on Mac OSX buildbot
........
  r59053 | facundo.batista | 2007-11-19 08:30:24 -0800 (Mon, 19 Nov 2007) | 3 lines


  Fixed detail in add_type() explanation (issue 1463).
........
  r59054 | guido.van.rossum | 2007-11-19 09:35:24 -0800 (Mon, 19 Nov 2007) | 2 lines

  Make this work stand-alone, too.
........
  r59055 | guido.van.rossum | 2007-11-19 09:50:22 -0800 (Mon, 19 Nov 2007) | 3 lines

  Fix the OSX failures in this test -- they were due to /tmp being a symlink
  to /private/tmp.  Adding a call to os.path.realpath() to temp_dir() fixed it.
........
parent 75cce6dd
...@@ -96,8 +96,8 @@ behavior of the module. ...@@ -96,8 +96,8 @@ behavior of the module.
extension is already known, the new type will replace the old one. When the type extension is already known, the new type will replace the old one. When the type
is already known the extension will be added to the list of known extensions. is already known the extension will be added to the list of known extensions.
When *strict* is the mapping will added to the official MIME types, otherwise to When *strict* is True (the default), the mapping will added to the official MIME
the non-standard ones. types, otherwise to the non-standard ones.
.. data:: inited .. data:: inited
......
...@@ -103,12 +103,18 @@ class StreamReader(codecs.StreamReader): ...@@ -103,12 +103,18 @@ class StreamReader(codecs.StreamReader):
pass pass
def decode(self, input, errors='strict'): def decode(self, input, errors='strict'):
if len(input) < 3 and codecs.BOM_UTF8.startswith(input): if len(input) < 3:
# not enough data to decide if this is a BOM if codecs.BOM_UTF8.startswith(input):
# => try again on the next call # not enough data to decide if this is a BOM
return ("", 0) # => try again on the next call
return ("", 0)
elif input[:3] == codecs.BOM_UTF8:
self.decode = codecs.utf_8_decode
(output, consumed) = codecs.utf_8_decode(input[3:],errors)
return (output, consumed+3)
# (else) no BOM present
self.decode = codecs.utf_8_decode self.decode = codecs.utf_8_decode
return decode(input, errors) return codecs.utf_8_decode(input, errors)
### encodings module API ### encodings module API
......
...@@ -29,6 +29,7 @@ def _run_python(*args): ...@@ -29,6 +29,7 @@ def _run_python(*args):
@contextlib.contextmanager @contextlib.contextmanager
def temp_dir(): def temp_dir():
dirname = tempfile.mkdtemp() dirname = tempfile.mkdtemp()
dirname = os.path.realpath(dirname)
try: try:
yield dirname yield dirname
finally: finally:
...@@ -82,7 +83,7 @@ def _make_test_zip(zip_dir, zip_basename, script_name): ...@@ -82,7 +83,7 @@ def _make_test_zip(zip_dir, zip_basename, script_name):
zip_file.close() zip_file.close()
# if verbose: # if verbose:
# zip_file = zipfile.ZipFile(zip_name, 'r') # zip_file = zipfile.ZipFile(zip_name, 'r')
# print "Contents of %r:" % zip_name # print("Contents of %r:" % zip_name)
# zip_file.printdir() # zip_file.printdir()
# zip_file.close() # zip_file.close()
return zip_name return zip_name
...@@ -90,9 +91,9 @@ def _make_test_zip(zip_dir, zip_basename, script_name): ...@@ -90,9 +91,9 @@ def _make_test_zip(zip_dir, zip_basename, script_name):
class CmdLineTest(unittest.TestCase): class CmdLineTest(unittest.TestCase):
def _check_script(self, script_name, expected_file, expected_argv0): def _check_script(self, script_name, expected_file, expected_argv0):
exit_code, data = _run_python(script_name) exit_code, data = _run_python(script_name)
# if verbose: if verbose:
# print "Output from test script %r:" % script_name print("Output from test script %r:" % script_name)
# print data print(data)
self.assertEqual(exit_code, 0, data) self.assertEqual(exit_code, 0, data)
printed_file = '__file__==%r' % expected_file printed_file = '__file__==%r' % expected_file
printed_argv0 = 'sys.argv[0]==%r' % expected_argv0 printed_argv0 = 'sys.argv[0]==%r' % expected_argv0
......
...@@ -59,7 +59,7 @@ class MixInCheckStateHandling: ...@@ -59,7 +59,7 @@ class MixInCheckStateHandling:
class ReadTest(unittest.TestCase, MixInCheckStateHandling): class ReadTest(unittest.TestCase, MixInCheckStateHandling):
def check_partial(self, input, partialresults): def check_partial(self, input, partialresults):
# get a StreamReader for the encoding and feed the bytestring version # get a StreamReader for the encoding and feed the bytestring version
# of input to the reader byte by byte. Read every available from # of input to the reader byte by byte. Read everything available from
# the StreamReader and check that the results equal the appropriate # the StreamReader and check that the results equal the appropriate
# entries from partialresults. # entries from partialresults.
q = Queue(b"") q = Queue(b"")
...@@ -618,10 +618,53 @@ class UTF8SigTest(ReadTest): ...@@ -618,10 +618,53 @@ class UTF8SigTest(ReadTest):
s = "spam" s = "spam"
self.assertEqual(d.decode(s.encode("utf-8-sig")), s) self.assertEqual(d.decode(s.encode("utf-8-sig")), s)
def test_decoder_state(self): def test_stream_bom(self):
u = "\x00\x7f\x80\xff\u0100\u07ff\u0800\uffff\U0010ffff" unistring = "ABC\u00A1\u2200XYZ"
self.check_state_handling_decode(self.encoding, bytestring = codecs.BOM_UTF8 + b"ABC\xC2\xA1\xE2\x88\x80XYZ"
u, u.encode(self.encoding))
reader = codecs.getreader("utf-8-sig")
for sizehint in [None] + list(range(1, 11)) + \
[64, 128, 256, 512, 1024]:
istream = reader(io.BytesIO(bytestring))
ostream = io.StringIO()
while 1:
if sizehint is not None:
data = istream.read(sizehint)
else:
data = istream.read()
if not data:
break
ostream.write(data)
got = ostream.getvalue()
self.assertEqual(got, unistring)
def test_stream_bare(self):
unistring = "ABC\u00A1\u2200XYZ"
bytestring = b"ABC\xC2\xA1\xE2\x88\x80XYZ"
reader = codecs.getreader("utf-8-sig")
for sizehint in [None] + list(range(1, 11)) + \
[64, 128, 256, 512, 1024]:
istream = reader(io.BytesIO(bytestring))
ostream = io.StringIO()
while 1:
if sizehint is not None:
data = istream.read(sizehint)
else:
data = istream.read()
if not data:
break
ostream.write(data)
got = ostream.getvalue()
self.assertEqual(got, unistring)
class EscapeDecodeTest(unittest.TestCase):
def test_empty(self):
self.assertEquals(codecs.escape_decode(""), ("", 0))
class RecodingTest(unittest.TestCase): class RecodingTest(unittest.TestCase):
def test_recoding(self): def test_recoding(self):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment