Commit b506dc32 authored by Lars Gustäbel

Completed str/unicode unification.

All tests pass, but maybe some tests have become unnecessary now.
Removed PaxUnicodeTest, added MiscTest.

TarFile.extractfile() returns a binary file object which can be used
with a TextIOWrapper for text I/O.
parent cd869d8d
This diff is collapsed.
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
import sys import sys
import os import os
import io
import shutil import shutil
import tempfile import tempfile
import StringIO import StringIO
...@@ -64,8 +65,8 @@ class UstarReadTest(ReadTest): ...@@ -64,8 +65,8 @@ class UstarReadTest(ReadTest):
def test_fileobj_readlines(self): def test_fileobj_readlines(self):
self.tar.extract("ustar/regtype", TEMPDIR) self.tar.extract("ustar/regtype", TEMPDIR)
tarinfo = self.tar.getmember("ustar/regtype") tarinfo = self.tar.getmember("ustar/regtype")
fobj1 = open(os.path.join(TEMPDIR, "ustar/regtype"), "rU") fobj1 = open(os.path.join(TEMPDIR, "ustar/regtype"), "r")
fobj2 = self.tar.extractfile(tarinfo) fobj2 = io.TextIOWrapper(self.tar.extractfile(tarinfo))
lines1 = fobj1.readlines() lines1 = fobj1.readlines()
lines2 = fobj2.readlines() lines2 = fobj2.readlines()
...@@ -83,7 +84,7 @@ class UstarReadTest(ReadTest): ...@@ -83,7 +84,7 @@ class UstarReadTest(ReadTest):
fobj1 = open(os.path.join(TEMPDIR, "ustar/regtype"), "rU") fobj1 = open(os.path.join(TEMPDIR, "ustar/regtype"), "rU")
fobj2 = self.tar.extractfile(tarinfo) fobj2 = self.tar.extractfile(tarinfo)
lines1 = fobj1.readlines() lines1 = fobj1.readlines()
lines2 = [line for line in fobj2] lines2 = list(io.TextIOWrapper(fobj2))
self.assert_(lines1 == lines2, self.assert_(lines1 == lines2,
"fileobj.__iter__() failed") "fileobj.__iter__() failed")
...@@ -115,11 +116,11 @@ class UstarReadTest(ReadTest): ...@@ -115,11 +116,11 @@ class UstarReadTest(ReadTest):
fobj.seek(0, 2) fobj.seek(0, 2)
self.assertEqual(tarinfo.size, fobj.tell(), self.assertEqual(tarinfo.size, fobj.tell(),
"seek() to file's end failed") "seek() to file's end failed")
self.assert_(fobj.read() == "", self.assert_(fobj.read() == b"",
"read() at file's end did not return empty string") "read() at file's end did not return empty string")
fobj.seek(-tarinfo.size, 2) fobj.seek(-tarinfo.size, 2)
self.assertEqual(0, fobj.tell(), self.assertEqual(0, fobj.tell(),
"relative seek() to file's start failed") "relative seek() to file's end failed")
fobj.seek(512) fobj.seek(512)
s1 = fobj.readlines() s1 = fobj.readlines()
fobj.seek(512) fobj.seek(512)
...@@ -245,13 +246,13 @@ class DetectReadTest(unittest.TestCase): ...@@ -245,13 +246,13 @@ class DetectReadTest(unittest.TestCase):
def _testfunc_file(self, name, mode): def _testfunc_file(self, name, mode):
try: try:
tarfile.open(name, mode) tarfile.open(name, mode)
except tarfile.ReadError: except tarfile.ReadError as e:
self.fail() self.fail()
def _testfunc_fileobj(self, name, mode): def _testfunc_fileobj(self, name, mode):
try: try:
tarfile.open(name, mode, fileobj=open(name, "rb")) tarfile.open(name, mode, fileobj=open(name, "rb"))
except tarfile.ReadError: except tarfile.ReadError as e:
self.fail() self.fail()
def _test_modes(self, testfunc): def _test_modes(self, testfunc):
...@@ -393,7 +394,7 @@ class LongnameTest(ReadTest): ...@@ -393,7 +394,7 @@ class LongnameTest(ReadTest):
tarinfo = self.tar.getmember(longname) tarinfo = self.tar.getmember(longname)
offset = tarinfo.offset offset = tarinfo.offset
self.tar.fileobj.seek(offset) self.tar.fileobj.seek(offset)
fobj = StringIO.StringIO(self.tar.fileobj.read(3 * 512)) fobj = io.BytesIO(self.tar.fileobj.read(3 * 512))
self.assertRaises(tarfile.ReadError, tarfile.open, name="foo.tar", fileobj=fobj) self.assertRaises(tarfile.ReadError, tarfile.open, name="foo.tar", fileobj=fobj)
def test_header_offset(self): def test_header_offset(self):
...@@ -401,9 +402,9 @@ class LongnameTest(ReadTest): ...@@ -401,9 +402,9 @@ class LongnameTest(ReadTest):
# the preceding extended header. # the preceding extended header.
longname = self.subdir + "/" + "123/" * 125 + "longname" longname = self.subdir + "/" + "123/" * 125 + "longname"
offset = self.tar.getmember(longname).offset offset = self.tar.getmember(longname).offset
fobj = open(tarname) fobj = open(tarname, "rb")
fobj.seek(offset) fobj.seek(offset)
tarinfo = tarfile.TarInfo.frombuf(fobj.read(512)) tarinfo = tarfile.TarInfo.frombuf(fobj.read(512), "iso8859-1", "strict")
self.assertEqual(tarinfo.type, self.longnametype) self.assertEqual(tarinfo.type, self.longnametype)
...@@ -764,10 +765,10 @@ class PaxWriteTest(GNUWriteTest): ...@@ -764,10 +765,10 @@ class PaxWriteTest(GNUWriteTest):
self.assertEqual(tar.pax_headers, pax_headers) self.assertEqual(tar.pax_headers, pax_headers)
self.assertEqual(tar.getmembers()[0].pax_headers, pax_headers) self.assertEqual(tar.getmembers()[0].pax_headers, pax_headers)
# Test if all the fields are unicode. # Test if all the fields are strings.
for key, val in tar.pax_headers.items(): for key, val in tar.pax_headers.items():
self.assert_(type(key) is unicode) self.assert_(type(key) is not bytes)
self.assert_(type(val) is unicode) self.assert_(type(val) is not bytes)
if key in tarfile.PAX_NUMBER_FIELDS: if key in tarfile.PAX_NUMBER_FIELDS:
try: try:
tarfile.PAX_NUMBER_FIELDS[key](val) tarfile.PAX_NUMBER_FIELDS[key](val)
...@@ -815,20 +816,14 @@ class UstarUnicodeTest(unittest.TestCase): ...@@ -815,20 +816,14 @@ class UstarUnicodeTest(unittest.TestCase):
tar.close() tar.close()
tar = tarfile.open(tmpname, encoding=encoding) tar = tarfile.open(tmpname, encoding=encoding)
self.assert_(type(tar.getnames()[0]) is not unicode) self.assert_(type(tar.getnames()[0]) is not bytes)
self.assertEqual(tar.getmembers()[0].name, name.encode(encoding)) self.assertEqual(tar.getmembers()[0].name, name)
tar.close() tar.close()
def test_unicode_filename_error(self): def test_unicode_filename_error(self):
tar = tarfile.open(tmpname, "w", format=self.format, encoding="ascii", errors="strict") tar = tarfile.open(tmpname, "w", format=self.format, encoding="ascii", errors="strict")
tarinfo = tarfile.TarInfo() tarinfo = tarfile.TarInfo()
tarinfo.name = ""
if self.format == tarfile.PAX_FORMAT:
self.assertRaises(UnicodeError, tar.addfile, tarinfo)
else:
tar.addfile(tarinfo)
tarinfo.name = "" tarinfo.name = ""
self.assertRaises(UnicodeError, tar.addfile, tarinfo) self.assertRaises(UnicodeError, tar.addfile, tarinfo)
...@@ -851,7 +846,7 @@ class UstarUnicodeTest(unittest.TestCase): ...@@ -851,7 +846,7 @@ class UstarUnicodeTest(unittest.TestCase):
t.uname = name t.uname = name
t.gname = name t.gname = name
fobj = StringIO.StringIO() fobj = io.BytesIO()
tar = tarfile.open("foo.tar", mode="w", fileobj=fobj, format=self.format, encoding="iso8859-1") tar = tarfile.open("foo.tar", mode="w", fileobj=fobj, format=self.format, encoding="iso8859-1")
tar.addfile(t) tar.addfile(t)
tar.close() tar.close()
...@@ -862,46 +857,12 @@ class UstarUnicodeTest(unittest.TestCase): ...@@ -862,46 +857,12 @@ class UstarUnicodeTest(unittest.TestCase):
self.assertEqual(t.uname, "") self.assertEqual(t.uname, "")
self.assertEqual(t.gname, "") self.assertEqual(t.gname, "")
class GNUUnicodeTest(UstarUnicodeTest): class GNUUnicodeTest(UstarUnicodeTest):
format = tarfile.GNU_FORMAT format = tarfile.GNU_FORMAT
class PaxUnicodeTest(UstarUnicodeTest):
format = tarfile.PAX_FORMAT
def _create_unicode_name(self, name):
tar = tarfile.open(tmpname, "w", format=self.format)
t = tarfile.TarInfo()
t.pax_headers["path"] = name
tar.addfile(t)
tar.close()
def test_error_handlers(self):
# Test if the unicode error handlers work correctly for characters
# that cannot be expressed in a given encoding.
self._create_unicode_name("")
for handler, name in (("utf-8", "".encode("utf8")),
("replace", "???"), ("ignore", "")):
tar = tarfile.open(tmpname, format=self.format, encoding="ascii",
errors=handler)
self.assertEqual(tar.getnames()[0], name)
self.assertRaises(UnicodeError, tarfile.open, tmpname,
encoding="ascii", errors="strict")
def test_error_handler_utf8(self):
# Create a pathname that has one component representable using
# iso8859-1 and the other only in iso8859-15.
self._create_unicode_name("/")
tar = tarfile.open(tmpname, format=self.format, encoding="iso8859-1",
errors="utf-8")
self.assertEqual(tar.getnames()[0], "/" + "".encode("utf8"))
class AppendTest(unittest.TestCase): class AppendTest(unittest.TestCase):
# Test append mode (cp. patch #1652681). # Test append mode (cp. patch #1652681).
...@@ -1028,6 +989,19 @@ class LimitsTest(unittest.TestCase): ...@@ -1028,6 +989,19 @@ class LimitsTest(unittest.TestCase):
tarinfo.tobuf(tarfile.PAX_FORMAT) tarinfo.tobuf(tarfile.PAX_FORMAT)
class MiscTest(unittest.TestCase):
def test_char_fields(self):
self.assertEqual(tarfile.stn("foo", 8, "ascii", "strict"), b"foo\0\0\0\0\0")
self.assertEqual(tarfile.stn("foobar", 3, "ascii", "strict"), b"foo")
self.assertEqual(tarfile.nts(b"foo\0\0\0\0\0", "ascii", "strict"), "foo")
self.assertEqual(tarfile.nts(b"foo\0bar\0", "ascii", "strict"), "foo")
def test_number_fields(self):
self.assertEqual(tarfile.itn(1), b"0000001\x00")
self.assertEqual(tarfile.itn(0xffffffff), b"\x80\x00\x00\x00\xff\xff\xff\xff")
class GzipMiscReadTest(MiscReadTest): class GzipMiscReadTest(MiscReadTest):
tarname = gzipname tarname = gzipname
mode = "r:gz" mode = "r:gz"
...@@ -1075,9 +1049,9 @@ def test_main(): ...@@ -1075,9 +1049,9 @@ def test_main():
PaxWriteTest, PaxWriteTest,
UstarUnicodeTest, UstarUnicodeTest,
GNUUnicodeTest, GNUUnicodeTest,
PaxUnicodeTest,
AppendTest, AppendTest,
LimitsTest, LimitsTest,
MiscTest,
] ]
if hasattr(os, "link"): if hasattr(os, "link"):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment