Commit 78be7df9 authored by Martin v. Löwis

Patch #918101: Add tarfile open mode r|* for auto-detection of the

stream compression; add, for symmetry reasons, r:* as a synonym of r.
parent 409d8f2e
...@@ -32,7 +32,7 @@ Some facts and figures: ...@@ -32,7 +32,7 @@ Some facts and figures:
it defaults to \code{'r'}. Here is a full list of mode combinations: it defaults to \code{'r'}. Here is a full list of mode combinations:
\begin{tableii}{c|l}{code}{mode}{action} \begin{tableii}{c|l}{code}{mode}{action}
\lineii{'r'}{Open for reading with transparent compression (recommended).} \lineii{'r' or 'r:*'}{Open for reading with transparent compression (recommended).}
\lineii{'r:'}{Open for reading exclusively without compression.} \lineii{'r:'}{Open for reading exclusively without compression.}
\lineii{'r:gz'}{Open for reading with gzip compression.} \lineii{'r:gz'}{Open for reading with gzip compression.}
\lineii{'r:bz2'}{Open for reading with bzip2 compression.} \lineii{'r:bz2'}{Open for reading with bzip2 compression.}
...@@ -65,6 +65,7 @@ Some facts and figures: ...@@ -65,6 +65,7 @@ Some facts and figures:
(section~\ref{tar-examples}). The currently possible modes: (section~\ref{tar-examples}). The currently possible modes:
\begin{tableii}{c|l}{code}{Mode}{Action} \begin{tableii}{c|l}{code}{Mode}{Action}
\lineii{'r|*'}{Open a \emph{stream} of tar blocks for reading with transparent compression.}
\lineii{'r|'}{Open a \emph{stream} of uncompressed tar blocks for reading.} \lineii{'r|'}{Open a \emph{stream} of uncompressed tar blocks for reading.}
\lineii{'r|gz'}{Open a gzip compressed \emph{stream} for reading.} \lineii{'r|gz'}{Open a gzip compressed \emph{stream} for reading.}
\lineii{'r|bz2'}{Open a bzip2 compressed \emph{stream} for reading.} \lineii{'r|bz2'}{Open a bzip2 compressed \emph{stream} for reading.}
......
...@@ -274,7 +274,7 @@ class _Stream: ...@@ -274,7 +274,7 @@ class _Stream:
_Stream is intended to be used only internally. _Stream is intended to be used only internally.
""" """
def __init__(self, name, mode, type, fileobj, bufsize): def __init__(self, name, mode, comptype, fileobj, bufsize):
"""Construct a _Stream object. """Construct a _Stream object.
""" """
self._extfileobj = True self._extfileobj = True
...@@ -282,16 +282,22 @@ class _Stream: ...@@ -282,16 +282,22 @@ class _Stream:
fileobj = _LowLevelFile(name, mode) fileobj = _LowLevelFile(name, mode)
self._extfileobj = False self._extfileobj = False
self.name = name or "" if comptype == '*':
self.mode = mode # Enable transparent compression detection for the
self.type = type # stream interface
self.fileobj = fileobj fileobj = _StreamProxy(fileobj)
self.bufsize = bufsize comptype = fileobj.getcomptype()
self.buf = ""
self.pos = 0L self.name = name or ""
self.closed = False self.mode = mode
self.comptype = comptype
if type == "gz": self.fileobj = fileobj
self.bufsize = bufsize
self.buf = ""
self.pos = 0L
self.closed = False
if comptype == "gz":
try: try:
import zlib import zlib
except ImportError: except ImportError:
...@@ -303,7 +309,7 @@ class _Stream: ...@@ -303,7 +309,7 @@ class _Stream:
else: else:
self._init_write_gz() self._init_write_gz()
if type == "bz2": if comptype == "bz2":
try: try:
import bz2 import bz2
except ImportError: except ImportError:
...@@ -315,7 +321,7 @@ class _Stream: ...@@ -315,7 +321,7 @@ class _Stream:
self.cmp = bz2.BZ2Compressor() self.cmp = bz2.BZ2Compressor()
def __del__(self): def __del__(self):
if not self.closed: if hasattr(self, "closed") and not self.closed:
self.close() self.close()
def _init_write_gz(self): def _init_write_gz(self):
...@@ -334,10 +340,10 @@ class _Stream: ...@@ -334,10 +340,10 @@ class _Stream:
def write(self, s): def write(self, s):
"""Write string s to the stream. """Write string s to the stream.
""" """
if self.type == "gz": if self.comptype == "gz":
self.crc = self.zlib.crc32(s, self.crc) self.crc = self.zlib.crc32(s, self.crc)
self.pos += len(s) self.pos += len(s)
if self.type != "tar": if self.comptype != "tar":
s = self.cmp.compress(s) s = self.cmp.compress(s)
self.__write(s) self.__write(s)
...@@ -357,12 +363,16 @@ class _Stream: ...@@ -357,12 +363,16 @@ class _Stream:
if self.closed: if self.closed:
return return
if self.mode == "w" and self.type != "tar": if self.mode == "w" and self.comptype != "tar":
self.buf += self.cmp.flush() self.buf += self.cmp.flush()
if self.mode == "w" and self.buf: if self.mode == "w" and self.buf:
blocks, remainder = divmod(len(self.buf), self.bufsize)
if remainder > 0:
self.buf += NUL * (self.bufsize - remainder)
self.fileobj.write(self.buf) self.fileobj.write(self.buf)
self.buf = "" self.buf = ""
if self.type == "gz": if self.comptype == "gz":
self.fileobj.write(struct.pack("<l", self.crc)) self.fileobj.write(struct.pack("<l", self.crc))
self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL)) self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
...@@ -441,7 +451,7 @@ class _Stream: ...@@ -441,7 +451,7 @@ class _Stream:
def _read(self, size): def _read(self, size):
"""Return size bytes from the stream. """Return size bytes from the stream.
""" """
if self.type == "tar": if self.comptype == "tar":
return self.__read(size) return self.__read(size)
c = len(self.dbuf) c = len(self.dbuf)
...@@ -474,6 +484,30 @@ class _Stream: ...@@ -474,6 +484,30 @@ class _Stream:
return t[:size] return t[:size]
# class _Stream # class _Stream
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').

       The first BLOCKSIZE bytes of the wrapped file object are read
       eagerly so that getcomptype() can inspect the magic bytes without
       needing to seek.  The first read() call hands that block back;
       every later read() goes straight to the wrapped file object.
    """

    def __init__(self, fileobj):
        """Wrap fileobj and pre-read its first block for detection.
        """
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the pre-read first block.

           size is ignored for this first call: the whole buffered block
           is returned, so the caller must cope with receiving a
           different amount of data than it asked for.
        """
        # Rebind read on the instance so that all subsequent calls bypass
        # the proxy and hit the underlying file object directly.
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2" or "tar" depending on the leading bytes.
        """
        # gzip: ID1, ID2 and the deflate compression method byte.
        if self.buf.startswith("\037\213\010"):
            return "gz"
        # bzip2: "BZh", one block-size digit ('1'-'9') and then the
        # block magic "1AY&SY".  The digit is skipped deliberately so
        # that streams written with any compression level are
        # recognised, not only level 9.
        if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
            return "bz2"
        return "tar"

    def close(self):
        """Close the wrapped file object.
        """
        self.fileobj.close()
# class _StreamProxy
#------------------------ #------------------------
# Extraction file object # Extraction file object
#------------------------ #------------------------
...@@ -879,7 +913,7 @@ class TarFile(object): ...@@ -879,7 +913,7 @@ class TarFile(object):
an appropriate TarFile class. an appropriate TarFile class.
mode: mode:
'r' open for reading with transparent compression 'r' or 'r:*' open for reading with transparent compression
'r:' open for reading exclusively uncompressed 'r:' open for reading exclusively uncompressed
'r:gz' open for reading with gzip compression 'r:gz' open for reading with gzip compression
'r:bz2' open for reading with bzip2 compression 'r:bz2' open for reading with bzip2 compression
...@@ -887,6 +921,8 @@ class TarFile(object): ...@@ -887,6 +921,8 @@ class TarFile(object):
'w' or 'w:' open for writing without compression 'w' or 'w:' open for writing without compression
'w:gz' open for writing with gzip compression 'w:gz' open for writing with gzip compression
'w:bz2' open for writing with bzip2 compression 'w:bz2' open for writing with bzip2 compression
'r|*' open a stream of tar blocks with transparent compression
'r|' open an uncompressed stream of tar blocks for reading 'r|' open an uncompressed stream of tar blocks for reading
'r|gz' open a gzip compressed stream of tar blocks 'r|gz' open a gzip compressed stream of tar blocks
'r|bz2' open a bzip2 compressed stream of tar blocks 'r|bz2' open a bzip2 compressed stream of tar blocks
...@@ -898,7 +934,17 @@ class TarFile(object): ...@@ -898,7 +934,17 @@ class TarFile(object):
if not name and not fileobj: if not name and not fileobj:
raise ValueError, "nothing to open" raise ValueError, "nothing to open"
if ":" in mode: if mode in ("r", "r:*"):
# Find out which *open() is appropriate for opening the file.
for comptype in cls.OPEN_METH:
func = getattr(cls, cls.OPEN_METH[comptype])
try:
return func(name, "r", fileobj)
except (ReadError, CompressionError):
continue
raise ReadError, "file could not be opened successfully"
elif ":" in mode:
filemode, comptype = mode.split(":", 1) filemode, comptype = mode.split(":", 1)
filemode = filemode or "r" filemode = filemode or "r"
comptype = comptype or "tar" comptype = comptype or "tar"
...@@ -924,16 +970,6 @@ class TarFile(object): ...@@ -924,16 +970,6 @@ class TarFile(object):
t._extfileobj = False t._extfileobj = False
return t return t
elif mode == "r":
# Find out which *open() is appropriate for opening the file.
for comptype in cls.OPEN_METH:
func = getattr(cls, cls.OPEN_METH[comptype])
try:
return func(name, "r", fileobj)
except (ReadError, CompressionError):
continue
raise ReadError, "file could not be opened successfully"
elif mode in "aw": elif mode in "aw":
return cls.taropen(name, mode, fileobj) return cls.taropen(name, mode, fileobj)
......
...@@ -181,6 +181,18 @@ class ReadStreamTest(ReadTest): ...@@ -181,6 +181,18 @@ class ReadStreamTest(ReadTest):
stream.close() stream.close()
class ReadAsteriskTest(ReadTest):
    """ReadTest variant that opens the archive with '*' as the
       compression part of the mode string.
    """

    def setUp(self):
        # Join the fixture's mode and separator with "*" so that tarfile
        # is asked to detect the compression itself.
        open_mode = "".join((self.mode, self.sep, "*"))
        archive = tarname(self.comp)
        self.tar = tarfile.open(archive, open_mode)
class ReadStreamAsteriskTest(ReadStreamTest):
    """ReadStreamTest variant that opens the archive with '*' as the
       compression part of the mode string.
    """

    def setUp(self):
        # Join the fixture's mode and separator with "*" so that tarfile
        # is asked to detect the compression itself.
        open_mode = "".join((self.mode, self.sep, "*"))
        archive = tarname(self.comp)
        self.tar = tarfile.open(archive, open_mode)
class WriteTest(BaseTest): class WriteTest(BaseTest):
mode = 'w' mode = 'w'
...@@ -336,6 +348,11 @@ class WriteTestGzip(WriteTest): ...@@ -336,6 +348,11 @@ class WriteTestGzip(WriteTest):
comp = "gz" comp = "gz"
class WriteStreamTestGzip(WriteStreamTest): class WriteStreamTestGzip(WriteStreamTest):
comp = "gz" comp = "gz"
# ReadAsteriskTest with comp set to "gz"; setUp passes comp to tarname().
class ReadAsteriskTestGzip(ReadAsteriskTest):
comp = "gz"
# ReadStreamAsteriskTest with comp set to "gz"; setUp passes comp to tarname().
class ReadStreamAsteriskTestGzip(ReadStreamAsteriskTest):
comp = "gz"
# Filemode test cases # Filemode test cases
...@@ -355,6 +372,10 @@ if bz2: ...@@ -355,6 +372,10 @@ if bz2:
comp = "bz2" comp = "bz2"
class WriteStreamTestBzip2(WriteStreamTestGzip): class WriteStreamTestBzip2(WriteStreamTestGzip):
comp = "bz2" comp = "bz2"
# ReadAsteriskTest with comp set to "bz2"; setUp passes comp to tarname().
class ReadAsteriskTestBzip2(ReadAsteriskTest):
comp = "bz2"
# ReadStreamAsteriskTest with comp set to "bz2"; setUp passes comp to tarname().
class ReadStreamAsteriskTestBzip2(ReadStreamAsteriskTest):
comp = "bz2"
# If importing gzip failed, discard the Gzip TestCases. # If importing gzip failed, discard the Gzip TestCases.
if not gzip: if not gzip:
...@@ -375,6 +396,8 @@ def test_main(): ...@@ -375,6 +396,8 @@ def test_main():
FileModeTest, FileModeTest,
ReadTest, ReadTest,
ReadStreamTest, ReadStreamTest,
ReadAsteriskTest,
ReadStreamAsteriskTest,
WriteTest, WriteTest,
WriteStreamTest, WriteStreamTest,
WriteGNULongTest, WriteGNULongTest,
...@@ -386,13 +409,15 @@ def test_main(): ...@@ -386,13 +409,15 @@ def test_main():
if gzip: if gzip:
tests.extend([ tests.extend([
ReadTestGzip, ReadStreamTestGzip, ReadTestGzip, ReadStreamTestGzip,
WriteTestGzip, WriteStreamTestGzip WriteTestGzip, WriteStreamTestGzip,
ReadAsteriskTestGzip, ReadStreamAsteriskTestGzip
]) ])
if bz2: if bz2:
tests.extend([ tests.extend([
ReadTestBzip2, ReadStreamTestBzip2, ReadTestBzip2, ReadStreamTestBzip2,
WriteTestBzip2, WriteStreamTestBzip2 WriteTestBzip2, WriteStreamTestBzip2,
ReadAsteriskTestBzip2, ReadStreamAsteriskTestBzip2
]) ])
try: try:
test_support.run_unittest(*tests) test_support.run_unittest(*tests)
......
...@@ -78,6 +78,9 @@ Extension Modules ...@@ -78,6 +78,9 @@ Extension Modules
Library Library
------- -------
- Patch #918101: Add tarfile open mode r|* for auto-detection of the
stream compression; add, for symmetry reasons, r:* as a synonym of r.
- Patch #1043890: Add extractall method to tarfile. - Patch #1043890: Add extractall method to tarfile.
- Patch #1075887: Don't require MSVC in distutils if there is nothing - Patch #1075887: Don't require MSVC in distutils if there is nothing
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment