Commit b3bd624a authored by Georg Brandl

Back out patch for #1159051, which caused backwards compatibility problems.

parent 64949fa2
...@@ -65,6 +65,9 @@ def write32u(output, value): ...@@ -65,6 +65,9 @@ def write32u(output, value):
# or unsigned. # or unsigned.
output.write(struct.pack("<L", value)) output.write(struct.pack("<L", value))
def read32(input):
    """Read a little-endian unsigned 32-bit integer from *input*.

    *input* is any object whose ``read(4)`` returns a bytes object.
    If fewer than four bytes come back, ``struct.unpack`` raises
    ``struct.error``.
    """
    return struct.unpack("<I", input.read(4))[0]
class _PaddedFile: class _PaddedFile:
"""Minimal read-only file object that prepends a string to the contents """Minimal read-only file object that prepends a string to the contents
of an actual file. Shouldn't be used outside of gzip.py, as it lacks of an actual file. Shouldn't be used outside of gzip.py, as it lacks
...@@ -278,32 +281,28 @@ class GzipFile(io.BufferedIOBase): ...@@ -278,32 +281,28 @@ class GzipFile(io.BufferedIOBase):
self.crc = zlib.crc32(b"") & 0xffffffff self.crc = zlib.crc32(b"") & 0xffffffff
self.size = 0 self.size = 0
def _read_exact(self, n):
    """Read exactly *n* bytes from ``self.fileobj``.

    A single ``read()`` call may return fewer bytes than requested, so
    keep reading until *n* bytes have been collected.

    Raises EOFError if the underlying file runs out of data first.
    """
    data = self.fileobj.read(n)
    while len(data) < n:
        b = self.fileobj.read(n - len(data))
        if not b:
            # An empty read means end of file: the stream is truncated.
            raise EOFError("Compressed file ended before the "
                           "end-of-stream marker was reached")
        data += b
    return data
def _read_gzip_header(self): def _read_gzip_header(self):
magic = self.fileobj.read(2) magic = self.fileobj.read(2)
if magic == b'': if magic == b'':
return False raise EOFError("Reached EOF")
if magic != b'\037\213': if magic != b'\037\213':
raise IOError('Not a gzipped file') raise IOError('Not a gzipped file')
method, flag, self.mtime = struct.unpack("<BBIxx", self._read_exact(8)) method = ord( self.fileobj.read(1) )
if method != 8: if method != 8:
raise IOError('Unknown compression method') raise IOError('Unknown compression method')
flag = ord( self.fileobj.read(1) )
self.mtime = read32(self.fileobj)
# extraflag = self.fileobj.read(1)
# os = self.fileobj.read(1)
self.fileobj.read(2)
if flag & FEXTRA: if flag & FEXTRA:
# Read & discard the extra field, if present # Read & discard the extra field, if present
extra_len, = struct.unpack("<H", self._read_exact(2)) xlen = ord(self.fileobj.read(1))
self._read_exact(extra_len) xlen = xlen + 256*ord(self.fileobj.read(1))
self.fileobj.read(xlen)
if flag & FNAME: if flag & FNAME:
# Read and discard a null-terminated string containing the filename # Read and discard a null-terminated string containing the filename
while True: while True:
...@@ -317,13 +316,12 @@ class GzipFile(io.BufferedIOBase): ...@@ -317,13 +316,12 @@ class GzipFile(io.BufferedIOBase):
if not s or s==b'\000': if not s or s==b'\000':
break break
if flag & FHCRC: if flag & FHCRC:
self._read_exact(2) # Read & discard the 16-bit header CRC self.fileobj.read(2) # Read & discard the 16-bit header CRC
unused = self.fileobj.unused() unused = self.fileobj.unused()
if unused: if unused:
uncompress = self.decompress.decompress(unused) uncompress = self.decompress.decompress(unused)
self._add_read_data(uncompress) self._add_read_data(uncompress)
return True
def write(self,data): def write(self,data):
self._check_closed() self._check_closed()
...@@ -357,16 +355,20 @@ class GzipFile(io.BufferedIOBase): ...@@ -357,16 +355,20 @@ class GzipFile(io.BufferedIOBase):
readsize = 1024 readsize = 1024
if size < 0: # get the whole thing if size < 0: # get the whole thing
while self._read(readsize): try:
readsize = min(self.max_read_chunk, readsize * 2) while True:
size = self.extrasize self._read(readsize)
readsize = min(self.max_read_chunk, readsize * 2)
except EOFError:
size = self.extrasize
else: # just get some more of it else: # just get some more of it
while size > self.extrasize: try:
if not self._read(readsize): while size > self.extrasize:
if size > self.extrasize: self._read(readsize)
size = self.extrasize readsize = min(self.max_read_chunk, readsize * 2)
break except EOFError:
readsize = min(self.max_read_chunk, readsize * 2) if size > self.extrasize:
size = self.extrasize
offset = self.offset - self.extrastart offset = self.offset - self.extrastart
chunk = self.extrabuf[offset: offset + size] chunk = self.extrabuf[offset: offset + size]
...@@ -384,9 +386,12 @@ class GzipFile(io.BufferedIOBase): ...@@ -384,9 +386,12 @@ class GzipFile(io.BufferedIOBase):
if self.extrasize <= 0 and self.fileobj is None: if self.extrasize <= 0 and self.fileobj is None:
return b'' return b''
# For certain input data, a single call to _read() may not return try:
# any data. In this case, retry until we get some data or reach EOF. # For certain input data, a single call to _read() may not return
while self.extrasize <= 0 and self._read(): # any data. In this case, retry until we get some data or reach EOF.
while self.extrasize <= 0:
self._read()
except EOFError:
pass pass
if size < 0 or size > self.extrasize: if size < 0 or size > self.extrasize:
size = self.extrasize size = self.extrasize
...@@ -409,9 +414,12 @@ class GzipFile(io.BufferedIOBase): ...@@ -409,9 +414,12 @@ class GzipFile(io.BufferedIOBase):
if self.extrasize == 0: if self.extrasize == 0:
if self.fileobj is None: if self.fileobj is None:
return b'' return b''
# Ensure that we don't return b"" if we haven't reached EOF. try:
# 1024 is the same buffering heuristic used in read() # Ensure that we don't return b"" if we haven't reached EOF.
while self.extrasize == 0 and self._read(max(n, 1024)): while self.extrasize == 0:
# 1024 is the same buffering heuristic used in read()
self._read(max(n, 1024))
except EOFError:
pass pass
offset = self.offset - self.extrastart offset = self.offset - self.extrastart
remaining = self.extrasize remaining = self.extrasize
...@@ -424,14 +432,13 @@ class GzipFile(io.BufferedIOBase): ...@@ -424,14 +432,13 @@ class GzipFile(io.BufferedIOBase):
def _read(self, size=1024): def _read(self, size=1024):
if self.fileobj is None: if self.fileobj is None:
return False raise EOFError("Reached EOF")
if self._new_member: if self._new_member:
# If the _new_member flag is set, we have to # If the _new_member flag is set, we have to
# jump to the next member, if there is one. # jump to the next member, if there is one.
self._init_read() self._init_read()
if not self._read_gzip_header(): self._read_gzip_header()
return False
self.decompress = zlib.decompressobj(-zlib.MAX_WBITS) self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
self._new_member = False self._new_member = False
...@@ -448,7 +455,7 @@ class GzipFile(io.BufferedIOBase): ...@@ -448,7 +455,7 @@ class GzipFile(io.BufferedIOBase):
self.fileobj.prepend(self.decompress.unused_data, True) self.fileobj.prepend(self.decompress.unused_data, True)
self._read_eof() self._read_eof()
self._add_read_data( uncompress ) self._add_read_data( uncompress )
return False raise EOFError('Reached EOF')
uncompress = self.decompress.decompress(buf) uncompress = self.decompress.decompress(buf)
self._add_read_data( uncompress ) self._add_read_data( uncompress )
...@@ -464,7 +471,6 @@ class GzipFile(io.BufferedIOBase): ...@@ -464,7 +471,6 @@ class GzipFile(io.BufferedIOBase):
# a new member on the next call # a new member on the next call
self._read_eof() self._read_eof()
self._new_member = True self._new_member = True
return True
def _add_read_data(self, data): def _add_read_data(self, data):
self.crc = zlib.crc32(data, self.crc) & 0xffffffff self.crc = zlib.crc32(data, self.crc) & 0xffffffff
...@@ -479,7 +485,8 @@ class GzipFile(io.BufferedIOBase): ...@@ -479,7 +485,8 @@ class GzipFile(io.BufferedIOBase):
# We check the that the computed CRC and size of the # We check the that the computed CRC and size of the
# uncompressed data matches the stored values. Note that the size # uncompressed data matches the stored values. Note that the size
# stored is the true file size mod 2**32. # stored is the true file size mod 2**32.
crc32, isize = struct.unpack("<II", self._read_exact(8)) crc32 = read32(self.fileobj)
isize = read32(self.fileobj) # may exceed 2GB
if crc32 != self.crc: if crc32 != self.crc:
raise IOError("CRC check failed %s != %s" % (hex(crc32), raise IOError("CRC check failed %s != %s" % (hex(crc32),
hex(self.crc))) hex(self.crc)))
......
...@@ -577,20 +577,6 @@ class BZ2FileTest(BaseTest): ...@@ -577,20 +577,6 @@ class BZ2FileTest(BaseTest):
bz2f.seek(-150, 1) bz2f.seek(-150, 1)
self.assertEqual(bz2f.read(), self.TEXT[500-150:]) self.assertEqual(bz2f.read(), self.TEXT[500-150:])
def test_read_truncated(self):
    """Reading a bzip2 stream that was cut short must raise EOFError."""
    # Drop the eos_magic field (6 bytes) and CRC (4 bytes).
    truncated = self.DATA[:-10]
    with BZ2File(BytesIO(truncated)) as f:
        self.assertRaises(EOFError, f.read)
    with BZ2File(BytesIO(truncated)) as f:
        # All payload is still readable; only the next read hits EOF.
        self.assertEqual(f.read(len(self.TEXT)), self.TEXT)
        self.assertRaises(EOFError, f.read, 1)
    # Incomplete 4-byte file header, and block header of at least 146 bits.
    for i in range(22):
        with BZ2File(BytesIO(truncated[:i])) as f:
            self.assertRaises(EOFError, f.read, 1)
class BZ2CompressorTest(BaseTest): class BZ2CompressorTest(BaseTest):
def testCompress(self): def testCompress(self):
bz2c = BZ2Compressor() bz2c = BZ2Compressor()
......
...@@ -389,20 +389,6 @@ class TestGzip(BaseTest): ...@@ -389,20 +389,6 @@ class TestGzip(BaseTest):
datac = gzip.compress(data) datac = gzip.compress(data)
self.assertEqual(gzip.decompress(datac), data) self.assertEqual(gzip.decompress(datac), data)
def test_read_truncated(self):
    """Reading a gzip stream that was cut short must raise EOFError."""
    data = data1*50
    # Drop the CRC (4 bytes) and file size (4 bytes).
    truncated = gzip.compress(data)[:-8]
    with gzip.GzipFile(fileobj=io.BytesIO(truncated)) as f:
        self.assertRaises(EOFError, f.read)
    with gzip.GzipFile(fileobj=io.BytesIO(truncated)) as f:
        # All payload is still readable; only the next read hits EOF.
        self.assertEqual(f.read(len(data)), data)
        self.assertRaises(EOFError, f.read, 1)
    # Incomplete 10-byte header.
    for i in range(2, 10):
        with gzip.GzipFile(fileobj=io.BytesIO(truncated[:i])) as f:
            self.assertRaises(EOFError, f.read, 1)
def test_read_with_extra(self): def test_read_with_extra(self):
# Gzip data with an extra field # Gzip data with an extra field
gzdata = (b'\x1f\x8b\x08\x04\xb2\x17cQ\x02\xff' gzdata = (b'\x1f\x8b\x08\x04\xb2\x17cQ\x02\xff'
......
...@@ -669,20 +669,6 @@ class FileTestCase(unittest.TestCase): ...@@ -669,20 +669,6 @@ class FileTestCase(unittest.TestCase):
with LZMAFile(BytesIO(COMPRESSED_XZ[:128])) as f: with LZMAFile(BytesIO(COMPRESSED_XZ[:128])) as f:
self.assertRaises(EOFError, f.read) self.assertRaises(EOFError, f.read)
def test_read_truncated(self):
    """Reading an .xz stream that was cut short must raise EOFError."""
    # Drop stream footer: CRC (4 bytes), index size (4 bytes),
    # flags (2 bytes) and magic number (2 bytes).
    truncated = COMPRESSED_XZ[:-12]
    with LZMAFile(BytesIO(truncated)) as f:
        self.assertRaises(EOFError, f.read)
    with LZMAFile(BytesIO(truncated)) as f:
        # All payload is still readable; only the next read hits EOF.
        self.assertEqual(f.read(len(INPUT)), INPUT)
        self.assertRaises(EOFError, f.read, 1)
    # Incomplete 12-byte header.
    for i in range(12):
        with LZMAFile(BytesIO(truncated[:i])) as f:
            self.assertRaises(EOFError, f.read, 1)
def test_read_bad_args(self): def test_read_bad_args(self):
f = LZMAFile(BytesIO(COMPRESSED_XZ)) f = LZMAFile(BytesIO(COMPRESSED_XZ))
f.close() f.close()
......
...@@ -628,9 +628,6 @@ Library ...@@ -628,9 +628,6 @@ Library
current directory on Unix and no longer searches a relative file path with current directory on Unix and no longer searches a relative file path with
a directory part in PATH directories. Patch by Thomas Kluyver. a directory part in PATH directories. Patch by Thomas Kluyver.
- Issue #1159051: GzipFile now raises EOFError when reading a corrupted file
with truncated header or footer.
- Issue #16993: shutil.which() now preserves the case of the path and extension - Issue #16993: shutil.which() now preserves the case of the path and extension
on Windows. on Windows.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment