Commit aff10545 authored by Georg Brandl

Issue #1159051: Back out a fix for handling corrupted gzip files that
broke backwards compatibility.
parent 2a118003
...@@ -33,6 +33,9 @@ def write32u(output, value): ...@@ -33,6 +33,9 @@ def write32u(output, value):
# or unsigned. # or unsigned.
output.write(struct.pack("<L", value)) output.write(struct.pack("<L", value))
def read32(input):
return struct.unpack("<I", input.read(4))[0]
def open(filename, mode="rb", compresslevel=9): def open(filename, mode="rb", compresslevel=9):
"""Shorthand for GzipFile(filename, mode, compresslevel). """Shorthand for GzipFile(filename, mode, compresslevel).
...@@ -256,32 +259,27 @@ class GzipFile(io.BufferedIOBase): ...@@ -256,32 +259,27 @@ class GzipFile(io.BufferedIOBase):
self.crc = zlib.crc32(b"") & 0xffffffff self.crc = zlib.crc32(b"") & 0xffffffff
self.size = 0 self.size = 0
def _read_exact(self, n):
data = self.fileobj.read(n)
while len(data) < n:
b = self.fileobj.read(n - len(data))
if not b:
raise EOFError("Compressed file ended before the "
"end-of-stream marker was reached")
data += b
return data
def _read_gzip_header(self): def _read_gzip_header(self):
magic = self.fileobj.read(2) magic = self.fileobj.read(2)
if magic == b'': if magic == b'':
return False raise EOFError("Reached EOF")
if magic != b'\037\213': if magic != b'\037\213':
raise IOError('Not a gzipped file') raise IOError('Not a gzipped file')
method = ord( self.fileobj.read(1) )
method, flag, self.mtime = struct.unpack("<BBIxx", self._read_exact(8))
if method != 8: if method != 8:
raise IOError('Unknown compression method') raise IOError('Unknown compression method')
flag = ord( self.fileobj.read(1) )
self.mtime = read32(self.fileobj)
# extraflag = self.fileobj.read(1)
# os = self.fileobj.read(1)
self.fileobj.read(2)
if flag & FEXTRA: if flag & FEXTRA:
# Read & discard the extra field, if present # Read & discard the extra field, if present
extra_len, = struct.unpack("<H", self._read_exact(2)) xlen = ord(self.fileobj.read(1))
self._read_exact(extra_len) xlen = xlen + 256*ord(self.fileobj.read(1))
self.fileobj.read(xlen)
if flag & FNAME: if flag & FNAME:
# Read and discard a null-terminated string containing the filename # Read and discard a null-terminated string containing the filename
while True: while True:
...@@ -295,13 +293,12 @@ class GzipFile(io.BufferedIOBase): ...@@ -295,13 +293,12 @@ class GzipFile(io.BufferedIOBase):
if not s or s==b'\000': if not s or s==b'\000':
break break
if flag & FHCRC: if flag & FHCRC:
self._read_exact(2) # Read & discard the 16-bit header CRC self.fileobj.read(2) # Read & discard the 16-bit header CRC
unused = self.fileobj.unused() unused = self.fileobj.unused()
if unused: if unused:
uncompress = self.decompress.decompress(unused) uncompress = self.decompress.decompress(unused)
self._add_read_data(uncompress) self._add_read_data(uncompress)
return True
def write(self,data): def write(self,data):
self._check_closed() self._check_closed()
...@@ -335,16 +332,20 @@ class GzipFile(io.BufferedIOBase): ...@@ -335,16 +332,20 @@ class GzipFile(io.BufferedIOBase):
readsize = 1024 readsize = 1024
if size < 0: # get the whole thing if size < 0: # get the whole thing
while self._read(readsize): try:
while True:
self._read(readsize)
readsize = min(self.max_read_chunk, readsize * 2) readsize = min(self.max_read_chunk, readsize * 2)
except EOFError:
size = self.extrasize size = self.extrasize
else: # just get some more of it else: # just get some more of it
try:
while size > self.extrasize: while size > self.extrasize:
if not self._read(readsize): self._read(readsize)
readsize = min(self.max_read_chunk, readsize * 2)
except EOFError:
if size > self.extrasize: if size > self.extrasize:
size = self.extrasize size = self.extrasize
break
readsize = min(self.max_read_chunk, readsize * 2)
offset = self.offset - self.extrastart offset = self.offset - self.extrastart
chunk = self.extrabuf[offset: offset + size] chunk = self.extrabuf[offset: offset + size]
...@@ -365,9 +366,12 @@ class GzipFile(io.BufferedIOBase): ...@@ -365,9 +366,12 @@ class GzipFile(io.BufferedIOBase):
if self.extrasize == 0: if self.extrasize == 0:
if self.fileobj is None: if self.fileobj is None:
return b'' return b''
try:
# Ensure that we don't return b"" if we haven't reached EOF. # Ensure that we don't return b"" if we haven't reached EOF.
while self.extrasize == 0:
# 1024 is the same buffering heuristic used in read() # 1024 is the same buffering heuristic used in read()
while self.extrasize == 0 and self._read(max(n, 1024)): self._read(max(n, 1024))
except EOFError:
pass pass
offset = self.offset - self.extrastart offset = self.offset - self.extrastart
remaining = self.extrasize remaining = self.extrasize
...@@ -380,14 +384,13 @@ class GzipFile(io.BufferedIOBase): ...@@ -380,14 +384,13 @@ class GzipFile(io.BufferedIOBase):
def _read(self, size=1024): def _read(self, size=1024):
if self.fileobj is None: if self.fileobj is None:
return False raise EOFError("Reached EOF")
if self._new_member: if self._new_member:
# If the _new_member flag is set, we have to # If the _new_member flag is set, we have to
# jump to the next member, if there is one. # jump to the next member, if there is one.
self._init_read() self._init_read()
if not self._read_gzip_header(): self._read_gzip_header()
return False
self.decompress = zlib.decompressobj(-zlib.MAX_WBITS) self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
self._new_member = False self._new_member = False
...@@ -404,7 +407,7 @@ class GzipFile(io.BufferedIOBase): ...@@ -404,7 +407,7 @@ class GzipFile(io.BufferedIOBase):
self.fileobj.prepend(self.decompress.unused_data, True) self.fileobj.prepend(self.decompress.unused_data, True)
self._read_eof() self._read_eof()
self._add_read_data( uncompress ) self._add_read_data( uncompress )
return False raise EOFError('Reached EOF')
uncompress = self.decompress.decompress(buf) uncompress = self.decompress.decompress(buf)
self._add_read_data( uncompress ) self._add_read_data( uncompress )
...@@ -420,7 +423,6 @@ class GzipFile(io.BufferedIOBase): ...@@ -420,7 +423,6 @@ class GzipFile(io.BufferedIOBase):
# a new member on the next call # a new member on the next call
self._read_eof() self._read_eof()
self._new_member = True self._new_member = True
return True
def _add_read_data(self, data): def _add_read_data(self, data):
self.crc = zlib.crc32(data, self.crc) & 0xffffffff self.crc = zlib.crc32(data, self.crc) & 0xffffffff
...@@ -435,7 +437,8 @@ class GzipFile(io.BufferedIOBase): ...@@ -435,7 +437,8 @@ class GzipFile(io.BufferedIOBase):
# We check the that the computed CRC and size of the # We check the that the computed CRC and size of the
# uncompressed data matches the stored values. Note that the size # uncompressed data matches the stored values. Note that the size
# stored is the true file size mod 2**32. # stored is the true file size mod 2**32.
crc32, isize = struct.unpack("<II", self._read_exact(8)) crc32 = read32(self.fileobj)
isize = read32(self.fileobj) # may exceed 2GB
if crc32 != self.crc: if crc32 != self.crc:
raise IOError("CRC check failed %s != %s" % (hex(crc32), raise IOError("CRC check failed %s != %s" % (hex(crc32),
hex(self.crc))) hex(self.crc)))
......
...@@ -292,24 +292,6 @@ class BZ2FileTest(BaseTest): ...@@ -292,24 +292,6 @@ class BZ2FileTest(BaseTest):
self.assertRaises(ValueError, f.readline) self.assertRaises(ValueError, f.readline)
self.assertRaises(ValueError, f.readlines) self.assertRaises(ValueError, f.readlines)
def test_read_truncated(self):
# Drop the eos_magic field (6 bytes) and CRC (4 bytes).
truncated = self.DATA[:-10]
with open(self.filename, 'wb') as f:
f.write(truncated)
with BZ2File(self.filename) as f:
self.assertRaises(EOFError, f.read)
with BZ2File(self.filename) as f:
self.assertEqual(f.read(len(self.TEXT)), self.TEXT)
self.assertRaises(EOFError, f.read, 1)
# Incomplete 4-byte file header, and block header of at least 146 bits.
for i in range(22):
with open(self.filename, 'wb') as f:
f.write(truncated[:i])
with BZ2File(self.filename) as f:
self.assertRaises(EOFError, f.read, 1)
class BZ2CompressorTest(BaseTest): class BZ2CompressorTest(BaseTest):
def testCompress(self): def testCompress(self):
# "Test BZ2Compressor.compress()/flush()" # "Test BZ2Compressor.compress()/flush()"
......
...@@ -365,19 +365,6 @@ class TestGzip(unittest.TestCase): ...@@ -365,19 +365,6 @@ class TestGzip(unittest.TestCase):
datac = gzip.compress(data) datac = gzip.compress(data)
self.assertEqual(gzip.decompress(datac), data) self.assertEqual(gzip.decompress(datac), data)
def test_read_truncated(self):
data = data1*50
# Drop the CRC (4 bytes) and file size (4 bytes).
truncated = gzip.compress(data)[:-8]
with gzip.GzipFile(fileobj=io.BytesIO(truncated)) as f:
self.assertRaises(EOFError, f.read)
with gzip.GzipFile(fileobj=io.BytesIO(truncated)) as f:
self.assertEqual(f.read(len(data)), data)
self.assertRaises(EOFError, f.read, 1)
# Incomplete 10-byte header.
for i in range(2, 10):
with gzip.GzipFile(fileobj=io.BytesIO(truncated[:i])) as f:
self.assertRaises(EOFError, f.read, 1)
def test_read_with_extra(self): def test_read_with_extra(self):
# Gzip data with an extra field # Gzip data with an extra field
......
...@@ -14,11 +14,12 @@ Library ...@@ -14,11 +14,12 @@ Library
which were omitted in 3.2.4 when updating the bundled version of which were omitted in 3.2.4 when updating the bundled version of
libffi used by ctypes. libffi used by ctypes.
- Issue #17666: Fix reading gzip files with an extra field.
- Issue #15535: Fix namedtuple pickles which were picking up the OrderedDict - Issue #15535: Fix namedtuple pickles which were picking up the OrderedDict
instead of just the underlying tuple. instead of just the underlying tuple.
- Issue #1159051: Back out a fix for handling corrupted gzip files that
broke backwards compatibility.
Build Build
----- -----
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment