Commit 5d16917c authored by Tim Peters

Another round on SF patch 618135: gzip.py and files > 2G

The last round boosted "the limit" from 2GB to 4GB.  This round gets
rid of the 4GB limit.  For files > 4GB, gzip stores just the last 32
bits of the file size, and now we play along with that too.  Tested
by hand (on a 6+GB file) on Win2K.

Boosting from 2GB to 4GB was arguably enough "a bugfix".  Going beyond
that smells more like "new feature" to me.
parent 3c23813d
...@@ -24,6 +24,10 @@ def U32(i): ...@@ -24,6 +24,10 @@ def U32(i):
i += 1L << 32 i += 1L << 32
return i return i
def LOWU32(i):
"""Return the low-order 32 bits of an int, as a non-negative int.

The gzip trailer records only the uncompressed size mod 2**32, so the
running size is masked with this helper before it is written out or
compared against the stored value.
"""
return i & 0xFFFFFFFFL
def write32(output, value): def write32(output, value):
output.write(struct.pack("<l", value)) output.write(struct.pack("<l", value))
...@@ -295,21 +299,22 @@ class GzipFile: ...@@ -295,21 +299,22 @@ class GzipFile:
# We've read to the end of the file, so we have to rewind in order # We've read to the end of the file, so we have to rewind in order
# to reread the 8 bytes containing the CRC and the file size. # to reread the 8 bytes containing the CRC and the file size.
# We check the that the computed CRC and size of the # We check the that the computed CRC and size of the
# uncompressed data matches the stored values. # uncompressed data matches the stored values. Note that the size
# stored is the true file size mod 2**32.
self.fileobj.seek(-8, 1) self.fileobj.seek(-8, 1)
crc32 = read32(self.fileobj) crc32 = read32(self.fileobj)
isize = U32(read32(self.fileobj)) # may exceed 2GB isize = U32(read32(self.fileobj)) # may exceed 2GB
if U32(crc32) != U32(self.crc): if U32(crc32) != U32(self.crc):
raise ValueError, "CRC check failed" raise ValueError, "CRC check failed"
elif isize != self.size: elif isize != LOWU32(self.size):
raise ValueError, "Incorrect length of data produced" raise ValueError, "Incorrect length of data produced"
def close(self): def close(self):
if self.mode == WRITE: if self.mode == WRITE:
self.fileobj.write(self.compress.flush()) self.fileobj.write(self.compress.flush())
write32(self.fileobj, self.crc) write32(self.fileobj, self.crc)
# self.size may exceed 2GB # self.size may exceed 2GB, or even 4GB
write32u(self.fileobj, self.size) write32u(self.fileobj, LOWU32(self.size))
self.fileobj = None self.fileobj = None
elif self.mode == READ: elif self.mode == READ:
self.fileobj = None self.fileobj = None
......
...@@ -355,9 +355,12 @@ Extension modules ...@@ -355,9 +355,12 @@ Extension modules
Library Library
------- -------
- gzip.py now handles files exceeding 2GB. Note that 4GB is still a - gzip.py now handles files exceeding 2GB. Files over 4GB also work
fundamental limitation of the underlying gzip file format (it only now (provided the OS supports it, and Python is configured with large
has 32 bits to record the file size). file support), but in that case the underlying gzip file format can
record only the least-significant 32 bits of the file size, so that
some tools working with gzipped files may report an incorrect file
size.
- xml.sax.saxutils.unescape has been added, to replace entity references - xml.sax.saxutils.unescape has been added, to replace entity references
with their entity value. with their entity value.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment