Commit b09b844a authored by Martin v. Löwis's avatar Martin v. Löwis

Merged revisions 64688 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk

........
  r64688 | martin.v.loewis | 2008-07-03 14:51:14 +0200 (Do, 03 Jul 2008) | 9 lines

  Patch #1622: Correct interpretation of various ZIP header fields.

  Also fixes
  - Issue #1526: Allow more than 64k files to be added to Zip64 file.

  - Issue #1746: Correct handling of zipfile archive comments (previously
    archives with comments over 4k were flagged as invalid). Allow writing
    Zip files with archive comments by setting the 'comment' attribute of a ZipFile.
........
parent 451a356f
...@@ -269,7 +269,7 @@ ZipFile Objects ...@@ -269,7 +269,7 @@ ZipFile Objects
member of the given :class:`ZipInfo` instance. By default, the member of the given :class:`ZipInfo` instance. By default, the
:class:`ZipInfo` constructor sets this member to :const:`ZIP_STORED`. :class:`ZipInfo` constructor sets this member to :const:`ZIP_STORED`.
The following data attribute is also available: The following data attributes are also available:
.. attribute:: ZipFile.debug .. attribute:: ZipFile.debug
...@@ -278,6 +278,12 @@ The following data attribute is also available: ...@@ -278,6 +278,12 @@ The following data attribute is also available:
output) to ``3`` (the most output). Debugging information is written to output) to ``3`` (the most output). Debugging information is written to
``sys.stdout``. ``sys.stdout``.
.. attribute:: ZipFile.comment
The comment text associated with the ZIP file. If assigning a comment to a
:class:`ZipFile` instance created with mode 'a' or 'w', this should be a
string no longer than 65535 bytes. Comments longer than this will be
truncated in the written archive when :meth:`ZipFile.close` is called.
.. _pyzipfile-objects: .. _pyzipfile-objects:
......
...@@ -699,6 +699,55 @@ class OtherTests(unittest.TestCase): ...@@ -699,6 +699,55 @@ class OtherTests(unittest.TestCase):
zipf.writestr("foo.txt\x00qqq", b"O, for a Muse of Fire!") zipf.writestr("foo.txt\x00qqq", b"O, for a Muse of Fire!")
self.assertEqual(zipf.namelist(), ['foo.txt']) self.assertEqual(zipf.namelist(), ['foo.txt'])
def test_StructSizes(self):
# check that ZIP internal structure sizes are calculated correctly
self.assertEqual(zipfile.sizeEndCentDir, 22)
self.assertEqual(zipfile.sizeCentralDir, 46)
self.assertEqual(zipfile.sizeEndCentDir64, 56)
self.assertEqual(zipfile.sizeEndCentDir64Locator, 20)
def testComments(self):
    """Check that archive-level comments are written and read back properly."""
    member_text = "O, for a Muse of Fire!"

    def store(archive_comment=None):
        # Write a one-member archive to TESTFN. With no comment given,
        # verify that a fresh ZipFile starts with an empty comment;
        # otherwise assign the comment before the member is written.
        writer = zipfile.ZipFile(TESTFN, mode="w")
        if archive_comment is None:
            self.assertEqual(writer.comment, b'')
        else:
            writer.comment = archive_comment
        writer.writestr("foo.txt", member_text)
        writer.close()

    def check_stored(expected):
        # Reopen TESTFN for reading and compare the stored archive comment.
        reader = zipfile.ZipFile(TESTFN, mode="r")
        self.assertEqual(reader.comment, expected)
        reader.close()

    # default comment is empty
    store()
    check_stored(b'')

    # a simple short comment
    short_comment = b'Bravely taking to his feet, he beat a very brave retreat.'
    store(short_comment)
    check_stored(short_comment)

    # a comment of maximum length: (1 << 16) - 1 ASCII digits
    digits = ['%d' % (i**3 % 10) for i in range((1 << 16)-1)]
    longest_comment = ''.join(digits).encode("ascii")
    store(longest_comment)
    check_stored(longest_comment)

    # a comment that is too long comes back truncated to the maximum length
    store(longest_comment + b'oops')
    check_stored(longest_comment)
def tearDown(self): def tearDown(self):
support.unlink(TESTFN) support.unlink(TESTFN)
support.unlink(TESTFN2) support.unlink(TESTFN2)
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
# The support.requires call is the only reason for keeping this separate # The support.requires call is the only reason for keeping this separate
# from test_zipfile # from test_zipfile
from test import support from test import support
# XXX(nnorwitz): disable this test by looking for extra largefile resource # XXX(nnorwitz): disable this test by looking for extra largefile resource
# which doesn't exist. This test takes over 30 minutes to run in general # which doesn't exist. This test takes over 30 minutes to run in general
# and requires more disk space than most of the buildbots. # and requires more disk space than most of the buildbots.
...@@ -92,8 +93,31 @@ class TestsWithSourceFile(unittest.TestCase): ...@@ -92,8 +93,31 @@ class TestsWithSourceFile(unittest.TestCase):
if os.path.exists(fname): if os.path.exists(fname):
os.remove(fname) os.remove(fname)
class OtherTests(unittest.TestCase):
    """Tests for archives holding more members than the 16-bit count allows."""

    def testMoreThan64kFiles(self):
        """Write 1.5 * 64k members to an archive and read them all back."""
        # This test checks that more than 64k files can be added to an archive,
        # and that the resulting archive can be read properly by ZipFile

        def payload(i):
            # Member contents are bytes so that the value written is directly
            # comparable with what ZipFile.read() returns.
            return ("%d" % (i**3 % 57)).encode("ascii")

        # Integer division: the count drives range() and is compared with len()
        numfiles = (1 << 16) * 3 // 2

        zipf = zipfile.ZipFile(TESTFN, mode="w")
        try:
            zipf.debug = 100
            for i in range(numfiles):
                zipf.writestr("foo%08d" % i, payload(i))
            self.assertEqual(len(zipf.namelist()), numfiles)
        finally:
            zipf.close()

        zipf2 = zipfile.ZipFile(TESTFN, mode="r")
        try:
            self.assertEqual(len(zipf2.namelist()), numfiles)
            for i in range(numfiles):
                self.assertEqual(zipf2.read("foo%08d" % i), payload(i))
        finally:
            # Close the reader (not the already-closed writer) so that
            # tearDown can remove the file.
            zipf2.close()

    def tearDown(self):
        """Remove the temporary archive files used by the test."""
        # The helper module is imported in this file as ``support``
        support.unlink(TESTFN)
        support.unlink(TESTFN2)
def test_main(): def test_main():
run_unittest(TestsWithSourceFile) run_unittest(TestsWithSourceFile, OtherTests)
if __name__ == "__main__": if __name__ == "__main__":
test_main() test_main()
...@@ -29,31 +29,79 @@ class LargeZipFile(Exception): ...@@ -29,31 +29,79 @@ class LargeZipFile(Exception):
error = BadZipfile # The exception raised by this module error = BadZipfile # The exception raised by this module
ZIP64_LIMIT= (1 << 31) - 1 ZIP64_LIMIT= (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1
# constants for Zip file compression methods # constants for Zip file compression methods
ZIP_STORED = 0 ZIP_STORED = 0
ZIP_DEFLATED = 8 ZIP_DEFLATED = 8
# Other ZIP compression methods not supported # Other ZIP compression methods not supported
# Here are some struct module formats for reading headers # Below are some formats and associated data for reading/writing headers using
structEndArchive = "<4s4H2LH" # 9 items, end of archive, 22 bytes # the struct module. The names and structures of headers/records are those used
stringEndArchive = b"PK\005\006" # magic number for end of archive record # in the PKWARE description of the ZIP file format:
structCentralDir = "<4s4B4HLLL5HLL"# 19 items, central directory, 46 bytes # http://www.pkware.com/documents/casestudies/APPNOTE.TXT
stringCentralDir = b"PK\001\002" # magic number for central directory # (URL valid as of January 2008)
structFileHeader = "<4s2B4HLLL2H" # 12 items, file header record, 30 bytes
stringFileHeader = b"PK\003\004" # magic number for file header # The "end of central directory" structure, magic number, size, and indices
structEndArchive64Locator = "<4sLQL" # 4 items, locate Zip64 header, 20 bytes # (section V.I in the format document)
stringEndArchive64Locator = b"PK\x06\x07" # magic token for locator header structEndCentDir = b"<4s4H2LH"
structEndArchive64 = "<4sQHHLLQQQQ" # 10 items, end of archive (Zip64), 56 bytes magicEndCentDir = b"PK\005\006"
stringEndArchive64 = b"PK\x06\x06" # magic token for Zip64 header sizeEndCentDir = struct.calcsize(structEndCentDir)
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
magicCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
magicFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndCentDir64Locator = "<4sLQL"
magicEndCentDir64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndCentDir64Locator)
# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndCentDir64 = "<4sQ2H2L4Q"
magicEndCentDir64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndCentDir64)
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
# indexes of entries in the central directory structure # indexes of entries in the central directory structure
_CD_SIGNATURE = 0 _CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1 _CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2 _CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3 _CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4 # is this meaningful? _CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5 _CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6 _CD_COMPRESS_TYPE = 6
_CD_TIME = 7 _CD_TIME = 7
...@@ -69,10 +117,15 @@ _CD_INTERNAL_FILE_ATTRIBUTES = 16 ...@@ -69,10 +117,15 @@ _CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17 _CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18 _CD_LOCAL_HEADER_OFFSET = 18
# indexes of entries in the local file header structure # The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
magicFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)
_FH_SIGNATURE = 0 _FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1 _FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2 # is this meaningful? _FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3 _FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4 _FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5 _FH_LAST_MOD_TIME = 5
...@@ -83,6 +136,28 @@ _FH_UNCOMPRESSED_SIZE = 9 ...@@ -83,6 +136,28 @@ _FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10 _FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11 _FH_EXTRA_FIELD_LENGTH = 11
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndCentDir64Locator = "<4sLQL"
magicEndCentDir64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndCentDir64Locator)
# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndCentDir64 = "<4sQ2H2L4Q"
magicEndCentDir64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndCentDir64)
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
def is_zipfile(filename): def is_zipfile(filename):
"""Quickly see if file is a ZIP file by checking the magic number.""" """Quickly see if file is a ZIP file by checking the magic number."""
try: try:
...@@ -99,33 +174,31 @@ def _EndRecData64(fpin, offset, endrec): ...@@ -99,33 +174,31 @@ def _EndRecData64(fpin, offset, endrec):
""" """
Read the ZIP64 end-of-archive records and use that to update endrec Read the ZIP64 end-of-archive records and use that to update endrec
""" """
locatorSize = struct.calcsize(structEndArchive64Locator) fpin.seek(offset - sizeEndCentDir64Locator, 2)
fpin.seek(offset - locatorSize, 2) data = fpin.read(sizeEndCentDir64Locator)
data = fpin.read(locatorSize) sig, diskno, reloff, disks = struct.unpack(structEndCentDir64Locator, data)
sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) if sig != magicEndCentDir64Locator:
if sig != stringEndArchive64Locator:
return endrec return endrec
if diskno != 0 or disks != 1: if diskno != 0 or disks != 1:
raise BadZipfile("zipfiles that span multiple disks are not supported") raise BadZipfile("zipfiles that span multiple disks are not supported")
# Assume no 'zip64 extensible data' # Assume no 'zip64 extensible data'
endArchiveSize = struct.calcsize(structEndArchive64) fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
fpin.seek(offset - locatorSize - endArchiveSize, 2) data = fpin.read(sizeEndCentDir64)
data = fpin.read(endArchiveSize)
sig, sz, create_version, read_version, disk_num, disk_dir, \ sig, sz, create_version, read_version, disk_num, disk_dir, \
dircount, dircount2, dirsize, diroffset = \ dircount, dircount2, dirsize, diroffset = \
struct.unpack(structEndArchive64, data) struct.unpack(structEndCentDir64, data)
if sig != stringEndArchive64: if sig != magicEndCentDir64:
return endrec return endrec
# Update the original endrec using data from the ZIP64 record # Update the original endrec using data from the ZIP64 record
endrec[1] = disk_num endrec[_ECD_DISK_NUMBER] = disk_num
endrec[2] = disk_dir endrec[_ECD_DISK_START] = disk_dir
endrec[3] = dircount endrec[_ECD_ENTRIES_THIS_DISK] = dircount
endrec[4] = dircount2 endrec[_ECD_ENTRIES_TOTAL] = dircount2
endrec[5] = dirsize endrec[_ECD_SIZE] = dirsize
endrec[6] = diroffset endrec[_ECD_OFFSET] = diroffset
return endrec return endrec
...@@ -134,38 +207,59 @@ def _EndRecData(fpin): ...@@ -134,38 +207,59 @@ def _EndRecData(fpin):
The data is a list of the nine items in the ZIP "End of central dir" The data is a list of the nine items in the ZIP "End of central dir"
record followed by a tenth item, the file seek offset of this record.""" record followed by a tenth item, the file seek offset of this record."""
fpin.seek(-22, 2) # Assume no archive comment.
filesize = fpin.tell() + 22 # Get file size # Determine file size
fpin.seek(0, 2)
filesize = fpin.tell()
# Check to see if this is ZIP file with no archive comment (the
# "end of central directory" structure should be the last item in the
# file if this is the case).
fpin.seek(-sizeEndCentDir, 2)
data = fpin.read() data = fpin.read()
if data[0:4] == stringEndArchive and data[-2:] == b"\000\000": if data[0:4] == magicEndCentDir and data[-2:] == b"\000\000":
endrec = struct.unpack(structEndArchive, data) # the signature is correct and there's no comment, unpack structure
endrec = list(endrec) endrec = struct.unpack(structEndCentDir, data)
endrec.append("") # Append the archive comment endrec=list(endrec)
endrec.append(filesize - 22) # Append the record start offset
if endrec[-4] == 0xffffffff: # Append a blank comment and record start offset
return _EndRecData64(fpin, -22, endrec) endrec.append(b"")
endrec.append(filesize - sizeEndCentDir)
if endrec[_ECD_OFFSET] == 0xffffffff:
# the value for the "offset of the start of the central directory"
# indicates that there is a "Zip64 end of central directory"
# structure present, so go look for it
return _EndRecData64(fpin, -sizeEndCentDir, endrec)
return endrec return endrec
# Search the last END_BLOCK bytes of the file for the record signature.
# The comment is appended to the ZIP file and has a 16 bit length. # Either this is not a ZIP file, or it is a ZIP file with an archive
# So the comment may be up to 64K long. We limit the search for the # comment. Search the end of the file for the "end of central directory"
# signature to a few Kbytes at the end of the file for efficiency. # record signature. The comment is the last item in the ZIP file and may be
# also, the signature must not appear in the comment. # up to 64K long. It is assumed that the "end of central directory" magic
END_BLOCK = min(filesize, 1024 * 4) # number does not appear in the comment.
fpin.seek(filesize - END_BLOCK, 0) maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
fpin.seek(maxCommentStart, 0)
data = fpin.read() data = fpin.read()
start = data.rfind(stringEndArchive) start = data.rfind(magicEndCentDir)
if start >= 0: # Correct signature string was found if start >= 0:
endrec = struct.unpack(structEndArchive, data[start:start+22]) # found the magic number; attempt to unpack and interpret
endrec = list(endrec) recData = data[start:start+sizeEndCentDir]
comment = data[start+22:] endrec = list(struct.unpack(structEndCentDir, recData))
if endrec[7] == len(comment): # Comment length checks out comment = data[start+sizeEndCentDir:]
# check that comment length is correct
if endrec[_ECD_COMMENT_SIZE] == len(comment):
# Append the archive comment and start offset # Append the archive comment and start offset
endrec.append(comment) endrec.append(comment)
endrec.append(filesize - END_BLOCK + start) endrec.append(maxCommentStart + start)
if endrec[-4] == 0xffffffff: if endrec[_ECD_OFFSET] == 0xffffffff:
return _EndRecData64(fpin, - END_BLOCK + start, endrec) # There is apparently a "Zip64 end of central directory"
# structure present, so go look for it
return _EndRecData64(fpin, start - filesize, endrec)
return endrec return endrec
return # Error, return None
# Unable to find a valid end of central directory structure
return
class ZipInfo (object): class ZipInfo (object):
...@@ -252,13 +346,13 @@ class ZipInfo (object): ...@@ -252,13 +346,13 @@ class ZipInfo (object):
fmt = '<HHQQ' fmt = '<HHQQ'
extra = extra + struct.pack(fmt, extra = extra + struct.pack(fmt,
1, struct.calcsize(fmt)-4, file_size, compress_size) 1, struct.calcsize(fmt)-4, file_size, compress_size)
file_size = 0xffffffff # -1 file_size = 0xffffffff
compress_size = 0xffffffff # -1 compress_size = 0xffffffff
self.extract_version = max(45, self.extract_version) self.extract_version = max(45, self.extract_version)
self.create_version = max(45, self.extract_version) self.create_version = max(45, self.extract_version)
filename, flag_bits = self._encodeFilenameFlags() filename, flag_bits = self._encodeFilenameFlags()
header = struct.pack(structFileHeader, stringFileHeader, header = struct.pack(structFileHeader, magicFileHeader,
self.extract_version, self.reserved, flag_bits, self.extract_version, self.reserved, flag_bits,
self.compress_type, dostime, dosdate, CRC, self.compress_type, dostime, dosdate, CRC,
compress_size, file_size, compress_size, file_size,
...@@ -292,16 +386,15 @@ class ZipInfo (object): ...@@ -292,16 +386,15 @@ class ZipInfo (object):
idx = 0 idx = 0
# ZIP64 extension (large files and/or large archives) # ZIP64 extension (large files and/or large archives)
# XXX Is this correct? won't this exclude 2**32-1 byte files?
if self.file_size in (0xffffffffffffffff, 0xffffffff): if self.file_size in (0xffffffffffffffff, 0xffffffff):
self.file_size = counts[idx] self.file_size = counts[idx]
idx += 1 idx += 1
if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF: if self.compress_size == 0xFFFFFFFF:
self.compress_size = counts[idx] self.compress_size = counts[idx]
idx += 1 idx += 1
if self.header_offset == -1 or self.header_offset == 0xffffffff: if self.header_offset == 0xffffffff:
old = self.header_offset old = self.header_offset
self.header_offset = counts[idx] self.header_offset = counts[idx]
idx+=1 idx+=1
...@@ -569,7 +662,7 @@ class ZipExtFile: ...@@ -569,7 +662,7 @@ class ZipExtFile:
class ZipFile: class ZipFile:
""" Class with methods to open, read, write, close, list zip files. """ Class with methods to open, read, write, close, list zip files.
z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True) z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
file: Either the path to the file, or a file-like object. file: Either the path to the file, or a file-like object.
If it is a path, the file will be opened and closed by ZipFile. If it is a path, the file will be opened and closed by ZipFile.
...@@ -605,6 +698,7 @@ class ZipFile: ...@@ -605,6 +698,7 @@ class ZipFile:
self.compression = compression # Method of compression self.compression = compression # Method of compression
self.mode = key = mode.replace('b', '')[0] self.mode = key = mode.replace('b', '')[0]
self.pwd = None self.pwd = None
self.comment = b''
# Check if we were passed a file-like object # Check if we were passed a file-like object
if isinstance(file, str): if isinstance(file, str):
...@@ -661,18 +755,20 @@ class ZipFile: ...@@ -661,18 +755,20 @@ class ZipFile:
raise BadZipfile("File is not a zip file") raise BadZipfile("File is not a zip file")
if self.debug > 1: if self.debug > 1:
print(endrec) print(endrec)
size_cd = endrec[5] # bytes in central directory size_cd = endrec[_ECD_SIZE] # bytes in central directory
offset_cd = endrec[6] # offset of central directory offset_cd = endrec[_ECD_OFFSET] # offset of central directory
self.comment = endrec[8] # archive comment self.comment = endrec[_ECD_COMMENT] # archive comment
# endrec[9] is the offset of the "End of Central Dir" record
if endrec[9] > ZIP64_LIMIT:
x = endrec[9] - size_cd - 56 - 20
else:
x = endrec[9] - size_cd
# "concat" is zero, unless zip was concatenated to another file # "concat" is zero, unless zip was concatenated to another file
concat = x - offset_cd concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
if endrec[_ECD_LOCATION] > ZIP64_LIMIT:
# If the offset of the "End of Central Dir" record requires Zip64
# extension structures, account for them
concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
if self.debug > 2: if self.debug > 2:
print("given, inferred, offset", offset_cd, x, concat) inferred = concat + offset_cd
print("given, inferred, offset", offset_cd, inferred, concat)
# self.start_dir: Position of start of central directory # self.start_dir: Position of start of central directory
self.start_dir = offset_cd + concat self.start_dir = offset_cd + concat
fp.seek(self.start_dir, 0) fp.seek(self.start_dir, 0)
...@@ -680,9 +776,8 @@ class ZipFile: ...@@ -680,9 +776,8 @@ class ZipFile:
fp = io.BytesIO(data) fp = io.BytesIO(data)
total = 0 total = 0
while total < size_cd: while total < size_cd:
centdir = fp.read(46) centdir = fp.read(sizeCentralDir)
total = total + 46 if centdir[0:4] != magicCentralDir:
if centdir[0:4] != stringCentralDir:
raise BadZipfile("Bad magic number for central directory") raise BadZipfile("Bad magic number for central directory")
centdir = struct.unpack(structCentralDir, centdir) centdir = struct.unpack(structCentralDir, centdir)
if self.debug > 2: if self.debug > 2:
...@@ -699,9 +794,6 @@ class ZipFile: ...@@ -699,9 +794,6 @@ class ZipFile:
x = ZipInfo(filename) x = ZipInfo(filename)
x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
total = (total + centdir[_CD_FILENAME_LENGTH]
+ centdir[_CD_EXTRA_FIELD_LENGTH]
+ centdir[_CD_COMMENT_LENGTH])
x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
(x.create_version, x.create_system, x.extract_version, x.reserved, (x.create_version, x.create_system, x.extract_version, x.reserved,
x.flag_bits, x.compress_type, t, d, x.flag_bits, x.compress_type, t, d,
...@@ -716,6 +808,12 @@ class ZipFile: ...@@ -716,6 +808,12 @@ class ZipFile:
x.header_offset = x.header_offset + concat x.header_offset = x.header_offset + concat
self.filelist.append(x) self.filelist.append(x)
self.NameToInfo[x.filename] = x self.NameToInfo[x.filename] = x
# update total bytes read from central directory
total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
+ centdir[_CD_EXTRA_FIELD_LENGTH]
+ centdir[_CD_COMMENT_LENGTH])
if self.debug > 2: if self.debug > 2:
print("total", total) print("total", total)
...@@ -749,7 +847,6 @@ class ZipFile: ...@@ -749,7 +847,6 @@ class ZipFile:
except BadZipfile: except BadZipfile:
return zinfo.filename return zinfo.filename
def getinfo(self, name): def getinfo(self, name):
"""Return the instance of ZipInfo given 'name'.""" """Return the instance of ZipInfo given 'name'."""
info = self.NameToInfo.get(name) info = self.NameToInfo.get(name)
...@@ -794,8 +891,8 @@ class ZipFile: ...@@ -794,8 +891,8 @@ class ZipFile:
zef_file.seek(zinfo.header_offset, 0) zef_file.seek(zinfo.header_offset, 0)
# Skip the file header: # Skip the file header:
fheader = zef_file.read(30) fheader = zef_file.read(sizeFileHeader)
if fheader[0:4] != stringFileHeader: if fheader[0:4] != magicFileHeader:
raise BadZipfile("Bad magic number for file header") raise BadZipfile("Bad magic number for file header")
fheader = struct.unpack(structFileHeader, fheader) fheader = struct.unpack(structFileHeader, fheader)
...@@ -1059,15 +1156,15 @@ class ZipFile: ...@@ -1059,15 +1156,15 @@ class ZipFile:
or zinfo.compress_size > ZIP64_LIMIT: or zinfo.compress_size > ZIP64_LIMIT:
extra.append(zinfo.file_size) extra.append(zinfo.file_size)
extra.append(zinfo.compress_size) extra.append(zinfo.compress_size)
file_size = 0xffffffff #-1 file_size = 0xffffffff
compress_size = 0xffffffff #-1 compress_size = 0xffffffff
else: else:
file_size = zinfo.file_size file_size = zinfo.file_size
compress_size = zinfo.compress_size compress_size = zinfo.compress_size
if zinfo.header_offset > ZIP64_LIMIT: if zinfo.header_offset > ZIP64_LIMIT:
extra.append(zinfo.header_offset) extra.append(zinfo.header_offset)
header_offset = 0xffffffff # -1 32 bit header_offset = 0xffffffff
else: else:
header_offset = zinfo.header_offset header_offset = zinfo.header_offset
...@@ -1084,15 +1181,26 @@ class ZipFile: ...@@ -1084,15 +1181,26 @@ class ZipFile:
extract_version = zinfo.extract_version extract_version = zinfo.extract_version
create_version = zinfo.create_version create_version = zinfo.create_version
filename, flag_bits = zinfo._encodeFilenameFlags() try:
centdir = struct.pack(structCentralDir, filename, flag_bits = zinfo._encodeFilenameFlags()
stringCentralDir, create_version, centdir = struct.pack(structCentralDir,
zinfo.create_system, extract_version, zinfo.reserved, magicCentralDir, create_version,
flag_bits, zinfo.compress_type, dostime, dosdate, zinfo.create_system, extract_version, zinfo.reserved,
zinfo.CRC, compress_size, file_size, flag_bits, zinfo.compress_type, dostime, dosdate,
len(filename), len(extra_data), len(zinfo.comment), zinfo.CRC, compress_size, file_size,
0, zinfo.internal_attr, zinfo.external_attr, len(filename), len(extra_data), len(zinfo.comment),
header_offset) 0, zinfo.internal_attr, zinfo.external_attr,
header_offset)
except DeprecationWarning:
print >>sys.stderr, (structCentralDir,
stringCentralDir, create_version,
zinfo.create_system, extract_version, zinfo.reserved,
zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
zinfo.CRC, compress_size, file_size,
len(zinfo.filename), len(extra_data), len(zinfo.comment),
0, zinfo.internal_attr, zinfo.external_attr,
header_offset)
raise
self.fp.write(centdir) self.fp.write(centdir)
self.fp.write(filename) self.fp.write(filename)
self.fp.write(extra_data) self.fp.write(extra_data)
...@@ -1100,27 +1208,35 @@ class ZipFile: ...@@ -1100,27 +1208,35 @@ class ZipFile:
pos2 = self.fp.tell() pos2 = self.fp.tell()
# Write end-of-zip-archive record # Write end-of-zip-archive record
centDirOffset = pos1
if pos1 > ZIP64_LIMIT: if pos1 > ZIP64_LIMIT:
# Need to write the ZIP64 end-of-archive records # Need to write the ZIP64 end-of-archive records
zip64endrec = struct.pack( zip64endrec = struct.pack(
structEndArchive64, stringEndArchive64, structEndCentDir64, magicEndCentDir64,
44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1) 44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
self.fp.write(zip64endrec) self.fp.write(zip64endrec)
zip64locrec = struct.pack( zip64locrec = struct.pack(
structEndArchive64Locator, structEndCentDir64Locator,
stringEndArchive64Locator, 0, pos2, 1) magicEndCentDir64Locator, 0, pos2, 1)
self.fp.write(zip64locrec) self.fp.write(zip64locrec)
centDirOffset = 0xFFFFFFFF
endrec = struct.pack(structEndArchive, stringEndArchive,
0, 0, count, count, pos2 - pos1, 0xffffffff, 0) # check for valid comment length
self.fp.write(endrec) if len(self.comment) >= ZIP_MAX_COMMENT:
if self.debug > 0:
else: msg = 'Archive comment is too long; truncating to %d bytes' \
endrec = struct.pack(structEndArchive, stringEndArchive, % ZIP_MAX_COMMENT
0, 0, count, count, pos2 - pos1, pos1, 0) self.comment = self.comment[:ZIP_MAX_COMMENT]
self.fp.write(endrec)
endrec = struct.pack(structEndCentDir, magicEndCentDir,
0, 0, count % ZIP_FILECOUNT_LIMIT,
count % ZIP_FILECOUNT_LIMIT, pos2 - pos1,
centDirOffset, len(self.comment))
self.fp.write(endrec)
self.fp.write(self.comment)
self.fp.flush() self.fp.flush()
if not self._filePassed: if not self._filePassed:
self.fp.close() self.fp.close()
self.fp = None self.fp = None
......
...@@ -17,8 +17,21 @@ Core and Builtins ...@@ -17,8 +17,21 @@ Core and Builtins
Library Library
------- -------
- Issue #2683: Fix inconsistency in subprocess.Popen.communicate(): the - Issue #2683: Fix inconsistency in subprocess.Popen.communicate(): the
argument now must be a bytes object in any case. argument now must be a bytes object in any case.
- Issue #1622: Correct interpretation of various ZIP header fields.
- Issue #1526: Allow more than 64k files to be added to Zip64 file.
- Issue #1746: Correct handling of zipfile archive comments (previously
archives with comments over 4k were flagged as invalid). Allow writing
Zip files with archive comments by setting the 'comment' attribute of a ZipFile.
- Issue #449227: Now with the rlcompleter module, callable objects are added
"(" when completed.
- Issue #3145: help("modules whatever") failed when trying to load the source - Issue #3145: help("modules whatever") failed when trying to load the source
code of every single module of the standard library, including invalid files code of every single module of the standard library, including invalid files
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment