Commit 45aa771f authored by Serhiy Storchaka's avatar Serhiy Storchaka

Issue #14099: ZipFile.open() no longer reopen the underlying file. Objects

returned by ZipFile.open() can now operate independently of the ZipFile even
if the ZipFile was created by passing in a file-like object as the first
argument to the constructor.
parent 2268c877
...@@ -186,14 +186,8 @@ ZipFile Objects ...@@ -186,14 +186,8 @@ ZipFile Objects
.. note:: .. note::
If the ZipFile was created by passing in a file-like object as the first Objects returned by :meth:`.open` can operate independently of the
argument to the constructor, then the object returned by :meth:`.open` shares the ZipFile.
ZipFile's file pointer. Under these circumstances, the object returned by
:meth:`.open` should not be used after any additional operations are performed
on the ZipFile object. If the ZipFile was created by passing in a string (the
filename) as the first argument to the constructor, then :meth:`.open` will
create a new file object that will be held by the ZipExtFile, allowing it to
operate independently of the ZipFile.
.. note:: .. note::
......
...@@ -14,7 +14,7 @@ import unittest ...@@ -14,7 +14,7 @@ import unittest
from StringIO import StringIO from StringIO import StringIO
from tempfile import TemporaryFile from tempfile import TemporaryFile
from random import randint, random from random import randint, random, getrandbits
from unittest import skipUnless from unittest import skipUnless
from test.test_support import TESTFN, TESTFN_UNICODE, TESTFN_ENCODING, \ from test.test_support import TESTFN, TESTFN_UNICODE, TESTFN_ENCODING, \
...@@ -35,6 +35,20 @@ SMALL_TEST_DATA = [('_ziptest1', '1q2w3e4r5t'), ...@@ -35,6 +35,20 @@ SMALL_TEST_DATA = [('_ziptest1', '1q2w3e4r5t'),
('ziptest2dir/ziptest3dir/_ziptest3', 'azsxdcfvgb'), ('ziptest2dir/ziptest3dir/_ziptest3', 'azsxdcfvgb'),
('ziptest2dir/ziptest3dir/ziptest4dir/_ziptest3', '6y7u8i9o0p')] ('ziptest2dir/ziptest3dir/ziptest4dir/_ziptest3', '6y7u8i9o0p')]
def getrandbytes(size):
return getrandbits(8 * size).to_bytes(size, 'little')
def getrandbytes(size):
return bytes(bytearray.fromhex('%0*x' % (2 * size, getrandbits(8 * size))))
def get_files(test):
yield TESTFN2
with TemporaryFile() as f:
yield f
test.assertFalse(f.closed)
with io.BytesIO() as f:
yield f
test.assertFalse(f.closed)
class TestsWithSourceFile(unittest.TestCase): class TestsWithSourceFile(unittest.TestCase):
def setUp(self): def setUp(self):
...@@ -1168,8 +1182,7 @@ class OtherTests(unittest.TestCase): ...@@ -1168,8 +1182,7 @@ class OtherTests(unittest.TestCase):
# than requested. # than requested.
for test_size in (1, 4095, 4096, 4097, 16384): for test_size in (1, 4095, 4096, 4097, 16384):
file_size = test_size + 1 file_size = test_size + 1
junk = b''.join(struct.pack('B', randint(0, 255)) junk = getrandbytes(file_size)
for x in range(file_size))
with zipfile.ZipFile(io.BytesIO(), "w", compression) as zipf: with zipfile.ZipFile(io.BytesIO(), "w", compression) as zipf:
zipf.writestr('foo', junk) zipf.writestr('foo', junk)
with zipf.open('foo', 'r') as fp: with zipf.open('foo', 'r') as fp:
...@@ -1400,50 +1413,110 @@ class TestsWithRandomBinaryFiles(unittest.TestCase): ...@@ -1400,50 +1413,110 @@ class TestsWithRandomBinaryFiles(unittest.TestCase):
@skipUnless(zlib, "requires zlib") @skipUnless(zlib, "requires zlib")
class TestsWithMultipleOpens(unittest.TestCase): class TestsWithMultipleOpens(unittest.TestCase):
def setUp(self): @classmethod
def setUpClass(cls):
cls.data1 = b'111' + getrandbytes(10000)
cls.data2 = b'222' + getrandbytes(10000)
def make_test_archive(self, f):
# Create the ZIP archive # Create the ZIP archive
with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_DEFLATED) as zipfp: with zipfile.ZipFile(f, "w", zipfile.ZIP_DEFLATED) as zipfp:
zipfp.writestr('ones', '1'*FIXEDTEST_SIZE) zipfp.writestr('ones', self.data1)
zipfp.writestr('twos', '2'*FIXEDTEST_SIZE) zipfp.writestr('twos', self.data2)
def test_same_file(self): def test_same_file(self):
# Verify that (when the ZipFile is in control of creating file objects) # Verify that (when the ZipFile is in control of creating file objects)
# multiple open() calls can be made without interfering with each other. # multiple open() calls can be made without interfering with each other.
with zipfile.ZipFile(TESTFN2, mode="r") as zipf: for f in get_files(self):
with zipf.open('ones') as zopen1, zipf.open('ones') as zopen2: self.make_test_archive(f)
data1 = zopen1.read(500) with zipfile.ZipFile(f, mode="r") as zipf:
data2 = zopen2.read(500) with zipf.open('ones') as zopen1, zipf.open('ones') as zopen2:
data1 += zopen1.read(500) data1 = zopen1.read(500)
data2 += zopen2.read(500) data2 = zopen2.read(500)
self.assertEqual(data1, data2) data1 += zopen1.read()
data2 += zopen2.read()
self.assertEqual(data1, data2)
self.assertEqual(data1, self.data1)
def test_different_file(self): def test_different_file(self):
# Verify that (when the ZipFile is in control of creating file objects) # Verify that (when the ZipFile is in control of creating file objects)
# multiple open() calls can be made without interfering with each other. # multiple open() calls can be made without interfering with each other.
with zipfile.ZipFile(TESTFN2, mode="r") as zipf: for f in get_files(self):
with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2: self.make_test_archive(f)
data1 = zopen1.read(500) with zipfile.ZipFile(f, mode="r") as zipf:
data2 = zopen2.read(500) with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:
data1 += zopen1.read(500) data1 = zopen1.read(500)
data2 += zopen2.read(500) data2 = zopen2.read(500)
self.assertEqual(data1, '1'*FIXEDTEST_SIZE) data1 += zopen1.read()
self.assertEqual(data2, '2'*FIXEDTEST_SIZE) data2 += zopen2.read()
self.assertEqual(data1, self.data1)
self.assertEqual(data2, self.data2)
def test_interleaved(self): def test_interleaved(self):
# Verify that (when the ZipFile is in control of creating file objects) # Verify that (when the ZipFile is in control of creating file objects)
# multiple open() calls can be made without interfering with each other. # multiple open() calls can be made without interfering with each other.
with zipfile.ZipFile(TESTFN2, mode="r") as zipf: for f in get_files(self):
with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2: self.make_test_archive(f)
with zipfile.ZipFile(f, mode="r") as zipf:
with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:
data1 = zopen1.read(500)
data2 = zopen2.read(500)
data1 += zopen1.read()
data2 += zopen2.read()
self.assertEqual(data1, self.data1)
self.assertEqual(data2, self.data2)
def test_read_after_close(self):
for f in get_files(self):
self.make_test_archive(f)
zopen1 = zopen2 = None
try:
with zipfile.ZipFile(f, 'r') as zipf:
zopen1 = zipf.open('ones')
zopen2 = zipf.open('twos')
data1 = zopen1.read(500) data1 = zopen1.read(500)
data2 = zopen2.read(500) data2 = zopen2.read(500)
data1 += zopen1.read(500) data1 += zopen1.read()
data2 += zopen2.read(500) data2 += zopen2.read()
self.assertEqual(data1, '1'*FIXEDTEST_SIZE) finally:
self.assertEqual(data2, '2'*FIXEDTEST_SIZE) if zopen1:
zopen1.close()
if zopen2:
zopen2.close()
self.assertEqual(data1, self.data1)
self.assertEqual(data2, self.data2)
def test_read_after_write(self):
for f in get_files(self):
with zipfile.ZipFile(f, 'w', zipfile.ZIP_DEFLATED) as zipf:
zipf.writestr('ones', self.data1)
zipf.writestr('twos', self.data2)
with zipf.open('ones') as zopen1:
data1 = zopen1.read(500)
self.assertEqual(data1, self.data1[:500])
with zipfile.ZipFile(f, 'r') as zipf:
data1 = zipf.read('ones')
data2 = zipf.read('twos')
self.assertEqual(data1, self.data1)
self.assertEqual(data2, self.data2)
def test_write_after_read(self):
for f in get_files(self):
with zipfile.ZipFile(f, "w", zipfile.ZIP_DEFLATED) as zipf:
zipf.writestr('ones', self.data1)
with zipf.open('ones') as zopen1:
zopen1.read(500)
zipf.writestr('twos', self.data2)
with zipfile.ZipFile(f, 'r') as zipf:
data1 = zipf.read('ones')
data2 = zipf.read('twos')
self.assertEqual(data1, self.data1)
self.assertEqual(data2, self.data2)
def test_many_opens(self): def test_many_opens(self):
# Verify that read() and open() promptly close the file descriptor, # Verify that read() and open() promptly close the file descriptor,
# and don't rely on the garbage collector to free resources. # and don't rely on the garbage collector to free resources.
self.make_test_archive(TESTFN2)
with zipfile.ZipFile(TESTFN2, mode="r") as zipf: with zipfile.ZipFile(TESTFN2, mode="r") as zipf:
for x in range(100): for x in range(100):
zipf.read('ones') zipf.read('ones')
......
...@@ -498,6 +498,25 @@ compressor_names = { ...@@ -498,6 +498,25 @@ compressor_names = {
} }
class _SharedFile:
def __init__(self, file, pos, close):
self._file = file
self._pos = pos
self._close = close
def read(self, n=-1):
self._file.seek(self._pos)
data = self._file.read(n)
self._pos = self._file.tell()
return data
def close(self):
if self._file is not None:
fileobj = self._file
self._file = None
self._close(fileobj)
class ZipExtFile(io.BufferedIOBase): class ZipExtFile(io.BufferedIOBase):
"""File-like object for reading an archive member. """File-like object for reading an archive member.
Is returned by ZipFile.open(). Is returned by ZipFile.open().
...@@ -743,7 +762,7 @@ class ZipFile(object): ...@@ -743,7 +762,7 @@ class ZipFile(object):
self.NameToInfo = {} # Find file info given name self.NameToInfo = {} # Find file info given name
self.filelist = [] # List of ZipInfo instances for archive self.filelist = [] # List of ZipInfo instances for archive
self.compression = compression # Method of compression self.compression = compression # Method of compression
self.mode = key = mode.replace('b', '')[0] self.mode = mode
self.pwd = None self.pwd = None
self._comment = '' self._comment = ''
...@@ -751,28 +770,33 @@ class ZipFile(object): ...@@ -751,28 +770,33 @@ class ZipFile(object):
if isinstance(file, basestring): if isinstance(file, basestring):
self._filePassed = 0 self._filePassed = 0
self.filename = file self.filename = file
modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'} modeDict = {'r' : 'rb', 'w': 'w+b', 'a' : 'r+b',
try: 'r+b': 'w+b', 'w+b': 'wb'}
self.fp = open(file, modeDict[mode]) filemode = modeDict[mode]
except IOError: while True:
if mode == 'a': try:
mode = key = 'w' self.fp = io.open(file, filemode)
self.fp = open(file, modeDict[mode]) except IOError:
else: if filemode in modeDict:
filemode = modeDict[filemode]
continue
raise raise
break
else: else:
self._filePassed = 1 self._filePassed = 1
self.fp = file self.fp = file
self.filename = getattr(file, 'name', None) self.filename = getattr(file, 'name', None)
self._fileRefCnt = 1
try: try:
if key == 'r': if mode == 'r':
self._RealGetContents() self._RealGetContents()
elif key == 'w': elif mode == 'w':
# set the modified flag so central directory gets written # set the modified flag so central directory gets written
# even if no files are added to the archive # even if no files are added to the archive
self._didModify = True self._didModify = True
elif key == 'a': self.start_dir = 0
elif mode == 'a':
try: try:
# See if file is a zip file # See if file is a zip file
self._RealGetContents() self._RealGetContents()
...@@ -785,13 +809,13 @@ class ZipFile(object): ...@@ -785,13 +809,13 @@ class ZipFile(object):
# set the modified flag so central directory gets written # set the modified flag so central directory gets written
# even if no files are added to the archive # even if no files are added to the archive
self._didModify = True self._didModify = True
self.start_dir = self.fp.tell()
else: else:
raise RuntimeError('Mode must be "r", "w" or "a"') raise RuntimeError('Mode must be "r", "w" or "a"')
except: except:
fp = self.fp fp = self.fp
self.fp = None self.fp = None
if not self._filePassed: self._fpclose(fp)
fp.close()
raise raise
def __enter__(self): def __enter__(self):
...@@ -942,26 +966,17 @@ class ZipFile(object): ...@@ -942,26 +966,17 @@ class ZipFile(object):
raise RuntimeError, \ raise RuntimeError, \
"Attempt to read ZIP archive that was already closed" "Attempt to read ZIP archive that was already closed"
# Only open a new file for instances where we were not # Make sure we have an info object
# given a file object in the constructor if isinstance(name, ZipInfo):
if self._filePassed: # 'name' is already an info object
zef_file = self.fp zinfo = name
should_close = False
else: else:
zef_file = open(self.filename, 'rb') # Get info object for name
should_close = True zinfo = self.getinfo(name)
self._fileRefCnt += 1
zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose)
try: try:
# Make sure we have an info object
if isinstance(name, ZipInfo):
# 'name' is already an info object
zinfo = name
else:
# Get info object for name
zinfo = self.getinfo(name)
zef_file.seek(zinfo.header_offset, 0)
# Skip the file header: # Skip the file header:
fheader = zef_file.read(sizeFileHeader) fheader = zef_file.read(sizeFileHeader)
if len(fheader) != sizeFileHeader: if len(fheader) != sizeFileHeader:
...@@ -1006,11 +1021,9 @@ class ZipFile(object): ...@@ -1006,11 +1021,9 @@ class ZipFile(object):
if ord(h[11]) != check_byte: if ord(h[11]) != check_byte:
raise RuntimeError("Bad password for file", name) raise RuntimeError("Bad password for file", name)
return ZipExtFile(zef_file, mode, zinfo, zd, return ZipExtFile(zef_file, mode, zinfo, zd, True)
close_fileobj=should_close)
except: except:
if should_close: zef_file.close()
zef_file.close()
raise raise
def extract(self, member, path=None, pwd=None): def extract(self, member, path=None, pwd=None):
...@@ -1141,6 +1154,7 @@ class ZipFile(object): ...@@ -1141,6 +1154,7 @@ class ZipFile(object):
zinfo.file_size = st.st_size zinfo.file_size = st.st_size
zinfo.flag_bits = 0x00 zinfo.flag_bits = 0x00
self.fp.seek(self.start_dir, 0)
zinfo.header_offset = self.fp.tell() # Start of header bytes zinfo.header_offset = self.fp.tell() # Start of header bytes
self._writecheck(zinfo) self._writecheck(zinfo)
...@@ -1154,6 +1168,7 @@ class ZipFile(object): ...@@ -1154,6 +1168,7 @@ class ZipFile(object):
self.filelist.append(zinfo) self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename] = zinfo self.NameToInfo[zinfo.filename] = zinfo
self.fp.write(zinfo.FileHeader(False)) self.fp.write(zinfo.FileHeader(False))
self.start_dir = self.fp.tell()
return return
with open(filename, "rb") as fp: with open(filename, "rb") as fp:
...@@ -1196,10 +1211,10 @@ class ZipFile(object): ...@@ -1196,10 +1211,10 @@ class ZipFile(object):
raise RuntimeError('Compressed size larger than uncompressed size') raise RuntimeError('Compressed size larger than uncompressed size')
# Seek backwards and write file header (which will now include # Seek backwards and write file header (which will now include
# correct CRC and file sizes) # correct CRC and file sizes)
position = self.fp.tell() # Preserve current position in file self.start_dir = self.fp.tell() # Preserve current position in file
self.fp.seek(zinfo.header_offset, 0) self.fp.seek(zinfo.header_offset, 0)
self.fp.write(zinfo.FileHeader(zip64)) self.fp.write(zinfo.FileHeader(zip64))
self.fp.seek(position, 0) self.fp.seek(self.start_dir, 0)
self.filelist.append(zinfo) self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename] = zinfo self.NameToInfo[zinfo.filename] = zinfo
...@@ -1228,6 +1243,7 @@ class ZipFile(object): ...@@ -1228,6 +1243,7 @@ class ZipFile(object):
zinfo.compress_type = compress_type zinfo.compress_type = compress_type
zinfo.file_size = len(bytes) # Uncompressed size zinfo.file_size = len(bytes) # Uncompressed size
self.fp.seek(self.start_dir, 0)
zinfo.header_offset = self.fp.tell() # Start of header bytes zinfo.header_offset = self.fp.tell() # Start of header bytes
self._writecheck(zinfo) self._writecheck(zinfo)
self._didModify = True self._didModify = True
...@@ -1251,6 +1267,7 @@ class ZipFile(object): ...@@ -1251,6 +1267,7 @@ class ZipFile(object):
self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size, self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
zinfo.file_size)) zinfo.file_size))
self.fp.flush() self.fp.flush()
self.start_dir = self.fp.tell()
self.filelist.append(zinfo) self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename] = zinfo self.NameToInfo[zinfo.filename] = zinfo
...@@ -1266,7 +1283,7 @@ class ZipFile(object): ...@@ -1266,7 +1283,7 @@ class ZipFile(object):
try: try:
if self.mode in ("w", "a") and self._didModify: # write ending records if self.mode in ("w", "a") and self._didModify: # write ending records
pos1 = self.fp.tell() self.fp.seek(self.start_dir, 0)
for zinfo in self.filelist: # write central directory for zinfo in self.filelist: # write central directory
dt = zinfo.date_time dt = zinfo.date_time
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
...@@ -1329,8 +1346,8 @@ class ZipFile(object): ...@@ -1329,8 +1346,8 @@ class ZipFile(object):
pos2 = self.fp.tell() pos2 = self.fp.tell()
# Write end-of-zip-archive record # Write end-of-zip-archive record
centDirCount = len(self.filelist) centDirCount = len(self.filelist)
centDirSize = pos2 - pos1 centDirSize = pos2 - self.start_dir
centDirOffset = pos1 centDirOffset = self.start_dir
requires_zip64 = None requires_zip64 = None
if centDirCount > ZIP_FILECOUNT_LIMIT: if centDirCount > ZIP_FILECOUNT_LIMIT:
requires_zip64 = "Files count" requires_zip64 = "Files count"
...@@ -1366,8 +1383,13 @@ class ZipFile(object): ...@@ -1366,8 +1383,13 @@ class ZipFile(object):
finally: finally:
fp = self.fp fp = self.fp
self.fp = None self.fp = None
if not self._filePassed: self._fpclose(fp)
fp.close()
def _fpclose(self, fp):
assert self._fileRefCnt > 0
self._fileRefCnt -= 1
if not self._fileRefCnt and not self._filePassed:
fp.close()
class PyZipFile(ZipFile): class PyZipFile(ZipFile):
......
...@@ -13,6 +13,11 @@ Core and Builtins ...@@ -13,6 +13,11 @@ Core and Builtins
Library Library
------- -------
- Issue #14099: ZipFile.open() no longer reopen the underlying file. Objects
returned by ZipFile.open() can now operate independently of the ZipFile even
if the ZipFile was created by passing in a file-like object as the first
argument to the constructor.
- Issue #21032. Fixed socket leak if HTTPConnection.getresponse() fails. - Issue #21032. Fixed socket leak if HTTPConnection.getresponse() fails.
Original patch by Martin Panter. Original patch by Martin Panter.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment