Commit 45aa771f authored by Serhiy Storchaka's avatar Serhiy Storchaka

Issue #14099: ZipFile.open() no longer reopen the underlying file. Objects

returned by ZipFile.open() can now operate independently of the ZipFile even
if the ZipFile was created by passing in a file-like object as the first
argument to the constructor.
parent 2268c877
......@@ -186,14 +186,8 @@ ZipFile Objects
.. note::
If the ZipFile was created by passing in a file-like object as the first
argument to the constructor, then the object returned by :meth:`.open` shares the
ZipFile's file pointer. Under these circumstances, the object returned by
:meth:`.open` should not be used after any additional operations are performed
on the ZipFile object. If the ZipFile was created by passing in a string (the
filename) as the first argument to the constructor, then :meth:`.open` will
create a new file object that will be held by the ZipExtFile, allowing it to
operate independently of the ZipFile.
Objects returned by :meth:`.open` can operate independently of the
ZipFile.
.. note::
......
......@@ -14,7 +14,7 @@ import unittest
from StringIO import StringIO
from tempfile import TemporaryFile
from random import randint, random
from random import randint, random, getrandbits
from unittest import skipUnless
from test.test_support import TESTFN, TESTFN_UNICODE, TESTFN_ENCODING, \
......@@ -35,6 +35,20 @@ SMALL_TEST_DATA = [('_ziptest1', '1q2w3e4r5t'),
('ziptest2dir/ziptest3dir/_ziptest3', 'azsxdcfvgb'),
('ziptest2dir/ziptest3dir/ziptest4dir/_ziptest3', '6y7u8i9o0p')]
def getrandbytes(size):
return getrandbits(8 * size).to_bytes(size, 'little')
def getrandbytes(size):
return bytes(bytearray.fromhex('%0*x' % (2 * size, getrandbits(8 * size))))
def get_files(test):
yield TESTFN2
with TemporaryFile() as f:
yield f
test.assertFalse(f.closed)
with io.BytesIO() as f:
yield f
test.assertFalse(f.closed)
class TestsWithSourceFile(unittest.TestCase):
def setUp(self):
......@@ -1168,8 +1182,7 @@ class OtherTests(unittest.TestCase):
# than requested.
for test_size in (1, 4095, 4096, 4097, 16384):
file_size = test_size + 1
junk = b''.join(struct.pack('B', randint(0, 255))
for x in range(file_size))
junk = getrandbytes(file_size)
with zipfile.ZipFile(io.BytesIO(), "w", compression) as zipf:
zipf.writestr('foo', junk)
with zipf.open('foo', 'r') as fp:
......@@ -1400,50 +1413,110 @@ class TestsWithRandomBinaryFiles(unittest.TestCase):
@skipUnless(zlib, "requires zlib")
class TestsWithMultipleOpens(unittest.TestCase):
def setUp(self):
@classmethod
def setUpClass(cls):
cls.data1 = b'111' + getrandbytes(10000)
cls.data2 = b'222' + getrandbytes(10000)
def make_test_archive(self, f):
# Create the ZIP archive
with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_DEFLATED) as zipfp:
zipfp.writestr('ones', '1'*FIXEDTEST_SIZE)
zipfp.writestr('twos', '2'*FIXEDTEST_SIZE)
with zipfile.ZipFile(f, "w", zipfile.ZIP_DEFLATED) as zipfp:
zipfp.writestr('ones', self.data1)
zipfp.writestr('twos', self.data2)
def test_same_file(self):
# Verify that (when the ZipFile is in control of creating file objects)
# multiple open() calls can be made without interfering with each other.
with zipfile.ZipFile(TESTFN2, mode="r") as zipf:
with zipf.open('ones') as zopen1, zipf.open('ones') as zopen2:
data1 = zopen1.read(500)
data2 = zopen2.read(500)
data1 += zopen1.read(500)
data2 += zopen2.read(500)
self.assertEqual(data1, data2)
for f in get_files(self):
self.make_test_archive(f)
with zipfile.ZipFile(f, mode="r") as zipf:
with zipf.open('ones') as zopen1, zipf.open('ones') as zopen2:
data1 = zopen1.read(500)
data2 = zopen2.read(500)
data1 += zopen1.read()
data2 += zopen2.read()
self.assertEqual(data1, data2)
self.assertEqual(data1, self.data1)
def test_different_file(self):
# Verify that (when the ZipFile is in control of creating file objects)
# multiple open() calls can be made without interfering with each other.
with zipfile.ZipFile(TESTFN2, mode="r") as zipf:
with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:
data1 = zopen1.read(500)
data2 = zopen2.read(500)
data1 += zopen1.read(500)
data2 += zopen2.read(500)
self.assertEqual(data1, '1'*FIXEDTEST_SIZE)
self.assertEqual(data2, '2'*FIXEDTEST_SIZE)
for f in get_files(self):
self.make_test_archive(f)
with zipfile.ZipFile(f, mode="r") as zipf:
with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:
data1 = zopen1.read(500)
data2 = zopen2.read(500)
data1 += zopen1.read()
data2 += zopen2.read()
self.assertEqual(data1, self.data1)
self.assertEqual(data2, self.data2)
def test_interleaved(self):
# Verify that (when the ZipFile is in control of creating file objects)
# multiple open() calls can be made without interfering with each other.
with zipfile.ZipFile(TESTFN2, mode="r") as zipf:
with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:
for f in get_files(self):
self.make_test_archive(f)
with zipfile.ZipFile(f, mode="r") as zipf:
with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:
data1 = zopen1.read(500)
data2 = zopen2.read(500)
data1 += zopen1.read()
data2 += zopen2.read()
self.assertEqual(data1, self.data1)
self.assertEqual(data2, self.data2)
def test_read_after_close(self):
for f in get_files(self):
self.make_test_archive(f)
zopen1 = zopen2 = None
try:
with zipfile.ZipFile(f, 'r') as zipf:
zopen1 = zipf.open('ones')
zopen2 = zipf.open('twos')
data1 = zopen1.read(500)
data2 = zopen2.read(500)
data1 += zopen1.read(500)
data2 += zopen2.read(500)
self.assertEqual(data1, '1'*FIXEDTEST_SIZE)
self.assertEqual(data2, '2'*FIXEDTEST_SIZE)
data1 += zopen1.read()
data2 += zopen2.read()
finally:
if zopen1:
zopen1.close()
if zopen2:
zopen2.close()
self.assertEqual(data1, self.data1)
self.assertEqual(data2, self.data2)
def test_read_after_write(self):
for f in get_files(self):
with zipfile.ZipFile(f, 'w', zipfile.ZIP_DEFLATED) as zipf:
zipf.writestr('ones', self.data1)
zipf.writestr('twos', self.data2)
with zipf.open('ones') as zopen1:
data1 = zopen1.read(500)
self.assertEqual(data1, self.data1[:500])
with zipfile.ZipFile(f, 'r') as zipf:
data1 = zipf.read('ones')
data2 = zipf.read('twos')
self.assertEqual(data1, self.data1)
self.assertEqual(data2, self.data2)
def test_write_after_read(self):
for f in get_files(self):
with zipfile.ZipFile(f, "w", zipfile.ZIP_DEFLATED) as zipf:
zipf.writestr('ones', self.data1)
with zipf.open('ones') as zopen1:
zopen1.read(500)
zipf.writestr('twos', self.data2)
with zipfile.ZipFile(f, 'r') as zipf:
data1 = zipf.read('ones')
data2 = zipf.read('twos')
self.assertEqual(data1, self.data1)
self.assertEqual(data2, self.data2)
def test_many_opens(self):
# Verify that read() and open() promptly close the file descriptor,
# and don't rely on the garbage collector to free resources.
self.make_test_archive(TESTFN2)
with zipfile.ZipFile(TESTFN2, mode="r") as zipf:
for x in range(100):
zipf.read('ones')
......
......@@ -498,6 +498,25 @@ compressor_names = {
}
class _SharedFile:
def __init__(self, file, pos, close):
self._file = file
self._pos = pos
self._close = close
def read(self, n=-1):
self._file.seek(self._pos)
data = self._file.read(n)
self._pos = self._file.tell()
return data
def close(self):
if self._file is not None:
fileobj = self._file
self._file = None
self._close(fileobj)
class ZipExtFile(io.BufferedIOBase):
"""File-like object for reading an archive member.
Is returned by ZipFile.open().
......@@ -743,7 +762,7 @@ class ZipFile(object):
self.NameToInfo = {} # Find file info given name
self.filelist = [] # List of ZipInfo instances for archive
self.compression = compression # Method of compression
self.mode = key = mode.replace('b', '')[0]
self.mode = mode
self.pwd = None
self._comment = ''
......@@ -751,28 +770,33 @@ class ZipFile(object):
if isinstance(file, basestring):
self._filePassed = 0
self.filename = file
modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
try:
self.fp = open(file, modeDict[mode])
except IOError:
if mode == 'a':
mode = key = 'w'
self.fp = open(file, modeDict[mode])
else:
modeDict = {'r' : 'rb', 'w': 'w+b', 'a' : 'r+b',
'r+b': 'w+b', 'w+b': 'wb'}
filemode = modeDict[mode]
while True:
try:
self.fp = io.open(file, filemode)
except IOError:
if filemode in modeDict:
filemode = modeDict[filemode]
continue
raise
break
else:
self._filePassed = 1
self.fp = file
self.filename = getattr(file, 'name', None)
self._fileRefCnt = 1
try:
if key == 'r':
if mode == 'r':
self._RealGetContents()
elif key == 'w':
elif mode == 'w':
# set the modified flag so central directory gets written
# even if no files are added to the archive
self._didModify = True
elif key == 'a':
self.start_dir = 0
elif mode == 'a':
try:
# See if file is a zip file
self._RealGetContents()
......@@ -785,13 +809,13 @@ class ZipFile(object):
# set the modified flag so central directory gets written
# even if no files are added to the archive
self._didModify = True
self.start_dir = self.fp.tell()
else:
raise RuntimeError('Mode must be "r", "w" or "a"')
except:
fp = self.fp
self.fp = None
if not self._filePassed:
fp.close()
self._fpclose(fp)
raise
def __enter__(self):
......@@ -942,26 +966,17 @@ class ZipFile(object):
raise RuntimeError, \
"Attempt to read ZIP archive that was already closed"
# Only open a new file for instances where we were not
# given a file object in the constructor
if self._filePassed:
zef_file = self.fp
should_close = False
# Make sure we have an info object
if isinstance(name, ZipInfo):
# 'name' is already an info object
zinfo = name
else:
zef_file = open(self.filename, 'rb')
should_close = True
# Get info object for name
zinfo = self.getinfo(name)
self._fileRefCnt += 1
zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose)
try:
# Make sure we have an info object
if isinstance(name, ZipInfo):
# 'name' is already an info object
zinfo = name
else:
# Get info object for name
zinfo = self.getinfo(name)
zef_file.seek(zinfo.header_offset, 0)
# Skip the file header:
fheader = zef_file.read(sizeFileHeader)
if len(fheader) != sizeFileHeader:
......@@ -1006,11 +1021,9 @@ class ZipFile(object):
if ord(h[11]) != check_byte:
raise RuntimeError("Bad password for file", name)
return ZipExtFile(zef_file, mode, zinfo, zd,
close_fileobj=should_close)
return ZipExtFile(zef_file, mode, zinfo, zd, True)
except:
if should_close:
zef_file.close()
zef_file.close()
raise
def extract(self, member, path=None, pwd=None):
......@@ -1141,6 +1154,7 @@ class ZipFile(object):
zinfo.file_size = st.st_size
zinfo.flag_bits = 0x00
self.fp.seek(self.start_dir, 0)
zinfo.header_offset = self.fp.tell() # Start of header bytes
self._writecheck(zinfo)
......@@ -1154,6 +1168,7 @@ class ZipFile(object):
self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename] = zinfo
self.fp.write(zinfo.FileHeader(False))
self.start_dir = self.fp.tell()
return
with open(filename, "rb") as fp:
......@@ -1196,10 +1211,10 @@ class ZipFile(object):
raise RuntimeError('Compressed size larger than uncompressed size')
# Seek backwards and write file header (which will now include
# correct CRC and file sizes)
position = self.fp.tell() # Preserve current position in file
self.start_dir = self.fp.tell() # Preserve current position in file
self.fp.seek(zinfo.header_offset, 0)
self.fp.write(zinfo.FileHeader(zip64))
self.fp.seek(position, 0)
self.fp.seek(self.start_dir, 0)
self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename] = zinfo
......@@ -1228,6 +1243,7 @@ class ZipFile(object):
zinfo.compress_type = compress_type
zinfo.file_size = len(bytes) # Uncompressed size
self.fp.seek(self.start_dir, 0)
zinfo.header_offset = self.fp.tell() # Start of header bytes
self._writecheck(zinfo)
self._didModify = True
......@@ -1251,6 +1267,7 @@ class ZipFile(object):
self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
zinfo.file_size))
self.fp.flush()
self.start_dir = self.fp.tell()
self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename] = zinfo
......@@ -1266,7 +1283,7 @@ class ZipFile(object):
try:
if self.mode in ("w", "a") and self._didModify: # write ending records
pos1 = self.fp.tell()
self.fp.seek(self.start_dir, 0)
for zinfo in self.filelist: # write central directory
dt = zinfo.date_time
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
......@@ -1329,8 +1346,8 @@ class ZipFile(object):
pos2 = self.fp.tell()
# Write end-of-zip-archive record
centDirCount = len(self.filelist)
centDirSize = pos2 - pos1
centDirOffset = pos1
centDirSize = pos2 - self.start_dir
centDirOffset = self.start_dir
requires_zip64 = None
if centDirCount > ZIP_FILECOUNT_LIMIT:
requires_zip64 = "Files count"
......@@ -1366,8 +1383,13 @@ class ZipFile(object):
finally:
fp = self.fp
self.fp = None
if not self._filePassed:
fp.close()
self._fpclose(fp)
def _fpclose(self, fp):
assert self._fileRefCnt > 0
self._fileRefCnt -= 1
if not self._fileRefCnt and not self._filePassed:
fp.close()
class PyZipFile(ZipFile):
......
......@@ -13,6 +13,11 @@ Core and Builtins
Library
-------
- Issue #14099: ZipFile.open() no longer reopen the underlying file. Objects
returned by ZipFile.open() can now operate independently of the ZipFile even
if the ZipFile was created by passing in a file-like object as the first
argument to the constructor.
- Issue #21032. Fixed socket leak if HTTPConnection.getresponse() fails.
Original patch by Martin Panter.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment