Commit 77d89972 authored by Serhiy Storchaka's avatar Serhiy Storchaka

Issue #23252: Added support for writing ZIP files to unseekable streams.

parent f07a4b66
...@@ -140,6 +140,7 @@ ZipFile Objects ...@@ -140,6 +140,7 @@ ZipFile Objects
ZIP file, then a new ZIP archive is appended to the file. This is meant for ZIP file, then a new ZIP archive is appended to the file. This is meant for
adding a ZIP archive to another file (such as :file:`python.exe`). If adding a ZIP archive to another file (such as :file:`python.exe`). If
*mode* is ``a`` and the file does not exist at all, it is created. *mode* is ``a`` and the file does not exist at all, it is created.
If *mode* is ``r`` or ``a``, the file should be seekable.
*compression* is the ZIP compression method to use when writing the archive, *compression* is the ZIP compression method to use when writing the archive,
and should be :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`, and should be :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`,
:const:`ZIP_BZIP2` or :const:`ZIP_LZMA`; unrecognized :const:`ZIP_BZIP2` or :const:`ZIP_LZMA`; unrecognized
...@@ -171,6 +172,9 @@ ZipFile Objects ...@@ -171,6 +172,9 @@ ZipFile Objects
.. versionchanged:: 3.4 .. versionchanged:: 3.4
ZIP64 extensions are enabled by default. ZIP64 extensions are enabled by default.
.. versionchanged:: 3.5
Added support for writing to unseekable streams.
.. method:: ZipFile.close() .. method:: ZipFile.close()
...@@ -328,7 +332,6 @@ ZipFile Objects ...@@ -328,7 +332,6 @@ ZipFile Objects
If ``arcname`` (or ``filename``, if ``arcname`` is not given) contains a null If ``arcname`` (or ``filename``, if ``arcname`` is not given) contains a null
byte, the name of the file in the archive will be truncated at the null byte. byte, the name of the file in the archive will be truncated at the null byte.
.. method:: ZipFile.writestr(zinfo_or_arcname, bytes[, compress_type]) .. method:: ZipFile.writestr(zinfo_or_arcname, bytes[, compress_type])
Write the string *bytes* to the archive; *zinfo_or_arcname* is either the file Write the string *bytes* to the archive; *zinfo_or_arcname* is either the file
......
...@@ -448,6 +448,12 @@ faulthandler ...@@ -448,6 +448,12 @@ faulthandler
:func:`~faulthandler.dump_traceback_later` functions now accept file :func:`~faulthandler.dump_traceback_later` functions now accept file
descriptors. (Contributed by Wei Wu in :issue:`23566`.) descriptors. (Contributed by Wei Wu in :issue:`23566`.)
zipfile
-------
* Added support for writing ZIP files to unseekable streams.
(Contributed by Serhiy Storchaka in :issue:`23252`.)
Optimizations Optimizations
============= =============
......
...@@ -1685,25 +1685,63 @@ class Tellable: ...@@ -1685,25 +1685,63 @@ class Tellable:
self.offset = 0 self.offset = 0
def write(self, data): def write(self, data):
self.offset += self.fp.write(data) n = self.fp.write(data)
self.offset += n
return n
def tell(self): def tell(self):
return self.offset return self.offset
def flush(self): def flush(self):
pass self.fp.flush()
class Unseekable:
    """Write-only wrapper that hides tell() and seek() of a stream.

    Only write() and flush() are forwarded to the wrapped object, so code
    receiving this wrapper has to cope with a stream that can report or
    change its position in no way.
    """

    def __init__(self, fp):
        # The wrapped stream; every operation is delegated to it.
        self.fp = fp

    def flush(self):
        """Flush the wrapped stream."""
        self.fp.flush()

    def write(self, data):
        """Pass *data* to the wrapped stream and return its result."""
        written = self.fp.write(data)
        return written
class UnseekableTests(unittest.TestCase): class UnseekableTests(unittest.TestCase):
def test_writestr_tellable(self): def test_writestr(self):
f = io.BytesIO() for wrapper in (lambda f: f), Tellable, Unseekable:
with zipfile.ZipFile(Tellable(f), 'w', zipfile.ZIP_STORED) as zipfp: with self.subTest(wrapper=wrapper):
zipfp.writestr('ones', b'111') f = io.BytesIO()
zipfp.writestr('twos', b'222') f.write(b'abc')
with zipfile.ZipFile(f, mode='r') as zipf: bf = io.BufferedWriter(f)
with zipf.open('ones') as zopen: with zipfile.ZipFile(wrapper(bf), 'w', zipfile.ZIP_STORED) as zipfp:
self.assertEqual(zopen.read(), b'111') zipfp.writestr('ones', b'111')
with zipf.open('twos') as zopen: zipfp.writestr('twos', b'222')
self.assertEqual(zopen.read(), b'222') self.assertEqual(f.getvalue()[:5], b'abcPK')
with zipfile.ZipFile(f, mode='r') as zipf:
with zipf.open('ones') as zopen:
self.assertEqual(zopen.read(), b'111')
with zipf.open('twos') as zopen:
self.assertEqual(zopen.read(), b'222')
def test_write(self):
    """ZipFile.write() must work on plain, tell-only and unseekable streams.

    For each kind of wrapper an archive is written after a three-byte
    prefix, then read back from the underlying BytesIO to check both
    members.
    """
    members = (('ones', b'111'), ('twos', b'222'))
    for wrapper in (lambda f: f), Tellable, Unseekable:
        with self.subTest(wrapper=wrapper):
            f = io.BytesIO()
            # Leading junk: the archive has to start right after it.
            f.write(b'abc')
            bf = io.BufferedWriter(f)
            with zipfile.ZipFile(wrapper(bf), 'w', zipfile.ZIP_STORED) as zipfp:
                self.addCleanup(unlink, TESTFN)
                for arcname, payload in members:
                    # Reuse the same temporary file for every member.
                    with open(TESTFN, 'wb') as f2:
                        f2.write(payload)
                    zipfp.write(TESTFN, arcname)
            # The first local file header must directly follow the prefix.
            self.assertEqual(f.getvalue()[:5], b'abcPK')
            with zipfile.ZipFile(f, mode='r') as zipf:
                for arcname, payload in members:
                    with zipf.open(arcname) as zopen:
                        self.assertEqual(zopen.read(), payload)
@requires_zlib @requires_zlib
......
...@@ -667,6 +667,26 @@ class _SharedFile: ...@@ -667,6 +667,26 @@ class _SharedFile:
self._file = None self._file = None
self._close(fileobj) self._close(fileobj)
# Provide the tell method for unseekable stream
class _Tellable:
    """Write-only wrapper that adds tell() to a stream lacking it.

    The position is derived purely from the byte counts of the writes that
    go through this wrapper, so it starts at 0 when the wrapper is created
    regardless of what the underlying stream already contains.
    """

    def __init__(self, fp):
        # The wrapped stream; only write(), flush() and close() are used.
        self.fp = fp
        # Number of bytes written through this wrapper so far.
        self.offset = 0

    def write(self, data):
        """Write *data* to the wrapped stream and return the byte count."""
        n = self.fp.write(data)
        if n is None:
            # Duck-typed writers frequently return None instead of a byte
            # count; treat that as "everything was written" rather than
            # failing with a TypeError on the addition below.
            n = memoryview(data).nbytes
        self.offset += n
        return n

    def tell(self):
        """Return the number of bytes written through this wrapper."""
        return self.offset

    def flush(self):
        """Flush the wrapped stream."""
        self.fp.flush()

    def close(self):
        """Close the wrapped stream."""
        self.fp.close()
class ZipExtFile(io.BufferedIOBase): class ZipExtFile(io.BufferedIOBase):
"""File-like object for reading an archive member. """File-like object for reading an archive member.
...@@ -994,6 +1014,7 @@ class ZipFile: ...@@ -994,6 +1014,7 @@ class ZipFile:
self.filename = getattr(file, 'name', None) self.filename = getattr(file, 'name', None)
self._fileRefCnt = 1 self._fileRefCnt = 1
self._lock = threading.RLock() self._lock = threading.RLock()
self._seekable = True
try: try:
if mode == 'r': if mode == 'r':
...@@ -1002,13 +1023,24 @@ class ZipFile: ...@@ -1002,13 +1023,24 @@ class ZipFile:
# set the modified flag so central directory gets written # set the modified flag so central directory gets written
# even if no files are added to the archive # even if no files are added to the archive
self._didModify = True self._didModify = True
self.start_dir = self.fp.tell() try:
self.start_dir = self.fp.tell()
except (AttributeError, OSError):
self.fp = _Tellable(self.fp)
self.start_dir = 0
self._seekable = False
else:
# Some file-like objects can provide tell() but not seek()
try:
self.fp.seek(self.start_dir)
except (AttributeError, OSError):
self._seekable = False
elif mode == 'a': elif mode == 'a':
try: try:
# See if file is a zip file # See if file is a zip file
self._RealGetContents() self._RealGetContents()
# seek to start of directory and overwrite # seek to start of directory and overwrite
self.fp.seek(self.start_dir, 0) self.fp.seek(self.start_dir)
except BadZipFile: except BadZipFile:
# file is not a zip file, just append # file is not a zip file, just append
self.fp.seek(0, 2) self.fp.seek(0, 2)
...@@ -1415,7 +1447,8 @@ class ZipFile: ...@@ -1415,7 +1447,8 @@ class ZipFile:
zinfo.file_size = st.st_size zinfo.file_size = st.st_size
zinfo.flag_bits = 0x00 zinfo.flag_bits = 0x00
with self._lock: with self._lock:
self.fp.seek(self.start_dir, 0) if self._seekable:
self.fp.seek(self.start_dir)
zinfo.header_offset = self.fp.tell() # Start of header bytes zinfo.header_offset = self.fp.tell() # Start of header bytes
if zinfo.compress_type == ZIP_LZMA: if zinfo.compress_type == ZIP_LZMA:
# Compressed data includes an end-of-stream (EOS) marker # Compressed data includes an end-of-stream (EOS) marker
...@@ -1436,6 +1469,8 @@ class ZipFile: ...@@ -1436,6 +1469,8 @@ class ZipFile:
return return
cmpr = _get_compressor(zinfo.compress_type) cmpr = _get_compressor(zinfo.compress_type)
if not self._seekable:
zinfo.flag_bits |= 0x08
with open(filename, "rb") as fp: with open(filename, "rb") as fp:
# Must overwrite CRC and sizes with correct data later # Must overwrite CRC and sizes with correct data later
zinfo.CRC = CRC = 0 zinfo.CRC = CRC = 0
...@@ -1464,17 +1499,24 @@ class ZipFile: ...@@ -1464,17 +1499,24 @@ class ZipFile:
zinfo.compress_size = file_size zinfo.compress_size = file_size
zinfo.CRC = CRC zinfo.CRC = CRC
zinfo.file_size = file_size zinfo.file_size = file_size
if not zip64 and self._allowZip64: if zinfo.flag_bits & 0x08:
if file_size > ZIP64_LIMIT: # Write CRC and file sizes after the file data
raise RuntimeError('File size has increased during compressing') fmt = '<LQQ' if zip64 else '<LLL'
if compress_size > ZIP64_LIMIT: self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
raise RuntimeError('Compressed size larger than uncompressed size') zinfo.file_size))
# Seek backwards and write file header (which will now include self.start_dir = self.fp.tell()
# correct CRC and file sizes) else:
self.start_dir = self.fp.tell() # Preserve current position in file if not zip64 and self._allowZip64:
self.fp.seek(zinfo.header_offset, 0) if file_size > ZIP64_LIMIT:
self.fp.write(zinfo.FileHeader(zip64)) raise RuntimeError('File size has increased during compressing')
self.fp.seek(self.start_dir, 0) if compress_size > ZIP64_LIMIT:
raise RuntimeError('Compressed size larger than uncompressed size')
# Seek backwards and write file header (which will now include
# correct CRC and file sizes)
self.start_dir = self.fp.tell() # Preserve current position in file
self.fp.seek(zinfo.header_offset)
self.fp.write(zinfo.FileHeader(zip64))
self.fp.seek(self.start_dir)
self.filelist.append(zinfo) self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename] = zinfo self.NameToInfo[zinfo.filename] = zinfo
...@@ -1504,11 +1546,8 @@ class ZipFile: ...@@ -1504,11 +1546,8 @@ class ZipFile:
zinfo.file_size = len(data) # Uncompressed size zinfo.file_size = len(data) # Uncompressed size
with self._lock: with self._lock:
try: if self._seekable:
self.fp.seek(self.start_dir) self.fp.seek(self.start_dir)
except (AttributeError, io.UnsupportedOperation):
# Some file-like objects can provide tell() but not seek()
pass
zinfo.header_offset = self.fp.tell() # Start of header data zinfo.header_offset = self.fp.tell() # Start of header data
if compress_type is not None: if compress_type is not None:
zinfo.compress_type = compress_type zinfo.compress_type = compress_type
...@@ -1557,11 +1596,8 @@ class ZipFile: ...@@ -1557,11 +1596,8 @@ class ZipFile:
try: try:
if self.mode in ("w", "a") and self._didModify: # write ending records if self.mode in ("w", "a") and self._didModify: # write ending records
with self._lock: with self._lock:
try: if self._seekable:
self.fp.seek(self.start_dir) self.fp.seek(self.start_dir)
except (AttributeError, io.UnsupportedOperation):
# Some file-like objects can provide tell() but not seek()
pass
self._write_end_record() self._write_end_record()
finally: finally:
fp = self.fp fp = self.fp
......
...@@ -23,6 +23,8 @@ Core and Builtins ...@@ -23,6 +23,8 @@ Core and Builtins
Library Library
------- -------
- Issue #23252: Added support for writing ZIP files to unseekable streams.
- Issue #21526: Tkinter now supports new boolean type in Tcl 8.5. - Issue #21526: Tkinter now supports new boolean type in Tcl 8.5.
- Issue #23647: Increase imaplib's MAXLINE to accommodate modern mailbox sizes. - Issue #23647: Increase imaplib's MAXLINE to accommodate modern mailbox sizes.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment