Commit b2758ff9 authored by Jason R. Coombs's avatar Jason R. Coombs Committed by Barry Warsaw

bpo-36832: add zipfile.Path (#13153)

* bpo-36832: add zipfile.Path

* bpo-36832: add documentation for zipfile.Path

* 📜🤖 Added by blurb_it.

* Remove module reference from blurb.

* Sort the imports

* Update docstrings and docs per recommendations.

* Rely on test.support.temp_dir

* Signal that 'root' is the parameter.

* Correct spelling of 'mod'

* Convert docstring to comment for brevity.

* Fix more errors in the docs
parent 70b80541
...@@ -52,6 +52,15 @@ The module defines the following items: ...@@ -52,6 +52,15 @@ The module defines the following items:
:ref:`zipfile-objects` for constructor details. :ref:`zipfile-objects` for constructor details.
.. class:: Path
:noindex:
A pathlib-compatible wrapper for zip files. See section
:ref:`path-objects` for details.
.. versionadded:: 3.8
.. class:: PyZipFile .. class:: PyZipFile
:noindex: :noindex:
...@@ -456,6 +465,64 @@ The following data attributes are also available: ...@@ -456,6 +465,64 @@ The following data attributes are also available:
truncated. truncated.
.. _path-objects:
Path Objects
------------
.. class:: Path(root, at='')
Construct a Path object from a ``root`` zipfile (which may be a
:class:`ZipFile` instance or ``file`` suitable for passing to
the :class:`ZipFile` constructor).
``at`` specifies the location of this Path within the zipfile,
e.g. 'dir/file.txt', 'dir/', or ''. Defaults to the empty string,
indicating the root.
Path objects expose the following features of :class:`pathlib.Path`
objects:
Path objects are traversable using the ``/`` operator.
.. attribute:: Path.name
The final path component.
.. method:: Path.open(*, **)
Invoke :meth:`ZipFile.open` on the current path. Accepts
the same arguments as :meth:`ZipFile.open`.
.. method:: Path.iterdir()
Enumerate the children of the current directory.
.. method:: Path.is_dir()
Return ``True`` if the current context references a directory.
.. method:: Path.is_file()
Return ``True`` if the current context references a file.
.. method:: Path.exists()
Return ``True`` if the current context references a file or
directory in the zip file.
.. method:: Path.read_text(*, **)
Read the current file as unicode text. Positional and
keyword arguments are passed through to
:class:`io.TextIOWrapper` (except ``buffer``, which is
implied by the context).
.. method:: Path.read_bytes()
Read the current file as bytes.
.. _pyzipfile-objects: .. _pyzipfile-objects:
PyZipFile Objects PyZipFile Objects
......
import contextlib import contextlib
import importlib.util
import io import io
import os import os
import importlib.util
import pathlib import pathlib
import posixpath import posixpath
import time import shutil
import struct import struct
import zipfile import tempfile
import time
import unittest import unittest
import zipfile
from tempfile import TemporaryFile from tempfile import TemporaryFile
...@@ -2392,5 +2394,113 @@ class CommandLineTest(unittest.TestCase): ...@@ -2392,5 +2394,113 @@ class CommandLineTest(unittest.TestCase):
with open(path, 'rb') as f: with open(path, 'rb') as f:
self.assertEqual(f.read(), zf.read(zi)) self.assertEqual(f.read(), zf.read(zi))
# Poor man's technique to consume a (smallish) iterable.
# No longer needed by add_dirs below; kept in case other code in this
# module still refers to it.
consume = tuple


def add_dirs(zipfile):
    """
    Given a writable zipfile, inject directory entries for
    any directories implied by the presence of children.

    Every ancestor directory of every member is considered, and each
    missing directory entry is written exactly once, even when several
    members share it.  Returns the same zipfile object.
    """
    # namelist() returns a snapshot, so writing entries below does not
    # disturb the iteration.
    names = zipfile.namelist()
    known = set(names)
    missing = []
    for name in names:
        parent = posixpath.dirname(name.rstrip("/"))
        # Stop once only slashes (or nothing) remain, so names with a
        # leading "/" cannot loop forever.
        while parent.rstrip("/"):
            entry = parent + "/"
            if entry not in known:
                known.add(entry)
                missing.append(entry)
            parent = posixpath.dirname(parent)
    for entry in missing:
        zipfile.writestr(entry, b"")
    return zipfile
def build_abcde_files():
    """
    Build an in-memory, still-open zip archive laid out like this:

    .
    ├── a.txt
    └── b
        ├── c.txt
        └── d
            └── e.txt

    Only the three files are stored; no directory entries are written.
    The archive is returned open in write mode with its ``filename``
    set to ``abcde.zip``.
    """
    members = (
        ("a.txt", b"content of a"),
        ("b/c.txt", b"content of c"),
        ("b/d/e.txt", b"content of e"),
    )
    archive = zipfile.ZipFile(io.BytesIO(), "w")
    for arcname, payload in members:
        archive.writestr(arcname, payload)
    # The archive is backed by a BytesIO, so give it a name explicitly.
    archive.filename = "abcde.zip"
    return archive
class TestPath(unittest.TestCase):
    """
    Tests for zipfile.Path, run against the sample "abcde" archive both
    without and with explicit directory entries.

    Checks use unittest assertion methods rather than bare ``assert``
    statements so they still run when Python is started with ``-O``.
    """

    def setUp(self):
        # Fixtures entered on this stack (e.g. temporary directories)
        # are released when the test finishes.
        self.fixtures = contextlib.ExitStack()
        self.addCleanup(self.fixtures.close)

    def zipfile_abcde(self):
        """
        Yield the sample archive twice: first as built (directories
        only implied), then with directory entries added by add_dirs.
        """
        with self.subTest():
            yield build_abcde_files()
        with self.subTest():
            yield add_dirs(build_abcde_files())

    def zipfile_ondisk(self):
        """
        Yield filesystem paths to on-disk copies of each sample archive.
        """
        tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir()))
        for zipfile_abcde in self.zipfile_abcde():
            # Keep a reference to the backing buffer before closing the
            # archive; its contents are read back after close().
            buffer = zipfile_abcde.fp
            zipfile_abcde.close()
            path = tmpdir / zipfile_abcde.filename
            with path.open("wb") as strm:
                strm.write(buffer.getvalue())
            yield path

    def test_iterdir_istype(self):
        for zipfile_abcde in self.zipfile_abcde():
            root = zipfile.Path(zipfile_abcde)
            self.assertTrue(root.is_dir())
            a, b = root.iterdir()
            self.assertTrue(a.is_file())
            self.assertTrue(b.is_dir())
            c, d = b.iterdir()
            self.assertTrue(c.is_file())
            e, = d.iterdir()
            self.assertTrue(e.is_file())

    def test_open(self):
        for zipfile_abcde in self.zipfile_abcde():
            root = zipfile.Path(zipfile_abcde)
            a, b = root.iterdir()
            with a.open() as strm:
                data = strm.read()
            self.assertEqual(data, b"content of a")

    def test_read(self):
        for zipfile_abcde in self.zipfile_abcde():
            root = zipfile.Path(zipfile_abcde)
            a, b = root.iterdir()
            self.assertEqual(a.read_text(), "content of a")
            self.assertEqual(a.read_bytes(), b"content of a")

    def test_traverse_truediv(self):
        for zipfile_abcde in self.zipfile_abcde():
            root = zipfile.Path(zipfile_abcde)
            a = root / "a"
            self.assertTrue(a.is_file())
            e = root / "b" / "d" / "e.txt"
            self.assertEqual(e.read_text(), "content of e")

    def test_pathlike_construction(self):
        """
        zipfile.Path should be constructable from a path-like object
        """
        for zipfile_ondisk in self.zipfile_ondisk():
            pathlike = pathlib.Path(str(zipfile_ondisk))
            zipfile.Path(pathlike)

    def test_traverse_pathlike(self):
        """
        The ``/`` operator should accept a path-like right-hand operand.
        """
        for zipfile_abcde in self.zipfile_abcde():
            root = zipfile.Path(zipfile_abcde)
            root / pathlib.Path("a")
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -3,16 +3,18 @@ Read and write ZIP files. ...@@ -3,16 +3,18 @@ Read and write ZIP files.
XXX references to utf-8 need further investigation. XXX references to utf-8 need further investigation.
""" """
import binascii
import functools
import importlib.util
import io import io
import os import os
import importlib.util import posixpath
import sys
import time
import stat
import shutil import shutil
import stat
import struct import struct
import binascii import sys
import threading import threading
import time
try: try:
import zlib # We may need its compression method import zlib # We may need its compression method
...@@ -2102,6 +2104,138 @@ class PyZipFile(ZipFile): ...@@ -2102,6 +2104,138 @@ class PyZipFile(ZipFile):
return (fname, archivename) return (fname, archivename)
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """
        ``root`` is a ZipFile instance or anything accepted by the
        ZipFile constructor; ``at`` is the location inside the archive
        ('' for the archive root, a trailing '/' for directories).
        """
        self.root = root if isinstance(root, ZipFile) else ZipFile(root)
        self.at = at

    @property
    def open(self):
        """A callable that opens this member via ZipFile.open."""
        return functools.partial(self.root.open, self.at)

    @property
    def name(self):
        """The final path component, without any trailing slash."""
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        """
        Read this member as text; arguments are forwarded to
        io.TextIOWrapper after the stream itself.
        """
        with self.open() as strm:
            return io.TextIOWrapper(strm, *args, **kwargs).read()

    def read_bytes(self):
        """Read this member and return its contents as bytes."""
        with self.open() as strm:
            return strm.read()

    def _is_child(self, path):
        # A child is a path whose parent (ignoring trailing slashes)
        # is this path.
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        # Build another Path over the same archive.
        return Path(self.root, at)

    def is_dir(self):
        """True for the archive root and for locations ending in '/'."""
        return not self.at or self.at.endswith("/")

    def is_file(self):
        """True whenever is_dir() is false."""
        return not self.is_dir()

    def exists(self):
        """True if this location is a stored or implied archive entry."""
        return self.at in self._names()

    def iterdir(self):
        """
        Iterate over the immediate children of this directory.

        Raises ValueError if this path is not a directory.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self._names())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def __truediv__(self, add):
        """
        Join ``add`` onto this path.  The directory form (with a
        trailing slash) is chosen only when the plain name is not an
        entry but the directory form is.
        """
        as_file = posixpath.join(self.at, add)
        as_dir = posixpath.join(self.at, add, "")
        names = self._names()
        use_dir = as_file not in names and as_dir in names
        return self._next(as_dir if use_dir else as_file)

    @staticmethod
    def _add_implied_dirs(names):
        """
        Return ``names`` followed by a directory entry ('dir/') for
        every ancestor directory that the names imply but do not list.

        Each implied directory appears once, in first-seen order, even
        when several members share it, and all ancestors are covered
        (``x/y/z.txt`` implies both ``x/y/`` and ``x/``).
        """
        known = set(names)
        implied = []
        for name in names:
            parent = posixpath.dirname(name.rstrip("/"))
            # Stop once only slashes (or nothing) remain, so names with
            # a leading "/" cannot loop forever.
            while parent.rstrip("/"):
                entry = parent + "/"
                if entry not in known:
                    known.add(entry)
                    implied.append(entry)
                parent = posixpath.dirname(parent)
        return names + implied

    def _names(self):
        # All archive member names plus any implied directories.
        return self._add_implied_dirs(self.root.namelist())
def main(args=None): def main(args=None):
import argparse import argparse
......
Introducing ``zipfile.Path``, a pathlib-compatible wrapper for traversing zip files.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment