Commit 84521047 authored by Bernhard M. Wiedemann, committed by Victor Stinner

bpo-30693: zip+tarfile: sort directory listing (#2263)

tarfile and zipfile now sort directory listings so that tar and zip archives
are generated in a more reproducible way.

See also https://reproducible-builds.org/docs/stable-inputs/ on that topic.
parent 8d83e4ba
...@@ -451,7 +451,8 @@ be finalized; only the internally used file object will be closed. See the ...@@ -451,7 +451,8 @@ be finalized; only the internally used file object will be closed. See the
(directory, fifo, symbolic link, etc.). If given, *arcname* specifies an (directory, fifo, symbolic link, etc.). If given, *arcname* specifies an
alternative name for the file in the archive. Directories are added alternative name for the file in the archive. Directories are added
recursively by default. This can be avoided by setting *recursive* to recursively by default. This can be avoided by setting *recursive* to
:const:`False`. If *filter* is given, it :const:`False`. Recursion adds entries in sorted order.
If *filter* is given, it
should be a function that takes a :class:`TarInfo` object argument and should be a function that takes a :class:`TarInfo` object argument and
returns the changed :class:`TarInfo` object. If it instead returns returns the changed :class:`TarInfo` object. If it instead returns
:const:`None` the :class:`TarInfo` object will be excluded from the :const:`None` the :class:`TarInfo` object will be excluded from the
...@@ -460,6 +461,9 @@ be finalized; only the internally used file object will be closed. See the ...@@ -460,6 +461,9 @@ be finalized; only the internally used file object will be closed. See the
.. versionchanged:: 3.2 .. versionchanged:: 3.2
Added the *filter* parameter. Added the *filter* parameter.
.. versionchanged:: 3.7
Recursion adds entries in sorted order.
.. method:: TarFile.addfile(tarinfo, fileobj=None) .. method:: TarFile.addfile(tarinfo, fileobj=None)
......
...@@ -491,7 +491,7 @@ The :class:`PyZipFile` constructor takes the same parameters as the ...@@ -491,7 +491,7 @@ The :class:`PyZipFile` constructor takes the same parameters as the
:file:`\*.pyc` are added at the top level. If the directory is a :file:`\*.pyc` are added at the top level. If the directory is a
package directory, then all :file:`\*.pyc` are added under the package package directory, then all :file:`\*.pyc` are added under the package
name as a file path, and if any subdirectories are package directories, name as a file path, and if any subdirectories are package directories,
all of these are added recursively. all of these are added recursively in sorted order.
*basename* is intended for internal use only. *basename* is intended for internal use only.
...@@ -524,6 +524,9 @@ The :class:`PyZipFile` constructor takes the same parameters as the ...@@ -524,6 +524,9 @@ The :class:`PyZipFile` constructor takes the same parameters as the
.. versionchanged:: 3.6.2 .. versionchanged:: 3.6.2
The *pathname* parameter accepts a :term:`path-like object`. The *pathname* parameter accepts a :term:`path-like object`.
.. versionchanged:: 3.7
Recursion sorts directory entries.
.. _zipinfo-objects: .. _zipinfo-objects:
......
...@@ -1943,7 +1943,7 @@ class TarFile(object): ...@@ -1943,7 +1943,7 @@ class TarFile(object):
elif tarinfo.isdir(): elif tarinfo.isdir():
self.addfile(tarinfo) self.addfile(tarinfo)
if recursive: if recursive:
for f in os.listdir(name): for f in sorted(os.listdir(name)):
self.add(os.path.join(name, f), os.path.join(arcname, f), self.add(os.path.join(name, f), os.path.join(arcname, f),
recursive, filter=filter) recursive, filter=filter)
......
...@@ -1129,6 +1129,30 @@ class WriteTest(WriteTestBase, unittest.TestCase): ...@@ -1129,6 +1129,30 @@ class WriteTest(WriteTestBase, unittest.TestCase):
finally: finally:
support.rmdir(path) support.rmdir(path)
# os.listdir is mocked only while tar.add() runs, so the archive code sees the
# entries in the "wrong" order while the setup and cleanup code around it
# still talks to the real os.listdir.
def test_ordered_recursion(self):
    """Check that a recursive add() stores directory entries in sorted order."""
    path = os.path.join(TEMPDIR, "directory")
    os.mkdir(path)
    open(os.path.join(path, "1"), "a").close()
    open(os.path.join(path, "2"), "a").close()
    try:
        with tarfile.open(tmpname, self.mode) as tar:
            # Hand the entries back in reverse order; add() has to sort them.
            with unittest.mock.patch('os.listdir', return_value=["2", "1"]):
                tar.add(path)
            # Compare only the last path component of every member name.
            paths = [os.path.split(m.name)[-1] for m in tar.getmembers()]
            self.assertEqual(paths, ["directory", "1", "2"])
    finally:
        support.unlink(os.path.join(path, "1"))
        support.unlink(os.path.join(path, "2"))
        support.rmdir(path)
def test_gettarinfo_pathlike_name(self): def test_gettarinfo_pathlike_name(self):
with tarfile.open(tmpname, self.mode) as tar: with tarfile.open(tmpname, self.mode) as tar:
path = pathlib.Path(TEMPDIR) / "file" path = pathlib.Path(TEMPDIR) / "file"
......
...@@ -1940,7 +1940,7 @@ class PyZipFile(ZipFile): ...@@ -1940,7 +1940,7 @@ class PyZipFile(ZipFile):
if self.debug: if self.debug:
print("Adding", arcname) print("Adding", arcname)
self.write(fname, arcname) self.write(fname, arcname)
dirlist = os.listdir(pathname) dirlist = sorted(os.listdir(pathname))
dirlist.remove("__init__.py") dirlist.remove("__init__.py")
# Add all *.py files and package subdirectories # Add all *.py files and package subdirectories
for filename in dirlist: for filename in dirlist:
...@@ -1965,7 +1965,7 @@ class PyZipFile(ZipFile): ...@@ -1965,7 +1965,7 @@ class PyZipFile(ZipFile):
# This is NOT a package directory, add its files at top level # This is NOT a package directory, add its files at top level
if self.debug: if self.debug:
print("Adding files from directory", pathname) print("Adding files from directory", pathname)
for filename in os.listdir(pathname): for filename in sorted(os.listdir(pathname)):
path = os.path.join(pathname, filename) path = os.path.join(pathname, filename)
root, ext = os.path.splitext(filename) root, ext = os.path.splitext(filename)
if ext == ".py": if ext == ".py":
...@@ -2116,7 +2116,7 @@ def main(args=None): ...@@ -2116,7 +2116,7 @@ def main(args=None):
elif os.path.isdir(path): elif os.path.isdir(path):
if zippath: if zippath:
zf.write(path, zippath) zf.write(path, zippath)
for nm in os.listdir(path): for nm in sorted(os.listdir(path)):
addToZip(zf, addToZip(zf,
os.path.join(path, nm), os.path.join(zippath, nm)) os.path.join(path, nm), os.path.join(zippath, nm))
# else: ignore # else: ignore
......
The ZipFile class now recurses directories in a reproducible way.
The TarFile class now recurses directories in a reproducible way.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment