Commit b48c5d51 authored by Gregory P. Smith's avatar Gregory P. Smith

Fixes issue19081: When a zipimport .zip file in sys.path being imported

from is modified during the lifetime of the Python process after
zipimport has already opened and cached the zip's table of contents
it now fstat's the file after opening it upon every attempt to access
anything within and will re-read the table of contents if the .zip file
inode, size or mtime have changed.

It would've been nicer to hold any .zip file used by zipimport open for the
duration of the process but that would be more invasive and add an additional
open file descriptor to all zipimport using processes.  It also would likely
not fix the problem on Windows due to different filesystem semantics.
parent 875565bb
import io
import sys
import os
import marshal
......@@ -55,6 +56,27 @@ TESTPACK2 = "ziptestpackage2"
TEMP_ZIP = os.path.abspath("junk95142" + os.extsep + "zip")
def _write_zip_package(zipname, files,
data_to_prepend=b"", compression=ZIP_STORED):
z = ZipFile(zipname, "w")
try:
for name, (mtime, data) in files.items():
zinfo = ZipInfo(name, time.localtime(mtime))
zinfo.compress_type = compression
z.writestr(zinfo, data)
finally:
z.close()
if data_to_prepend:
# Prepend data to the start of the zipfile
with open(zipname, "rb") as f:
zip_data = f.read()
with open(zipname, "wb") as f:
f.write(data_to_prepend)
f.write(zip_data)
class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
compression = ZIP_STORED
......@@ -67,26 +89,9 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
ImportHooksBaseTestCase.setUp(self)
def doTest(self, expected_ext, files, *modules, **kw):
z = ZipFile(TEMP_ZIP, "w")
_write_zip_package(TEMP_ZIP, files, data_to_prepend=kw.get("stuff"),
compression=self.compression)
try:
for name, (mtime, data) in files.items():
zinfo = ZipInfo(name, time.localtime(mtime))
zinfo.compress_type = self.compression
z.writestr(zinfo, data)
z.close()
stuff = kw.get("stuff", None)
if stuff is not None:
# Prepend 'stuff' to the start of the zipfile
f = open(TEMP_ZIP, "rb")
data = f.read()
f.close()
f = open(TEMP_ZIP, "wb")
f.write(stuff)
f.write(data)
f.close()
sys.path.insert(0, TEMP_ZIP)
mod = __import__(".".join(modules), globals(), locals(),
......@@ -101,7 +106,6 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
self.assertEqual(file, os.path.join(TEMP_ZIP,
*modules) + expected_ext)
finally:
z.close()
os.remove(TEMP_ZIP)
def testAFakeZlib(self):
......@@ -387,6 +391,64 @@ class CompressedZipImportTestCase(UncompressedZipImportTestCase):
compression = ZIP_DEFLATED
class ZipFileModifiedAfterImportTestCase(ImportHooksBaseTestCase):
def setUp(self):
zipimport._zip_directory_cache.clear()
zipimport._zip_stat_cache.clear()
ImportHooksBaseTestCase.setUp(self)
def tearDown(self):
ImportHooksBaseTestCase.tearDown(self)
if os.path.exists(TEMP_ZIP):
os.remove(TEMP_ZIP)
def testZipFileChangesAfterFirstImport(self):
"""Alter the zip file after caching its index and try an import."""
packdir = TESTPACK + os.sep
files = {packdir + "__init__" + pyc_ext: (NOW, test_pyc),
packdir + TESTMOD + ".py": (NOW, "test_value = 38\n"),
"ziptest_a.py": (NOW, "test_value = 23\n"),
"ziptest_b.py": (NOW, "test_value = 42\n"),
"ziptest_c.py": (NOW, "test_value = 1337\n")}
zipfile_path = TEMP_ZIP
_write_zip_package(zipfile_path, files)
self.assertTrue(os.path.exists(zipfile_path))
sys.path.insert(0, zipfile_path)
# Import something out of the zipfile and confirm it is correct.
testmod = __import__(TESTPACK + "." + TESTMOD,
globals(), locals(), ["__dummy__"])
self.assertEqual(testmod.test_value, 38)
# Import something else out of the zipfile and confirm it is correct.
ziptest_b = __import__("ziptest_b", globals(), locals(), ["test_value"])
self.assertEqual(ziptest_b.test_value, 42)
# Truncate and fill the zip file with non-zip garbage.
with io.open(zipfile_path, "rb") as orig_zip_file:
orig_zip_file_contents = orig_zip_file.read()
with io.open(zipfile_path, "wb") as byebye_valid_zip_file:
byebye_valid_zip_file.write(b"Tear down this wall!\n"*1987)
# Now that the zipfile has been replaced, import something else from it
# which should fail as the file contents are now garbage.
with self.assertRaises(ImportError):
ziptest_a = __import__("ziptest_a", globals(), locals(),
["test_value"])
# Now lets make it a valid zipfile that has some garbage at the start.
# This alters all of the offsets within the file
with io.open(zipfile_path, "wb") as new_zip_file:
new_zip_file.write(b"X"*1991) # The year Python was created.
new_zip_file.write(orig_zip_file_contents)
# Now that the zip file has been "restored" to a valid but different
# zipfile the zipimporter should *successfully* re-read the new zip
# file's end of file central index and be able to import from it again.
ziptest_a = __import__("ziptest_a", globals(), locals(), ["test_value"])
self.assertEqual(ziptest_a.test_value, 23)
ziptest_c = __import__("ziptest_c", globals(), locals(), ["test_value"])
self.assertEqual(ziptest_c.test_value, 1337)
class BadFileZipImportTestCase(unittest.TestCase):
def assertZipFailure(self, filename):
self.assertRaises(zipimport.ZipImportError,
......@@ -464,6 +526,7 @@ def test_main():
UncompressedZipImportTestCase,
CompressedZipImportTestCase,
BadFileZipImportTestCase,
ZipFileModifiedAfterImportTestCase,
)
finally:
test_support.unlink(TESTMOD)
......
......@@ -42,10 +42,16 @@ struct _zipimporter {
static PyObject *ZipImportError;
static PyObject *zip_directory_cache = NULL;
static PyObject *zip_stat_cache = NULL;
/* posix.fstat or nt.fstat function. Used due to posixmodule.c's
* superior fstat implementation over libc's on Windows. */
static PyObject *fstat_function = NULL; /* posix.fstat() or nt.fstat() */
/* forward decls */
static PyObject *read_directory(char *archive);
static PyObject *get_data(char *archive, PyObject *toc_entry);
static FILE *fopen_rb_and_stat(char *path, PyObject **py_stat_p);
static FILE *safely_reopen_archive(ZipImporter *self, char **archive_p);
static PyObject *read_directory(FILE *fp, char *archive);
static PyObject *get_data(FILE *fp, char *archive, PyObject *toc_entry);
static PyObject *get_module_code(ZipImporter *self, char *fullname,
int *p_ispackage, char **p_modpath);
......@@ -126,12 +132,38 @@ zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
PyObject *files;
files = PyDict_GetItemString(zip_directory_cache, path);
if (files == NULL) {
files = read_directory(buf);
if (files == NULL)
PyObject *zip_stat = NULL;
FILE *fp = fopen_rb_and_stat(buf, &zip_stat);
if (fp == NULL) {
PyErr_Format(ZipImportError, "can't open Zip file: "
"'%.200s'", buf);
Py_XDECREF(zip_stat);
return -1;
}
if (Py_VerboseFlag)
PySys_WriteStderr("# zipimport: %s not cached, "
"reading TOC.\n", path);
files = read_directory(fp, buf);
fclose(fp);
if (files == NULL) {
Py_XDECREF(zip_stat);
return -1;
}
if (PyDict_SetItemString(zip_directory_cache, path,
files) != 0)
files) != 0) {
Py_DECREF(files);
Py_XDECREF(zip_stat);
return -1;
}
if (zip_stat && PyDict_SetItemString(zip_stat_cache, path,
zip_stat) != 0) {
Py_DECREF(files);
Py_DECREF(zip_stat);
return -1;
}
Py_XDECREF(zip_stat);
}
else
Py_INCREF(files);
......@@ -419,11 +451,12 @@ static PyObject *
zipimporter_get_data(PyObject *obj, PyObject *args)
{
ZipImporter *self = (ZipImporter *)obj;
char *path;
char *path, *archive;
FILE *fp;
#ifdef ALTSEP
char *p, buf[MAXPATHLEN + 1];
#endif
PyObject *toc_entry;
PyObject *toc_entry, *data;
Py_ssize_t len;
if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
......@@ -448,12 +481,19 @@ zipimporter_get_data(PyObject *obj, PyObject *args)
path = path + len + 1;
}
fp = safely_reopen_archive(self, &archive);
if (fp == NULL)
return NULL;
toc_entry = PyDict_GetItemString(self->files, path);
if (toc_entry == NULL) {
PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
fclose(fp);
return NULL;
}
return get_data(PyString_AsString(self->archive), toc_entry);
data = get_data(fp, archive, toc_entry);
fclose(fp);
return data;
}
static PyObject *
......@@ -473,7 +513,8 @@ zipimporter_get_source(PyObject *obj, PyObject *args)
{
ZipImporter *self = (ZipImporter *)obj;
PyObject *toc_entry;
char *fullname, *subname, path[MAXPATHLEN+1];
FILE *fp;
char *fullname, *subname, path[MAXPATHLEN+1], *archive;
int len;
enum zi_module_info mi;
......@@ -501,13 +542,20 @@ zipimporter_get_source(PyObject *obj, PyObject *args)
else
strcpy(path + len, ".py");
fp = safely_reopen_archive(self, &archive);
if (fp == NULL)
return NULL;
toc_entry = PyDict_GetItemString(self->files, path);
if (toc_entry != NULL)
return get_data(PyString_AsString(self->archive), toc_entry);
if (toc_entry != NULL) {
PyObject *data = get_data(fp, archive, toc_entry);
fclose(fp);
return data;
}
fclose(fp);
/* we have the module, but no source */
Py_INCREF(Py_None);
return Py_None;
Py_RETURN_NONE;
}
PyDoc_STRVAR(doc_find_module,
......@@ -662,7 +710,139 @@ get_long(unsigned char *buf) {
}
/*
read_directory(archive) -> files dict (new reference)
fopen_rb_and_stat(path, &py_stat) -> FILE *
Opens path in "rb" mode and populates the Python py_stat stat_result
with information about the opened file. *py_stat may not be changed
if there is no fstat_function or if fstat_function fails.
Returns NULL and does nothing to *py_stat if the open failed.
*/
static FILE *
fopen_rb_and_stat(char *path, PyObject **py_stat_p)
{
FILE *fp;
assert(py_stat_p != NULL);
assert(*py_stat_p == NULL);
fp = fopen(path, "rb");
if (fp == NULL) {
return NULL;
}
if (fstat_function) {
PyObject *stat_result = PyObject_CallFunction(fstat_function,
"i", fileno(fp));
if (stat_result == NULL) {
PyErr_Clear(); /* We can function without it. */
} else {
*py_stat_p = stat_result;
}
}
return fp;
}
/* Return 1 if objects a and b fail a Py_EQ test for an attr. */
static int
compare_obj_attr_strings(PyObject *obj_a, PyObject *obj_b, char *attr_name)
{
int problem = 0;
PyObject *attr_a = PyObject_GetAttrString(obj_a, attr_name);
PyObject *attr_b = PyObject_GetAttrString(obj_b, attr_name);
if (attr_a == NULL || attr_b == NULL)
problem = 1;
else
problem = (PyObject_RichCompareBool(attr_a, attr_b, Py_EQ) != 1);
Py_XDECREF(attr_a);
Py_XDECREF(attr_b);
return problem;
}
/*
* Returns an open FILE * on success and sets *archive_p to point to
* a read only C string representation of the archive name (as a
* convenience for use in error messages).
*
* Returns NULL on error with the Python error context set.
*/
static FILE *
safely_reopen_archive(ZipImporter *self, char **archive_p)
{
FILE *fp;
PyObject *stat_now = NULL;
char *archive;
assert(archive_p != NULL);
*archive_p = PyString_AsString(self->archive);
if (*archive_p == NULL)
return NULL;
archive = *archive_p;
fp = fopen_rb_and_stat(archive, &stat_now);
if (!fp) {
PyErr_Format(PyExc_IOError,
"zipimport: can not open file %s", archive);
Py_XDECREF(stat_now);
return NULL;
}
if (stat_now != NULL) {
int problem = 0;
PyObject *files;
PyObject *prev_stat = PyDict_GetItemString(zip_stat_cache, archive);
/* Test stat_now vs the old cached stat on some key attributes. */
if (prev_stat != NULL) {
problem = compare_obj_attr_strings(prev_stat, stat_now,
"st_ino");
problem |= compare_obj_attr_strings(prev_stat, stat_now,
"st_size");
problem |= compare_obj_attr_strings(prev_stat, stat_now,
"st_mtime");
} else {
if (Py_VerboseFlag)
PySys_WriteStderr("# zipimport: no stat data for %s!\n",
archive);
problem = 1;
}
if (problem) {
if (Py_VerboseFlag)
PySys_WriteStderr("# zipimport: %s modified since last"
" import, rereading TOC.\n", archive);
files = read_directory(fp, archive);
if (files == NULL) {
Py_DECREF(stat_now);
fclose(fp);
return NULL;
}
if (PyDict_SetItem(zip_directory_cache, self->archive,
files) != 0) {
Py_DECREF(files);
Py_DECREF(stat_now);
fclose(fp);
return NULL;
}
if (stat_now && PyDict_SetItem(zip_stat_cache, self->archive,
stat_now) != 0) {
Py_DECREF(files);
Py_DECREF(stat_now);
fclose(fp);
return NULL;
}
Py_XDECREF(self->files); /* free the old value. */
self->files = files;
} else {
/* No problem, discard the new stat data. */
Py_DECREF(stat_now);
}
} /* stat succeeded */
return fp;
}
/*
read_directory(fp, archive) -> files dict (new reference)
Given a path to a Zip archive, build a dict, mapping file names
(local to the archive, using SEP as a separator) to toc entries.
......@@ -683,10 +863,9 @@ get_long(unsigned char *buf) {
data_size and file_offset are 0.
*/
static PyObject *
read_directory(char *archive)
read_directory(FILE *fp, char *archive)
{
PyObject *files = NULL;
FILE *fp;
long compress, crc, data_size, file_size, file_offset, date, time;
long header_offset, name_size, header_size, header_position;
long i, l, count;
......@@ -696,6 +875,7 @@ read_directory(char *archive)
char *p, endof_central_dir[22];
long arc_offset; /* offset from beginning of file to start of zip-archive */
assert(fp != NULL);
if (strlen(archive) > MAXPATHLEN) {
PyErr_SetString(PyExc_OverflowError,
"Zip path name is too long");
......@@ -703,28 +883,18 @@ read_directory(char *archive)
}
strcpy(path, archive);
fp = fopen(archive, "rb");
if (fp == NULL) {
PyErr_Format(ZipImportError, "can't open Zip file: "
"'%.200s'", archive);
return NULL;
}
if (fseek(fp, -22, SEEK_END) == -1) {
fclose(fp);
PyErr_Format(ZipImportError, "can't read Zip file: %s", archive);
return NULL;
}
header_position = ftell(fp);
if (fread(endof_central_dir, 1, 22, fp) != 22) {
fclose(fp);
PyErr_Format(ZipImportError, "can't read Zip file: "
"'%.200s'", archive);
return NULL;
}
if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
/* Bad: End of Central Dir signature */
fclose(fp);
PyErr_Format(ZipImportError, "not a Zip file: "
"'%.200s'", archive);
return NULL;
......@@ -793,18 +963,15 @@ read_directory(char *archive)
goto error;
count++;
}
fclose(fp);
if (Py_VerboseFlag)
PySys_WriteStderr("# zipimport: found %ld names in %s\n",
count, archive);
return files;
fseek_error:
fclose(fp);
Py_XDECREF(files);
PyErr_Format(ZipImportError, "can't read Zip file: %s", archive);
return NULL;
error:
fclose(fp);
Py_XDECREF(files);
return NULL;
}
......@@ -841,14 +1008,13 @@ get_decompress_func(void)
return decompress;
}
/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
/* Given a FILE* to a Zip file and a toc_entry, return the (uncompressed)
data as a new reference. */
static PyObject *
get_data(char *archive, PyObject *toc_entry)
get_data(FILE *fp, char *archive, PyObject *toc_entry)
{
PyObject *raw_data, *data = NULL, *decompress;
char *buf;
FILE *fp;
int err;
Py_ssize_t bytes_read = 0;
long l;
......@@ -862,16 +1028,8 @@ get_data(char *archive, PyObject *toc_entry)
return NULL;
}
fp = fopen(archive, "rb");
if (!fp) {
PyErr_Format(PyExc_IOError,
"zipimport: can not open file %s", archive);
return NULL;
}
/* Check to make sure the local file header is correct */
if (fseek(fp, file_offset, 0) == -1) {
fclose(fp);
PyErr_Format(ZipImportError, "can't read Zip file: %s", archive);
return NULL;
}
......@@ -882,11 +1040,9 @@ get_data(char *archive, PyObject *toc_entry)
PyErr_Format(ZipImportError,
"bad local file header in %s",
archive);
fclose(fp);
return NULL;
}
if (fseek(fp, file_offset + 26, 0) == -1) {
fclose(fp);
PyErr_Format(ZipImportError, "can't read Zip file: %s", archive);
return NULL;
}
......@@ -898,7 +1054,6 @@ get_data(char *archive, PyObject *toc_entry)
raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
data_size : data_size + 1);
if (raw_data == NULL) {
fclose(fp);
return NULL;
}
buf = PyString_AsString(raw_data);
......@@ -907,11 +1062,9 @@ get_data(char *archive, PyObject *toc_entry)
if (err == 0) {
bytes_read = fread(buf, 1, data_size, fp);
} else {
fclose(fp);
PyErr_Format(ZipImportError, "can't read Zip file: %s", archive);
return NULL;
}
fclose(fp);
if (err || bytes_read != data_size) {
PyErr_SetString(PyExc_IOError,
"zipimport: can't read data");
......@@ -1107,17 +1260,13 @@ get_mtime_of_source(ZipImporter *self, char *path)
/* Return the code object for the module named by 'fullname' from the
Zip archive as a new reference. */
static PyObject *
get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
time_t mtime, PyObject *toc_entry)
get_code_from_data(char *archive, FILE *fp, int ispackage,
int isbytecode, time_t mtime, PyObject *toc_entry)
{
PyObject *data, *code;
char *modpath;
char *archive = PyString_AsString(self->archive);
if (archive == NULL)
return NULL;
data = get_data(archive, toc_entry);
data = get_data(fp, archive, toc_entry);
if (data == NULL)
return NULL;
......@@ -1152,12 +1301,19 @@ get_module_code(ZipImporter *self, char *fullname,
for (zso = zip_searchorder; *zso->suffix; zso++) {
PyObject *code = NULL;
FILE *fp;
char *archive;
strcpy(path + len, zso->suffix);
if (Py_VerboseFlag > 1)
PySys_WriteStderr("# trying %s%c%s\n",
PyString_AsString(self->archive),
SEP, path);
fp = safely_reopen_archive(self, &archive);
if (fp == NULL)
return NULL;
toc_entry = PyDict_GetItemString(self->files, path);
if (toc_entry != NULL) {
time_t mtime = 0;
......@@ -1168,9 +1324,10 @@ get_module_code(ZipImporter *self, char *fullname,
mtime = get_mtime_of_source(self, path);
if (p_ispackage != NULL)
*p_ispackage = ispackage;
code = get_code_from_data(self, ispackage,
code = get_code_from_data(archive, fp, ispackage,
isbytecode, mtime,
toc_entry);
fclose(fp);
if (code == Py_None) {
/* bad magic number or non-matching mtime
in byte code, try next */
......@@ -1182,6 +1339,7 @@ get_module_code(ZipImporter *self, char *fullname,
PyTuple_GetItem(toc_entry, 0));
return code;
}
fclose(fp);
}
PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
return NULL;
......@@ -1199,6 +1357,8 @@ This module exports three objects:\n\
subclass of ImportError, so it can be caught as ImportError, too.\n\
- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
info dicts, as used in zipimporter._files.\n\
- _zip_stat_cache: a dict, mapping archive paths to stat_result\n\
info for the .zip the last time anything was imported from it.\n\
\n\
It is usually not needed to use the zipimport module explicitly; it is\n\
used by the builtin import mechanism for sys.path items that are paths\n\
......@@ -1247,6 +1407,7 @@ initzipimport(void)
(PyObject *)&ZipImporter_Type) < 0)
return;
Py_XDECREF(zip_directory_cache); /* Avoid embedded interpreter leaks. */
zip_directory_cache = PyDict_New();
if (zip_directory_cache == NULL)
return;
......@@ -1254,4 +1415,31 @@ initzipimport(void)
if (PyModule_AddObject(mod, "_zip_directory_cache",
zip_directory_cache) < 0)
return;
Py_XDECREF(zip_stat_cache); /* Avoid embedded interpreter leaks. */
zip_stat_cache = PyDict_New();
if (zip_stat_cache == NULL)
return;
Py_INCREF(zip_stat_cache);
if (PyModule_AddObject(mod, "_zip_stat_cache", zip_stat_cache) < 0)
return;
{
/* We cannot import "os" here as that is a .py/.pyc file that could
* live within a zipped up standard library. Import the posix or nt
* builtin that provides the fstat() function we want instead. */
PyObject *os_like_module;
Py_XDECREF(fstat_function); /* Avoid embedded interpreter leaks. */
os_like_module = PyImport_ImportModule("posix");
if (os_like_module == NULL) {
os_like_module = PyImport_ImportModule("nt");
}
if (os_like_module != NULL) {
fstat_function = PyObject_GetAttrString(os_like_module, "fstat");
Py_DECREF(os_like_module);
}
if (fstat_function == NULL) {
PyErr_Clear(); /* non-fatal, we'll go on without it. */
}
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment