Commit 2bcbc141 authored by Gregory P. Smith's avatar Gregory P. Smith

Fixes Issue #19081: When a zipimport .zip file in sys.path being imported from

is modified during the lifetime of the Python process after zipimport has
already cached the zip's table of contents we detect this and recover
rather than read bad data from the .zip (causing odd import errors).
parent a21acb5d
......@@ -46,6 +46,27 @@ pyc_file = imp.cache_from_source(TESTMOD + '.py')
pyc_ext = ('.pyc' if __debug__ else '.pyo')
def _write_zip_package(zipname, files,
data_to_prepend=b"", compression=ZIP_STORED):
z = ZipFile(zipname, "w")
try:
for name, (mtime, data) in files.items():
zinfo = ZipInfo(name, time.localtime(mtime))
zinfo.compress_type = compression
z.writestr(zinfo, data)
finally:
z.close()
if data_to_prepend:
# Prepend data to the start of the zipfile
with open(zipname, "rb") as f:
zip_data = f.read()
with open(zipname, "wb") as f:
f.write(data_to_prepend)
f.write(zip_data)
class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
compression = ZIP_STORED
......@@ -58,23 +79,9 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
ImportHooksBaseTestCase.setUp(self)
def doTest(self, expected_ext, files, *modules, **kw):
z = ZipFile(TEMP_ZIP, "w")
_write_zip_package(TEMP_ZIP, files, data_to_prepend=kw.get("stuff"),
compression=self.compression)
try:
for name, (mtime, data) in files.items():
zinfo = ZipInfo(name, time.localtime(mtime))
zinfo.compress_type = self.compression
z.writestr(zinfo, data)
z.close()
stuff = kw.get("stuff", None)
if stuff is not None:
# Prepend 'stuff' to the start of the zipfile
with open(TEMP_ZIP, "rb") as f:
data = f.read()
with open(TEMP_ZIP, "wb") as f:
f.write(stuff)
f.write(data)
sys.path.insert(0, TEMP_ZIP)
mod = __import__(".".join(modules), globals(), locals(),
......@@ -89,7 +96,8 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
self.assertEqual(file, os.path.join(TEMP_ZIP,
*modules) + expected_ext)
finally:
z.close()
while TEMP_ZIP in sys.path:
sys.path.remove(TEMP_ZIP)
os.remove(TEMP_ZIP)
def testAFakeZlib(self):
......@@ -395,10 +403,67 @@ class CompressedZipImportTestCase(UncompressedZipImportTestCase):
compression = ZIP_DEFLATED
class ZipFileModifiedAfterImportTestCase(ImportHooksBaseTestCase):
def setUp(self):
zipimport._zip_directory_cache.clear()
zipimport._zip_stat_cache.clear()
ImportHooksBaseTestCase.setUp(self)
def tearDown(self):
ImportHooksBaseTestCase.tearDown(self)
if os.path.exists(TEMP_ZIP):
os.remove(TEMP_ZIP)
def testZipFileChangesAfterFirstImport(self):
"""Alter the zip file after caching its index and try an import."""
packdir = TESTPACK + os.sep
files = {packdir + "__init__" + pyc_ext: (NOW, test_pyc),
packdir + TESTMOD + ".py": (NOW, "test_value = 38\n"),
"ziptest_a.py": (NOW, "test_value = 23\n"),
"ziptest_b.py": (NOW, "test_value = 42\n"),
"ziptest_c.py": (NOW, "test_value = 1337\n")}
zipfile_path = TEMP_ZIP
_write_zip_package(zipfile_path, files)
self.assertTrue(os.path.exists(zipfile_path))
sys.path.insert(0, zipfile_path)
# Import something out of the zipfile and confirm it is correct.
testmod = __import__(TESTPACK + "." + TESTMOD,
globals(), locals(), ["__dummy__"])
self.assertEqual(testmod.test_value, 38)
# Import something else out of the zipfile and confirm it is correct.
ziptest_b = __import__("ziptest_b", globals(), locals(), ["test_value"])
self.assertEqual(ziptest_b.test_value, 42)
# Truncate and fill the zip file with non-zip garbage.
with open(zipfile_path, "rb") as orig_zip_file:
orig_zip_file_contents = orig_zip_file.read()
with open(zipfile_path, "wb") as byebye_valid_zip_file:
byebye_valid_zip_file.write(b"Tear down this wall!\n"*1987)
# Now that the zipfile has been replaced, import something else from it
# which should fail as the file contents are now garbage.
with self.assertRaises(ImportError):
ziptest_a = __import__("ziptest_a", globals(), locals(),
["test_value"])
self.assertEqual(ziptest_a.test_value, 23)
# Now lets make it a valid zipfile that has some garbage at the start.
# This alters all of the offsets within the file
with open(zipfile_path, "wb") as new_zip_file:
new_zip_file.write(b"X"*1991) # The year Python was created.
new_zip_file.write(orig_zip_file_contents)
# Now that the zip file has been "restored" to a valid but different
# zipfile the zipimporter should *successfully* re-read the new zip
# file's end of file central index and be able to import from it again.
ziptest_c = __import__("ziptest_c", globals(), locals(), ["test_value"])
self.assertEqual(ziptest_c.test_value, 1337)
class BadFileZipImportTestCase(unittest.TestCase):
def assertZipFailure(self, filename):
self.assertRaises(zipimport.ZipImportError,
zipimport.zipimporter, filename)
with self.assertRaises(zipimport.ZipImportError):
zipimport.zipimporter(filename)
def testNoFile(self):
self.assertZipFailure('AdfjdkFJKDFJjdklfjs')
......@@ -472,6 +537,7 @@ def test_main():
UncompressedZipImportTestCase,
CompressedZipImportTestCase,
BadFileZipImportTestCase,
ZipFileModifiedAfterImportTestCase,
)
finally:
support.unlink(TESTMOD)
......
......@@ -10,6 +10,11 @@ What's New in Python 3.3.4 release candidate 1?
Core and Builtins
-----------------
- Issue #19081: When a zipimport .zip file in sys.path being imported from
is modified during the lifetime of the Python process after zipimport has
already cached the zip's table of contents we detect this and recover
rather than read bad data from the .zip (causing odd import errors).
- Issue #17432: Drop UCS2 from names of Unicode functions in python3.def.
- Issue #19969: PyBytes_FromFormatV() now raises an OverflowError if "%c"
......
......@@ -45,10 +45,16 @@ struct _zipimporter {
static PyObject *ZipImportError;
/* read_directory() cache */
static PyObject *zip_directory_cache = NULL;
static PyObject *zip_stat_cache = NULL;
/* posix.fstat or nt.fstat function. Used due to posixmodule.c's
* superior fstat implementation over libc's on Windows. */
static PyObject *fstat_function = NULL; /* posix.fstat() or nt.fstat() */
/* forward decls */
static PyObject *read_directory(PyObject *archive);
static PyObject *get_data(PyObject *archive, PyObject *toc_entry);
static FILE *fopen_rb_and_stat(PyObject *path, PyObject **py_stat_p);
static FILE *safely_reopen_archive(ZipImporter *self);
static PyObject *read_directory(FILE *fp, PyObject *archive);
static PyObject *get_data(FILE *fp, PyObject *archive, PyObject *toc_entry);
static PyObject *get_module_code(ZipImporter *self, PyObject *fullname,
int *p_ispackage, PyObject **p_modpath);
......@@ -128,11 +134,39 @@ zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
files = PyDict_GetItem(zip_directory_cache, filename);
if (files == NULL) {
files = read_directory(filename);
if (files == NULL)
PyObject *zip_stat = NULL;
FILE *fp = fopen_rb_and_stat(filename, &zip_stat);
if (fp == NULL) {
if (!PyErr_Occurred())
PyErr_Format(ZipImportError, "can't open Zip file: %R",
filename);
Py_XDECREF(zip_stat);
goto error;
if (PyDict_SetItem(zip_directory_cache, filename, files) != 0)
}
if (Py_VerboseFlag)
PySys_FormatStderr("# zipimport: %U not cached, "
"reading TOC.\n", filename);
files = read_directory(fp, filename);
fclose(fp);
if (files == NULL) {
Py_XDECREF(zip_stat);
goto error;
}
if (PyDict_SetItem(zip_directory_cache, filename, files) != 0) {
Py_DECREF(files);
Py_XDECREF(zip_stat);
goto error;
}
if (zip_stat && PyDict_SetItem(zip_stat_cache, filename,
zip_stat) != 0) {
Py_DECREF(files);
Py_DECREF(zip_stat);
goto error;
}
Py_XDECREF(zip_stat);
}
else
Py_INCREF(files);
......@@ -554,10 +588,11 @@ zipimporter_get_data(PyObject *obj, PyObject *args)
{
ZipImporter *self = (ZipImporter *)obj;
PyObject *path, *key;
FILE *fp;
#ifdef ALTSEP
_Py_IDENTIFIER(replace);
#endif
PyObject *toc_entry;
PyObject *toc_entry, *data;
Py_ssize_t path_start, path_len, len;
if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &path))
......@@ -585,15 +620,23 @@ zipimporter_get_data(PyObject *obj, PyObject *args)
key = PyUnicode_Substring(path, path_start, path_len);
if (key == NULL)
goto error;
fp = safely_reopen_archive(self);
if (fp == NULL)
goto error;
toc_entry = PyDict_GetItem(self->files, key);
if (toc_entry == NULL) {
PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key);
Py_DECREF(key);
fclose(fp);
goto error;
}
Py_DECREF(key);
Py_DECREF(path);
return get_data(self->archive, toc_entry);
data = get_data(fp, self->archive, toc_entry);
fclose(fp);
return data;
error:
Py_DECREF(path);
return NULL;
......@@ -618,6 +661,7 @@ zipimporter_get_source(PyObject *obj, PyObject *args)
PyObject *toc_entry;
PyObject *fullname, *subname, *path, *fullpath;
enum zi_module_info mi;
FILE *fp;
if (!PyArg_ParseTuple(args, "U:zipimporter.get_source", &fullname))
return NULL;
......@@ -647,11 +691,18 @@ zipimporter_get_source(PyObject *obj, PyObject *args)
if (fullpath == NULL)
return NULL;
fp = safely_reopen_archive(self);
if (fp == NULL) {
Py_DECREF(fullpath);
return NULL;
}
toc_entry = PyDict_GetItem(self->files, fullpath);
Py_DECREF(fullpath);
if (toc_entry != NULL) {
PyObject *res, *bytes;
bytes = get_data(self->archive, toc_entry);
bytes = get_data(fp, self->archive, toc_entry);
fclose(fp);
if (bytes == NULL)
return NULL;
res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes),
......@@ -659,10 +710,10 @@ zipimporter_get_source(PyObject *obj, PyObject *args)
Py_DECREF(bytes);
return res;
}
fclose(fp);
/* we have the module, but no source */
Py_INCREF(Py_None);
return Py_None;
Py_RETURN_NONE;
}
PyDoc_STRVAR(doc_find_module,
......@@ -828,10 +879,135 @@ get_long(unsigned char *buf) {
return x;
}
/* Return 1 if objects a and b fail a Py_EQ test for an attr. */
static int
compare_obj_attr_strings(PyObject *obj_a, PyObject *obj_b, char *attr_name)
{
int problem = 0;
PyObject *attr_a = PyObject_GetAttrString(obj_a, attr_name);
PyObject *attr_b = PyObject_GetAttrString(obj_b, attr_name);
if (attr_a == NULL || attr_b == NULL)
problem = 1;
else
problem = (PyObject_RichCompareBool(attr_a, attr_b, Py_EQ) != 1);
Py_XDECREF(attr_a);
Py_XDECREF(attr_b);
return problem;
}
/*
* Returns an open FILE * on success.
* Returns NULL on error with the Python error context set.
*/
static FILE *
safely_reopen_archive(ZipImporter *self)
{
FILE *fp;
PyObject *stat_now = NULL;
fp = fopen_rb_and_stat(self->archive, &stat_now);
if (!fp) {
PyErr_Format(ZipImportError,
"zipimport: can not open file %U", self->archive);
Py_XDECREF(stat_now);
return NULL;
}
if (stat_now != NULL) {
int problem = 0;
PyObject *files;
PyObject *prev_stat = PyDict_GetItem(zip_stat_cache, self->archive);
/* Test stat_now vs the old cached stat on some key attributes. */
if (prev_stat != NULL) {
problem = compare_obj_attr_strings(prev_stat, stat_now,
"st_ino");
problem |= compare_obj_attr_strings(prev_stat, stat_now,
"st_size");
problem |= compare_obj_attr_strings(prev_stat, stat_now,
"st_mtime");
} else {
if (Py_VerboseFlag)
PySys_FormatStderr("# zipimport: no stat data for %U!\n",
self->archive);
problem = 1;
}
if (problem) {
if (Py_VerboseFlag)
PySys_FormatStderr("# zipimport: %U modified since last"
" import, rereading TOC.\n", self->archive);
files = read_directory(fp, self->archive);
if (files == NULL) {
Py_DECREF(stat_now);
fclose(fp);
return NULL;
}
if (PyDict_SetItem(zip_directory_cache, self->archive,
files) != 0) {
Py_DECREF(files);
Py_DECREF(stat_now);
fclose(fp);
return NULL;
}
if (stat_now && PyDict_SetItem(zip_stat_cache, self->archive,
stat_now) != 0) {
Py_DECREF(files);
Py_DECREF(stat_now);
fclose(fp);
return NULL;
}
Py_XDECREF(self->files); /* free the old value. */
self->files = files;
} else {
/* No problem, discard the new stat data. */
Py_DECREF(stat_now);
}
} /* stat succeeded */
return fp;
}
/*
fopen_rb_and_stat(path, &py_stat) -> FILE *
Opens path in "rb" mode and populates the Python py_stat stat_result
with information about the opened file. *py_stat may not be changed
if there is no fstat_function or if fstat_function fails.
Returns NULL and does nothing to *py_stat if the open failed.
*/
static FILE *
fopen_rb_and_stat(PyObject *path, PyObject **py_stat_p)
{
FILE *fp;
assert(py_stat_p != NULL);
assert(*py_stat_p == NULL);
fp = _Py_fopen(path, "rb");
if (fp == NULL) {
if (!PyErr_Occurred())
PyErr_Format(ZipImportError,
"zipimport: can not open file %U", path);
return NULL;
}
if (fstat_function) {
PyObject *stat_result = PyObject_CallFunction(fstat_function,
"i", fileno(fp));
if (stat_result == NULL) {
PyErr_Clear(); /* We can function without it. */
} else {
*py_stat_p = stat_result;
}
}
return fp;
}
/*
read_directory(archive) -> files dict (new reference)
read_directory(fp, archive) -> files dict (new reference)
Given a path to a Zip archive, build a dict, mapping file names
Given an open Zip archive, build a dict, mapping file names
(local to the archive, using SEP as a separator) to toc entries.
A toc_entry is a tuple:
......@@ -851,10 +1027,9 @@ get_long(unsigned char *buf) {
data_size and file_offset are 0.
*/
static PyObject *
read_directory(PyObject *archive)
read_directory(FILE *fp, PyObject *archive)
{
PyObject *files = NULL;
FILE *fp;
unsigned short flags;
short compress, time, date, name_size;
long crc, data_size, file_size, header_size;
......@@ -869,27 +1044,18 @@ read_directory(PyObject *archive)
const char *charset;
int bootstrap;
fp = _Py_fopen(archive, "rb");
if (fp == NULL) {
if (!PyErr_Occurred())
PyErr_Format(ZipImportError, "can't open Zip file: %R", archive);
return NULL;
}
assert(fp != NULL);
if (fseek(fp, -22, SEEK_END) == -1) {
fclose(fp);
PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
return NULL;
}
header_position = ftell(fp);
if (fread(endof_central_dir, 1, 22, fp) != 22) {
fclose(fp);
PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
return NULL;
}
if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
/* Bad: End of Central Dir signature */
fclose(fp);
PyErr_Format(ZipImportError, "not a Zip file: %R", archive);
return NULL;
}
......@@ -983,19 +1149,16 @@ read_directory(PyObject *archive)
goto error;
count++;
}
fclose(fp);
if (Py_VerboseFlag)
PySys_FormatStderr("# zipimport: found %ld names in %R\n",
count, archive);
return files;
fseek_error:
fclose(fp);
Py_XDECREF(files);
Py_XDECREF(nameobj);
PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
return NULL;
error:
fclose(fp);
Py_XDECREF(files);
Py_XDECREF(nameobj);
return NULL;
......@@ -1034,14 +1197,13 @@ get_decompress_func(void)
return decompress;
}
/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
/* Given a FILE* to a Zip file and a toc_entry, return the (uncompressed)
data as a new reference. */
static PyObject *
get_data(PyObject *archive, PyObject *toc_entry)
get_data(FILE *fp, PyObject *archive, PyObject *toc_entry)
{
PyObject *raw_data, *data = NULL, *decompress;
char *buf;
FILE *fp;
int err;
Py_ssize_t bytes_read = 0;
long l;
......@@ -1055,17 +1217,8 @@ get_data(PyObject *archive, PyObject *toc_entry)
return NULL;
}
fp = _Py_fopen(archive, "rb");
if (!fp) {
if (!PyErr_Occurred())
PyErr_Format(PyExc_IOError,
"zipimport: can not open file %U", archive);
return NULL;
}
/* Check to make sure the local file header is correct */
if (fseek(fp, file_offset, 0) == -1) {
fclose(fp);
PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
return NULL;
}
......@@ -1076,11 +1229,9 @@ get_data(PyObject *archive, PyObject *toc_entry)
PyErr_Format(ZipImportError,
"bad local file header in %U",
archive);
fclose(fp);
return NULL;
}
if (fseek(fp, file_offset + 26, 0) == -1) {
fclose(fp);
PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
return NULL;
}
......@@ -1095,7 +1246,6 @@ get_data(PyObject *archive, PyObject *toc_entry)
raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
if (raw_data == NULL) {
fclose(fp);
return NULL;
}
buf = PyBytes_AsString(raw_data);
......@@ -1104,11 +1254,9 @@ get_data(PyObject *archive, PyObject *toc_entry)
if (err == 0) {
bytes_read = fread(buf, 1, data_size, fp);
} else {
fclose(fp);
PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
return NULL;
}
fclose(fp);
if (err || bytes_read != data_size) {
PyErr_SetString(PyExc_IOError,
"zipimport: can't read data");
......@@ -1329,12 +1477,12 @@ get_mtime_of_source(ZipImporter *self, PyObject *path)
/* Return the code object for the module named by 'fullname' from the
Zip archive as a new reference. */
static PyObject *
get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
get_code_from_data(ZipImporter *self, FILE *fp, int ispackage, int isbytecode,
time_t mtime, PyObject *toc_entry)
{
PyObject *data, *modpath, *code;
data = get_data(self->archive, toc_entry);
data = get_data(fp, self->archive, toc_entry);
if (data == NULL)
return NULL;
......@@ -1356,6 +1504,7 @@ get_module_code(ZipImporter *self, PyObject *fullname,
PyObject *code = NULL, *toc_entry, *subname;
PyObject *path, *fullpath = NULL;
struct st_zip_searchorder *zso;
FILE *fp;
subname = get_subname(fullname);
if (subname == NULL)
......@@ -1366,6 +1515,12 @@ get_module_code(ZipImporter *self, PyObject *fullname,
if (path == NULL)
return NULL;
fp = safely_reopen_archive(self);
if (fp == NULL) {
Py_DECREF(path);
return NULL;
}
for (zso = zip_searchorder; *zso->suffix; zso++) {
code = NULL;
......@@ -1376,6 +1531,7 @@ get_module_code(ZipImporter *self, PyObject *fullname,
if (Py_VerboseFlag > 1)
PySys_FormatStderr("# trying %U%c%U\n",
self->archive, (int)SEP, fullpath);
toc_entry = PyDict_GetItem(self->files, fullpath);
if (toc_entry != NULL) {
time_t mtime = 0;
......@@ -1391,7 +1547,7 @@ get_module_code(ZipImporter *self, PyObject *fullname,
Py_CLEAR(fullpath);
if (p_ispackage != NULL)
*p_ispackage = ispackage;
code = get_code_from_data(self, ispackage,
code = get_code_from_data(self, fp, ispackage,
isbytecode, mtime,
toc_entry);
if (code == Py_None) {
......@@ -1411,6 +1567,7 @@ get_module_code(ZipImporter *self, PyObject *fullname,
}
PyErr_Format(ZipImportError, "can't find module %R", fullname);
exit:
fclose(fp);
Py_DECREF(path);
Py_XDECREF(fullpath);
return code;
......@@ -1428,6 +1585,8 @@ This module exports three objects:\n\
subclass of ImportError, so it can be caught as ImportError, too.\n\
- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
info dicts, as used in zipimporter._files.\n\
- _zip_stat_cache: a dict, mapping archive paths to stat_result\n\
info for the .zip the last time anything was imported from it.\n\
\n\
It is usually not needed to use the zipimport module explicitly; it is\n\
used by the builtin import mechanism for sys.path items that are paths\n\
......@@ -1487,6 +1646,7 @@ PyInit_zipimport(void)
(PyObject *)&ZipImporter_Type) < 0)
return NULL;
Py_XDECREF(zip_directory_cache); /* Avoid embedded interpreter leaks. */
zip_directory_cache = PyDict_New();
if (zip_directory_cache == NULL)
return NULL;
......@@ -1494,5 +1654,36 @@ PyInit_zipimport(void)
if (PyModule_AddObject(mod, "_zip_directory_cache",
zip_directory_cache) < 0)
return NULL;
Py_XDECREF(zip_stat_cache); /* Avoid embedded interpreter leaks. */
zip_stat_cache = PyDict_New();
if (zip_stat_cache == NULL)
return NULL;
Py_INCREF(zip_stat_cache);
if (PyModule_AddObject(mod, "_zip_stat_cache", zip_stat_cache) < 0)
return NULL;
{
/* We cannot import "os" here as that is a .py/.pyc file that could
* live within a zipped up standard library. Import the posix or nt
* builtin that provides the fstat() function we want instead. */
PyObject *os_like_module;
Py_CLEAR(fstat_function); /* Avoid embedded interpreter leaks. */
os_like_module = PyImport_ImportModule("posix");
if (os_like_module == NULL) {
PyErr_Clear();
os_like_module = PyImport_ImportModule("nt");
}
if (os_like_module != NULL) {
fstat_function = PyObject_GetAttrString(os_like_module, "fstat");
Py_DECREF(os_like_module);
}
if (fstat_function == NULL) {
PyErr_Clear(); /* non-fatal, we'll go on without it. */
if (Py_VerboseFlag)
PySys_WriteStderr("# zipimport unable to use os.fstat().\n");
}
}
return mod;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment