Commit c541f8ef authored by Antoine Pitrou

Issue #14043: Speed up importlib's _FileFinder by at least 8x, and add a new...

Issue #14043: Speed up importlib's _FileFinder by at least 8x, and add a new importlib.invalidate_caches() function.

importlib is now often faster than imp.find_module() at finding modules.
parent 336b2f45
...@@ -86,6 +86,14 @@ Functions ...@@ -86,6 +86,14 @@ Functions
that was imported (e.g. ``pkg.mod``), while :func:`__import__` returns the that was imported (e.g. ``pkg.mod``), while :func:`__import__` returns the
top-level package or module (e.g. ``pkg``). top-level package or module (e.g. ``pkg``).
.. function:: invalidate_caches()
Invalidate importlib's internal caches. Calling this function may be
needed if some modules are installed while your program is running and
you expect the program to notice the changes.
.. versionadded:: 3.3
:mod:`importlib.abc` -- Abstract base classes related to import :mod:`importlib.abc` -- Abstract base classes related to import
--------------------------------------------------------------- ---------------------------------------------------------------
......
...@@ -18,7 +18,7 @@ References on import: ...@@ -18,7 +18,7 @@ References on import:
http://www.python.org/dev/peps/pep-0328 http://www.python.org/dev/peps/pep-0328
""" """
__all__ = ['__import__', 'import_module'] __all__ = ['__import__', 'import_module', 'invalidate_caches']
from . import _bootstrap from . import _bootstrap
...@@ -37,7 +37,7 @@ _bootstrap._setup(sys, imp) ...@@ -37,7 +37,7 @@ _bootstrap._setup(sys, imp)
# Public API ######################################################### # Public API #########################################################
from ._bootstrap import __import__ from ._bootstrap import __import__, invalidate_caches
def import_module(name, package=None): def import_module(name, package=None):
......
...@@ -21,31 +21,16 @@ work. One should use importlib as the public-facing version of this module. ...@@ -21,31 +21,16 @@ work. One should use importlib as the public-facing version of this module.
CASE_INSENSITIVE_PLATFORMS = 'win', 'cygwin', 'darwin' CASE_INSENSITIVE_PLATFORMS = 'win', 'cygwin', 'darwin'
def _case_insensitive_ok(directory, check):
"""Check if the directory contains something matching 'check' exists in the
directory.
If PYTHONCASEOK is a defined environment variable then skip the def _relax_case():
case-sensitivity check. """True if filenames must be checked case-insensitively."""
if any(map(sys.platform.startswith, CASE_INSENSITIVE_PLATFORMS)):
""" def _relax_case():
if b'PYTHONCASEOK' not in _os.environ: return b'PYTHONCASEOK' in _os.environ
if not directory:
directory = '.'
return check in _os.listdir(directory)
else: else:
return True def _relax_case():
return False
def _case_sensitive_ok(directory, check): return _relax_case
"""Under case-sensitive filesystems always assume the case matches.
Since other code does the file existence check, that subsumes a
case-sensitivity check.
"""
return True
_case_ok = None
# TODO: Expose from marshal # TODO: Expose from marshal
...@@ -172,6 +157,18 @@ code_type = type(_wrap.__code__) ...@@ -172,6 +157,18 @@ code_type = type(_wrap.__code__)
# Finder/loader utility code ################################################## # Finder/loader utility code ##################################################
# Generation counter for the finder caches.  Each _FileFinder keeps its own
# copy and rebuilds its directory listing when the two values differ.
_cache_refresh = 0


def invalidate_caches():
    """Mark every importlib directory cache as stale.

    Use this after installing modules while the program is running, so that
    later imports notice the new files.  The function only advances the
    module-level generation counter; finders compare it against their own
    copy on the next lookup.
    """
    global _cache_refresh
    _cache_refresh = _cache_refresh + 1
def set_package(fxn): def set_package(fxn):
"""Set __package__ on the returned module.""" """Set __package__ on the returned module."""
def set_package_wrapper(*args, **kwargs): def set_package_wrapper(*args, **kwargs):
...@@ -708,7 +705,7 @@ class PathFinder: ...@@ -708,7 +705,7 @@ class PathFinder:
""" """
if path == '': if path == '':
path = _os.getcwd() path = '.'
try: try:
finder = sys.path_importer_cache[path] finder = sys.path_importer_cache[path]
except KeyError: except KeyError:
...@@ -760,29 +757,55 @@ class _FileFinder: ...@@ -760,29 +757,55 @@ class _FileFinder:
for suffix in detail.suffixes) for suffix in detail.suffixes)
self.packages = packages self.packages = packages
self.modules = modules self.modules = modules
self.path = path # Base (directory) path
self.path = path or '.'
self._path_mtime = -1
self._path_cache = set()
self._cache_refresh = 0
def find_module(self, fullname): def find_module(self, fullname):
"""Try to find a loader for the specified module.""" """Try to find a loader for the specified module."""
tail_module = fullname.rpartition('.')[2] tail_module = fullname.rpartition('.')[2]
if _relax_case():
tail_module = tail_module.lower()
try:
mtime = _os.stat(self.path).st_mtime
except OSError:
mtime = -1
if mtime != self._path_mtime or _cache_refresh != self._cache_refresh:
self._fill_cache()
self._path_mtime = mtime
self._cache_refresh = _cache_refresh
cache = self._path_cache
if tail_module in cache:
base_path = _path_join(self.path, tail_module) base_path = _path_join(self.path, tail_module)
if _path_isdir(base_path) and _case_ok(self.path, tail_module): if _path_isdir(base_path):
for suffix, loader in self.packages: for suffix, loader in self.packages:
init_filename = '__init__' + suffix init_filename = '__init__' + suffix
full_path = _path_join(base_path, init_filename) full_path = _path_join(base_path, init_filename)
if (_path_isfile(full_path) and if _path_isfile(full_path):
_case_ok(base_path, init_filename)):
return loader(fullname, full_path) return loader(fullname, full_path)
else: else:
msg = "Not importing directory {}: missing __init__" msg = "Not importing directory {}: missing __init__"
_warnings.warn(msg.format(base_path), ImportWarning) _warnings.warn(msg.format(base_path), ImportWarning)
for suffix, loader in self.modules: for suffix, loader in self.modules:
mod_filename = tail_module + suffix mod_filename = tail_module + suffix
if mod_filename in cache:
full_path = _path_join(self.path, mod_filename) full_path = _path_join(self.path, mod_filename)
if _path_isfile(full_path) and _case_ok(self.path, mod_filename): if _path_isfile(full_path):
return loader(fullname, full_path) return loader(fullname, full_path)
return None return None
def _fill_cache(self):
    """Fill the cache of potential modules and packages for this directory.

    Replaces ``self._path_cache`` with the set of entry names found in
    ``self.path``.  When ``_relax_case()`` is true the names are stored
    lower-cased, matching the lower-cased module name that find_module()
    looks up.

    If the directory cannot be listed, the cache is reset to an empty set
    instead of letting OSError escape.
    """
    try:
        contents = _os.listdir(self.path)
    except OSError:
        # find_module() already maps a failed stat() to mtime -1 and carries
        # on.  A directory that vanished or became unreadable must likewise
        # look empty rather than abort the import with OSError.
        contents = []
    if _relax_case():
        self._path_cache = {fn.lower() for fn in contents}
    else:
        self._path_cache = set(contents)
class _SourceFinderDetails: class _SourceFinderDetails:
loader = _SourceFileLoader loader = _SourceFileLoader
...@@ -1060,7 +1083,7 @@ def _setup(sys_module, imp_module): ...@@ -1060,7 +1083,7 @@ def _setup(sys_module, imp_module):
modules, those two modules must be explicitly passed in. modules, those two modules must be explicitly passed in.
""" """
global _case_ok, imp, sys global imp, sys
imp = imp_module imp = imp_module
sys = sys_module sys = sys_module
...@@ -1093,12 +1116,8 @@ def _setup(sys_module, imp_module): ...@@ -1093,12 +1116,8 @@ def _setup(sys_module, imp_module):
raise ImportError('importlib requires posix or nt') raise ImportError('importlib requires posix or nt')
setattr(self_module, '_os', os_module) setattr(self_module, '_os', os_module)
setattr(self_module, 'path_sep', path_sep) setattr(self_module, 'path_sep', path_sep)
# Constants
if any(sys_module.platform.startswith(x) setattr(self_module, '_relax_case', _relax_case())
for x in CASE_INSENSITIVE_PLATFORMS):
_case_ok = _case_insensitive_ok
else:
_case_ok = _case_sensitive_ok
def _install(sys_module, imp_module): def _install(sys_module, imp_module):
......
...@@ -78,11 +78,11 @@ class FinderTests(unittest.TestCase): ...@@ -78,11 +78,11 @@ class FinderTests(unittest.TestCase):
path = '' path = ''
module = '<test module>' module = '<test module>'
importer = util.mock_modules(module) importer = util.mock_modules(module)
hook = import_util.mock_path_hook(os.getcwd(), importer=importer) hook = import_util.mock_path_hook(os.curdir, importer=importer)
with util.import_state(path=[path], path_hooks=[hook]): with util.import_state(path=[path], path_hooks=[hook]):
loader = machinery.PathFinder.find_module(module) loader = machinery.PathFinder.find_module(module)
self.assertIs(loader, importer) self.assertIs(loader, importer)
self.assertIn(os.getcwd(), sys.path_importer_cache) self.assertIn(os.curdir, sys.path_importer_cache)
class DefaultPathFinderTests(unittest.TestCase): class DefaultPathFinderTests(unittest.TestCase):
......
...@@ -2,6 +2,7 @@ import builtins ...@@ -2,6 +2,7 @@ import builtins
import imp import imp
from importlib.test.import_ import test_relative_imports from importlib.test.import_ import test_relative_imports
from importlib.test.import_ import util as importlib_util from importlib.test.import_ import util as importlib_util
import importlib
import marshal import marshal
import os import os
import platform import platform
...@@ -34,6 +35,7 @@ class ImportTests(unittest.TestCase): ...@@ -34,6 +35,7 @@ class ImportTests(unittest.TestCase):
def setUp(self): def setUp(self):
remove_files(TESTFN) remove_files(TESTFN)
importlib.invalidate_caches()
def tearDown(self): def tearDown(self):
unload(TESTFN) unload(TESTFN)
...@@ -107,6 +109,7 @@ class ImportTests(unittest.TestCase): ...@@ -107,6 +109,7 @@ class ImportTests(unittest.TestCase):
create_empty_file(fname) create_empty_file(fname)
fn = imp.cache_from_source(fname) fn = imp.cache_from_source(fname)
unlink(fn) unlink(fn)
importlib.invalidate_caches()
__import__(TESTFN) __import__(TESTFN)
if not os.path.exists(fn): if not os.path.exists(fn):
self.fail("__import__ did not result in creation of " self.fail("__import__ did not result in creation of "
...@@ -260,6 +263,7 @@ class ImportTests(unittest.TestCase): ...@@ -260,6 +263,7 @@ class ImportTests(unittest.TestCase):
os.remove(source) os.remove(source)
del sys.modules[TESTFN] del sys.modules[TESTFN]
make_legacy_pyc(source) make_legacy_pyc(source)
importlib.invalidate_caches()
mod = __import__(TESTFN) mod = __import__(TESTFN)
base, ext = os.path.splitext(mod.__file__) base, ext = os.path.splitext(mod.__file__)
self.assertIn(ext, ('.pyc', '.pyo')) self.assertIn(ext, ('.pyc', '.pyo'))
...@@ -358,6 +362,7 @@ func_filename = func.__code__.co_filename ...@@ -358,6 +362,7 @@ func_filename = func.__code__.co_filename
with open(self.file_name, "w") as f: with open(self.file_name, "w") as f:
f.write(self.module_source) f.write(self.module_source)
sys.path.insert(0, self.dir_name) sys.path.insert(0, self.dir_name)
importlib.invalidate_caches()
def tearDown(self): def tearDown(self):
sys.path[:] = self.sys_path sys.path[:] = self.sys_path
...@@ -552,6 +557,7 @@ class PycacheTests(unittest.TestCase): ...@@ -552,6 +557,7 @@ class PycacheTests(unittest.TestCase):
with open(self.source, 'w') as fp: with open(self.source, 'w') as fp:
print('# This is a test file written by test_import.py', file=fp) print('# This is a test file written by test_import.py', file=fp)
sys.path.insert(0, os.curdir) sys.path.insert(0, os.curdir)
importlib.invalidate_caches()
def tearDown(self): def tearDown(self):
assert sys.path[0] == os.curdir, 'Unexpected sys.path[0]' assert sys.path[0] == os.curdir, 'Unexpected sys.path[0]'
...@@ -599,6 +605,7 @@ class PycacheTests(unittest.TestCase): ...@@ -599,6 +605,7 @@ class PycacheTests(unittest.TestCase):
pyc_file = make_legacy_pyc(self.source) pyc_file = make_legacy_pyc(self.source)
os.remove(self.source) os.remove(self.source)
unload(TESTFN) unload(TESTFN)
importlib.invalidate_caches()
m = __import__(TESTFN) m = __import__(TESTFN)
self.assertEqual(m.__file__, self.assertEqual(m.__file__,
os.path.join(os.curdir, os.path.relpath(pyc_file))) os.path.join(os.curdir, os.path.relpath(pyc_file)))
...@@ -619,6 +626,7 @@ class PycacheTests(unittest.TestCase): ...@@ -619,6 +626,7 @@ class PycacheTests(unittest.TestCase):
pyc_file = make_legacy_pyc(self.source) pyc_file = make_legacy_pyc(self.source)
os.remove(self.source) os.remove(self.source)
unload(TESTFN) unload(TESTFN)
importlib.invalidate_caches()
m = __import__(TESTFN) m = __import__(TESTFN)
self.assertEqual(m.__cached__, self.assertEqual(m.__cached__,
os.path.join(os.curdir, os.path.relpath(pyc_file))) os.path.join(os.curdir, os.path.relpath(pyc_file)))
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
import sys import sys
import os import os
import shutil import shutil
import importlib
import unittest import unittest
from test.support import run_unittest, create_empty_file from test.support import run_unittest, create_empty_file
...@@ -212,6 +213,7 @@ class LongReprTest(unittest.TestCase): ...@@ -212,6 +213,7 @@ class LongReprTest(unittest.TestCase):
# Remember where we are # Remember where we are
self.here = os.getcwd() self.here = os.getcwd()
sys.path.insert(0, self.here) sys.path.insert(0, self.here)
importlib.invalidate_caches()
def tearDown(self): def tearDown(self):
actions = [] actions = []
......
...@@ -469,6 +469,9 @@ Core and Builtins ...@@ -469,6 +469,9 @@ Core and Builtins
Library Library
------- -------
- Issue #14043: Speed up importlib's _FileFinder by at least 8x, and add a
new importlib.invalidate_caches() function.
- Issue #14001: CVE-2012-0845: xmlrpc: Fix an endless loop in - Issue #14001: CVE-2012-0845: xmlrpc: Fix an endless loop in
SimpleXMLRPCServer upon malformed POST request. SimpleXMLRPCServer upon malformed POST request.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment