Commit 9a76735b authored by Nick Coghlan

Close #19946: use runpy as needed in multiprocessing

- handles main files without a suffix
- handles main submodules properly
- adds test cases for the various kinds of __main__
parent 7cff4cd7
......@@ -624,13 +624,22 @@ mmap objects can now be weakref'ed.
multiprocessing
---------------
On Unix two new *start methods* have been added for starting processes
using :mod:`multiprocessing`. These make the mixing of processes with
threads more robust. See :issue:`8713`.
On Unix, two new *start methods* (``spawn`` and ``forkserver``) have been
added for starting processes using :mod:`multiprocessing`. These make
the mixing of processes with threads more robust, and the ``spawn``
method matches the semantics that multiprocessing has always used on
Windows. (Contributed by Richard Oudkerk in :issue:`8713`).
Also, except when using the old *fork* start method, child processes
will no longer inherit unneeded handles/file descriptors from their parents.
:mod:`multiprocessing` now relies on :mod:`runpy` (which implements the
``-m`` switch) to initialise ``__main__`` appropriately in child processes
when using the ``spawn`` or ``forkserver`` start methods. This resolves some
edge cases where combining multiprocessing, the ``-m`` command line switch
and explicit relative imports could cause obscure failures in child
processes. (Contributed by Nick Coghlan in :issue:`19946`).
os
--
......
......@@ -11,6 +11,8 @@
import os
import pickle
import sys
import runpy
import types
from . import get_start_method, set_start_method
from . import process
......@@ -157,15 +159,19 @@ def get_preparation_data(name):
start_method=get_start_method(),
)
if sys.platform != 'win32' or (not WINEXE and not WINSERVICE):
main_path = getattr(sys.modules['__main__'], '__file__', None)
if not main_path and sys.argv[0] not in ('', '-c'):
main_path = sys.argv[0]
# Figure out whether to initialise main in the subprocess as a module
# or through direct execution (or to leave it alone entirely)
main_module = sys.modules['__main__']
main_mod_name = getattr(main_module.__spec__, "name", None)
if main_mod_name is not None:
d['init_main_from_name'] = main_mod_name
elif sys.platform != 'win32' or (not WINEXE and not WINSERVICE):
main_path = getattr(main_module, '__file__', None)
if main_path is not None:
if (not os.path.isabs(main_path) and
process.ORIGINAL_DIR is not None):
main_path = os.path.join(process.ORIGINAL_DIR, main_path)
d['main_path'] = os.path.normpath(main_path)
d['init_main_from_path'] = os.path.normpath(main_path)
return d
......@@ -206,55 +212,68 @@ def prepare(data):
if 'start_method' in data:
set_start_method(data['start_method'])
if 'main_path' in data:
import_main_path(data['main_path'])
if 'init_main_from_name' in data:
_fixup_main_from_name(data['init_main_from_name'])
elif 'init_main_from_path' in data:
_fixup_main_from_path(data['init_main_from_path'])
# Multiprocessing module helpers to fix up the main module in
# spawned subprocesses
def _fixup_main_from_name(mod_name):
# __main__.py files for packages, directories, zip archives, etc, run
# their "main only" code unconditionally, so we don't even try to
# populate anything in __main__, nor do we make any changes to
# __main__ attributes
current_main = sys.modules['__main__']
if mod_name == "__main__" or mod_name.endswith(".__main__"):
return
def import_main_path(main_path):
'''
Set sys.modules['__main__'] to module at main_path
'''
# XXX (ncoghlan): The following code makes several bogus
# assumptions regarding the relationship between __file__
# and a module's real name. See PEP 302 and issue #10845
if getattr(sys.modules['__main__'], '__file__', None) == main_path:
# If this process was forked, __main__ may already be populated
if getattr(current_main.__spec__, "name", None) == mod_name:
return
# Otherwise, __main__ may contain some non-main code where we need to
# support unpickling it properly. We rerun it as __mp_main__ and make
# the normal __main__ an alias to that
old_main_modules.append(current_main)
main_module = types.ModuleType("__mp_main__")
main_content = runpy.run_module(mod_name,
run_name="__mp_main__",
alter_sys=True)
main_module.__dict__.update(main_content)
sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module
def _fixup_main_from_path(main_path):
# If this process was forked, __main__ may already be populated
current_main = sys.modules['__main__']
# Unfortunately, the main ipython launch script historically had no
# "if __name__ == '__main__'" guard, so we work around that
# by treating it like a __main__.py file
# See https://github.com/ipython/ipython/issues/4698
main_name = os.path.splitext(os.path.basename(main_path))[0]
if main_name == '__init__':
main_name = os.path.basename(os.path.dirname(main_path))
if main_name == 'ipython':
return
if main_name == '__main__':
main_module = sys.modules['__main__']
main_module.__file__ = main_path
elif main_name != 'ipython':
# Main modules not actually called __main__.py may
# contain additional code that should still be executed
import importlib
import types
if main_path is None:
dirs = None
elif os.path.basename(main_path).startswith('__init__.py'):
dirs = [os.path.dirname(os.path.dirname(main_path))]
else:
dirs = [os.path.dirname(main_path)]
assert main_name not in sys.modules, main_name
sys.modules.pop('__mp_main__', None)
# We should not try to load __main__
# since that would execute 'if __name__ == "__main__"'
# clauses, potentially causing a pseudo fork bomb.
main_module = types.ModuleType(main_name)
# XXX Use a target of main_module?
spec = importlib.find_spec(main_name, path=dirs)
if spec is None:
raise ImportError(name=main_name)
methods = importlib._bootstrap._SpecMethods(spec)
methods.init_module_attrs(main_module)
main_module.__name__ = '__mp_main__'
code = spec.loader.get_code(main_name)
exec(code, main_module.__dict__)
old_main_modules.append(sys.modules['__main__'])
# Otherwise, if __file__ already has the setting we expect,
# there's nothing more to do
if getattr(current_main, '__file__', None) == main_path:
return
# If the parent process has sent a path through rather than a module
# name we assume it is an executable script that may contain
# non-main code that needs to be executed
old_main_modules.append(current_main)
main_module = types.ModuleType("__mp_main__")
main_content = runpy.run_path(main_path,
run_name="__mp_main__")
main_module.__dict__.update(main_content)
sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module
def import_main_path(main_path):
    '''
    Set sys.modules['__main__'] to module at main_path

    Thin wrapper: forwards main_path unchanged to _fixup_main_from_path()
    and returns nothing itself.
    '''
    # NOTE(review): presumably kept so existing callers of the old public
    # name keep working -- confirm against the callers of this module
    _fixup_main_from_path(main_path)
# tests __main__ module handling in multiprocessing
import importlib
import importlib.machinery
import zipimport
import unittest
import sys
import os
import os.path
import py_compile
from test import support
from test.script_helper import (
make_pkg, make_script, make_zip_pkg, make_zip_script,
assert_python_ok, assert_python_failure, temp_dir,
spawn_python, kill_python)
# We look inside the context module to find out which
# start methods we can check
from multiprocessing.context import _concrete_contexts
verbose = support.verbose
# Source of the script executed by most tests below. The indentation inside
# the literal is significant: the text is written to disk and executed as a
# Python program, so the function, "if" and "while" bodies must be nested.
test_source = """\
# multiprocessing includes all sorts of shenanigans to make __main__
# attributes accessible in the subprocess in a pickle compatible way.
# We run the "doesn't work in the interactive interpreter" example from
# the docs to make sure it *does* work from an executed __main__,
# regardless of the invocation mechanism
import sys
import time
from multiprocessing import Pool, set_start_method
# We use this __main__ defined function in the map call below in order to
# check that multiprocessing in correctly running the unguarded
# code in child processes and then making it available as __main__
def f(x):
    return x*x
# Check explicit relative imports
if "check_sibling" in __file__:
    # We're inside a package and not in a __main__.py file
    # so make sure explicit relative imports work correctly
    from . import sibling
if __name__ == '__main__':
    start_method = sys.argv[1]
    set_start_method(start_method)
    p = Pool(5)
    results = []
    p.map_async(f, [1, 2, 3], callback=results.extend)
    deadline = time.time() + 2 # up to 2 s to report the results
    while not results:
        time.sleep(0.05)
        if time.time() > deadline:
            raise RuntimeError("Timed out waiting for results")
    results.sort()
    print(start_method, "->", results)
"""
# Source used for __main__.py style entry points. The indentation inside the
# literal is significant: the text is written to disk and executed as a
# Python program, so the "if" and "while" bodies must be nested.
test_source_main_skipped_in_children = """\
# __main__.py files have an implied "if __name__ == '__main__'" so
# multiprocessing should always skip running them in child processes
# This means we can't use __main__ defined functions in child processes,
# so we just use "int" as a passthrough operation below
if __name__ != "__main__":
    raise RuntimeError("Should only be called as __main__!")
import sys
import time
from multiprocessing import Pool, set_start_method
start_method = sys.argv[1]
set_start_method(start_method)
p = Pool(5)
results = []
p.map_async(int, [1, 4, 9], callback=results.extend)
deadline = time.time() + 2 # up to 2 s to report the results
while not results:
    time.sleep(0.05)
    if time.time() > deadline:
        raise RuntimeError("Timed out waiting for results")
results.sort()
print(start_method, "->", results)
"""
# These helpers were copied from test_cmd_line_script & tweaked a bit...
def _make_test_script(script_dir, script_basename,
                      source=test_source, omit_suffix=False):
    """Write *source* into *script_dir* via make_script() and return its result.

    When *script_basename* is "check_sibling", an empty "sibling" module is
    also written next to it so the explicit relative import in the test
    source has something to import. Import caches are invalidated before
    returning.
    """
    script_path = make_script(script_dir, script_basename,
                              source, omit_suffix)
    wants_sibling = (script_basename == "check_sibling")
    if wants_sibling:
        # Hack to check explicit relative imports
        make_script(script_dir, "sibling", "")
    importlib.invalidate_caches()
    return script_path
def _make_test_zip_pkg(zip_dir, zip_basename, pkg_name, script_basename,
                       source=test_source, depth=1):
    """Build a zipped package via make_zip_pkg() and return its result.

    All arguments are forwarded positionally and unchanged; import caches
    are invalidated before the result is handed back.
    """
    zip_pkg_args = (zip_dir, zip_basename, pkg_name, script_basename,
                    source, depth)
    zip_pkg_info = make_zip_pkg(*zip_pkg_args)
    importlib.invalidate_caches()
    return zip_pkg_info
# There's no easy way to pass the script directory in to get
# -m to work (avoiding that is the whole point of making
# directories and zipfiles executable!)
# So we fake it for testing purposes with a custom launch script
#
# Template placeholders (filled in with the % operator):
#   %s - source text of the expression inserted at the front of sys.path
#   %r - name of the module handed to runpy._run_module_as_main()
launch_source = """\
import sys, os.path, runpy
sys.path.insert(0, %s)
runpy._run_module_as_main(%r)
"""
def _make_launch_script(script_dir, script_basename, module_name, path=None):
    """Write a launcher script built from launch_source and return its path.

    *path* ends up as source text in the launcher: when it is None the
    expression ``os.path.dirname(__file__)`` is used, otherwise the repr of
    the given value. *module_name* is substituted into the template's
    second placeholder. Import caches are invalidated before returning.
    """
    path_expr = "os.path.dirname(__file__)" if path is None else repr(path)
    launcher_code = launch_source % (path_expr, module_name)
    launcher_path = make_script(script_dir, script_basename, launcher_code)
    importlib.invalidate_caches()
    return launcher_path
class MultiProcessingCmdLineMixin():
    """Shared command line tests, parameterised by start method.

    Concrete subclasses combine this mixin with unittest.TestCase and set:

    * ``start_method`` - name passed to the test script as its argument
    * ``main_in_children_source`` - script source used for the tests that
      execute a ``__main__.py`` file (directory, zipfile and package cases)
    """

    maxDiff = None # Show full tracebacks on subprocess failure

    @classmethod
    def setUpClass(cls):
        # unittest only invokes a class-level fixture that is spelled
        # "setUpClass" and is a classmethod; any other spelling is silently
        # ignored and the skip below would never happen.
        if cls.start_method not in _concrete_contexts:
            raise unittest.SkipTest("%r start method not available" %
                                    cls.start_method)

    def _check_output(self, script_name, exit_code, out, err):
        """Assert the child exited cleanly and printed the expected results."""
        if verbose > 1:
            print("Output from test script %r:" % script_name)
            print(out)
        self.assertEqual(exit_code, 0)
        self.assertEqual(err.decode('utf-8'), '')
        expected_results = "%s -> [1, 4, 9]" % self.start_method
        self.assertEqual(out.decode('utf-8').strip(), expected_results)

    def _check_script(self, script_name, *cmd_line_switches):
        """Run *script_name* with the start method as argument and check it."""
        if not __debug__:
            # Propagate the current optimisation level to the child
            cmd_line_switches += ('-' + 'O' * sys.flags.optimize,)
        run_args = cmd_line_switches + (script_name, self.start_method)
        rc, out, err = assert_python_ok(*run_args, __isolated=False)
        self._check_output(script_name, rc, out, err)

    def test_basic_script(self):
        with temp_dir() as script_dir:
            script_name = _make_test_script(script_dir, 'script')
            self._check_script(script_name)

    def test_basic_script_no_suffix(self):
        with temp_dir() as script_dir:
            script_name = _make_test_script(script_dir, 'script',
                                            omit_suffix=True)
            self._check_script(script_name)

    def test_ipython_workaround(self):
        # Some versions of the IPython launch script are missing the
        # __name__ == "__main__" guard, and multiprocessing has long had
        # a workaround for that case
        # See https://github.com/ipython/ipython/issues/4698
        source = test_source_main_skipped_in_children
        with temp_dir() as script_dir:
            script_name = _make_test_script(script_dir, 'ipython',
                                            source=source)
            self._check_script(script_name)
            script_no_suffix = _make_test_script(script_dir, 'ipython',
                                                 source=source,
                                                 omit_suffix=True)
            self._check_script(script_no_suffix)

    def test_script_compiled(self):
        with temp_dir() as script_dir:
            script_name = _make_test_script(script_dir, 'script')
            py_compile.compile(script_name, doraise=True)
            os.remove(script_name)
            pyc_file = support.make_legacy_pyc(script_name)
            self._check_script(pyc_file)

    def test_directory(self):
        source = self.main_in_children_source
        with temp_dir() as script_dir:
            _make_test_script(script_dir, '__main__', source=source)
            self._check_script(script_dir)

    def test_directory_compiled(self):
        source = self.main_in_children_source
        with temp_dir() as script_dir:
            script_name = _make_test_script(script_dir, '__main__',
                                            source=source)
            py_compile.compile(script_name, doraise=True)
            os.remove(script_name)
            support.make_legacy_pyc(script_name)
            self._check_script(script_dir)

    def test_zipfile(self):
        source = self.main_in_children_source
        with temp_dir() as script_dir:
            script_name = _make_test_script(script_dir, '__main__',
                                            source=source)
            zip_name, _ = make_zip_script(script_dir, 'test_zip',
                                          script_name)
            self._check_script(zip_name)

    def test_zipfile_compiled(self):
        source = self.main_in_children_source
        with temp_dir() as script_dir:
            script_name = _make_test_script(script_dir, '__main__',
                                            source=source)
            compiled_name = py_compile.compile(script_name, doraise=True)
            zip_name, _ = make_zip_script(script_dir, 'test_zip',
                                          compiled_name)
            self._check_script(zip_name)

    def test_module_in_package(self):
        with temp_dir() as script_dir:
            pkg_dir = os.path.join(script_dir, 'test_pkg')
            make_pkg(pkg_dir)
            _make_test_script(pkg_dir, 'check_sibling')
            launch_name = _make_launch_script(script_dir, 'launch',
                                              'test_pkg.check_sibling')
            self._check_script(launch_name)

    def test_module_in_package_in_zipfile(self):
        with temp_dir() as script_dir:
            zip_name, _ = _make_test_zip_pkg(script_dir, 'test_zip',
                                             'test_pkg', 'script')
            launch_name = _make_launch_script(script_dir, 'launch',
                                              'test_pkg.script', zip_name)
            self._check_script(launch_name)

    def test_module_in_subpackage_in_zipfile(self):
        with temp_dir() as script_dir:
            zip_name, _ = _make_test_zip_pkg(script_dir, 'test_zip',
                                             'test_pkg', 'script', depth=2)
            launch_name = _make_launch_script(script_dir, 'launch',
                                              'test_pkg.test_pkg.script',
                                              zip_name)
            self._check_script(launch_name)

    def test_package(self):
        source = self.main_in_children_source
        with temp_dir() as script_dir:
            pkg_dir = os.path.join(script_dir, 'test_pkg')
            make_pkg(pkg_dir)
            _make_test_script(pkg_dir, '__main__', source=source)
            launch_name = _make_launch_script(script_dir, 'launch',
                                              'test_pkg')
            self._check_script(launch_name)

    def test_package_compiled(self):
        source = self.main_in_children_source
        with temp_dir() as script_dir:
            pkg_dir = os.path.join(script_dir, 'test_pkg')
            make_pkg(pkg_dir)
            script_name = _make_test_script(pkg_dir, '__main__',
                                            source=source)
            py_compile.compile(script_name, doraise=True)
            os.remove(script_name)
            support.make_legacy_pyc(script_name)
            launch_name = _make_launch_script(script_dir, 'launch',
                                              'test_pkg')
            self._check_script(launch_name)
# Test all supported start methods (setupClass skips as appropriate)
class SpawnCmdLineTest(MultiProcessingCmdLineMixin, unittest.TestCase):
    # Runs the shared command line tests with the 'spawn' start method
    start_method = 'spawn'
    # Script source used by the mixin's __main__.py based tests
    main_in_children_source = test_source_main_skipped_in_children
class ForkCmdLineTest(MultiProcessingCmdLineMixin, unittest.TestCase):
    # Runs the shared command line tests with the 'fork' start method
    start_method = 'fork'
    # Script source used by the mixin's __main__.py based tests; unlike the
    # other start methods this one uses the regular guarded test_source
    main_in_children_source = test_source
class ForkServerCmdLineTest(MultiProcessingCmdLineMixin, unittest.TestCase):
    # Runs the shared command line tests with the 'forkserver' start method
    start_method = 'forkserver'
    # Script source used by the mixin's __main__.py based tests
    main_in_children_source = test_source_main_skipped_in_children
def test_main():
    """Run the three start-method test classes, then call reap_children()."""
    test_classes = (
        SpawnCmdLineTest,
        ForkCmdLineTest,
        ForkServerCmdLineTest,
    )
    support.run_unittest(*test_classes)
    support.reap_children()


if __name__ == '__main__':
    test_main()
......@@ -44,6 +44,12 @@ Core and Builtins
Library
-------
- Issue #19946: multiprocessing now uses runpy to initialize __main__ in
child processes when necessary, allowing it to correctly handle scripts
without suffixes and submodules that use explicit relative imports or
otherwise rely on parent modules being correctly imported prior to
execution.
- Issue #19921: When Path.mkdir() is called with parents=True, any missing
parent is created with the default permissions, ignoring the mode argument
(mimicking the POSIX "mkdir -p" command).
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment