Commit abcd3a95 authored by Jason Madden

Add --second-chance to the testrunner, and use it on CI.

Fixes #1719
parent 2a28796d
...@@ -284,11 +284,8 @@ jobs: ...@@ -284,11 +284,8 @@ jobs:
pip install -U pylint pip install -U pylint
python -m pylint --limit-inference-results=1 --rcfile=.pylintrc gevent python -m pylint --limit-inference-results=1 --rcfile=.pylintrc gevent
- name: "Tests: Basic" - name: "Tests: Basic"
# Run them twice if they fail; this is to workaround the inability to re-run
# single jobs. Ideally the testrunner should be updated to do this for just failing
# tests. See also make-manylinux.
run: | run: |
python -m gevent.tests $G_USE_COV || python -m gevent.tests python -m gevent.tests --second-chance $G_USE_COV
# For the CPython interpreters, unless we have reason to expect # For the CPython interpreters, unless we have reason to expect
# different behaviour across the versions (e.g., as measured by coverage) # different behaviour across the versions (e.g., as measured by coverage)
# it's sufficient to run the full suite on the current version # it's sufficient to run the full suite on the current version
...@@ -303,21 +300,21 @@ jobs: ...@@ -303,21 +300,21 @@ jobs:
env: env:
GEVENT_FILE: thread GEVENT_FILE: thread
run: | run: |
python -m gevent.tests $G_USE_COV `(cd src/gevent/tests >/dev/null && ls test__*subprocess*.py)` python -m gevent.tests --second-chance $G_USE_COV `(cd src/gevent/tests >/dev/null && ls test__*subprocess*.py)`
- name: "Tests: c-ares resolver" - name: "Tests: c-ares resolver"
# This sometimes fails on mac. Also, save mac minutes. # This sometimes fails on mac. Also, save mac minutes.
if: (matrix.python-version == 2.7 || matrix.python-version == 3.9) && startsWith(runner.os, 'Linux') if: (matrix.python-version == 2.7 || matrix.python-version == 3.9) && startsWith(runner.os, 'Linux')
env: env:
GEVENT_RESOLVER: ares GEVENT_RESOLVER: ares
run: | run: |
python -mgevent.tests $G_USE_COV --ignore tests_that_dont_use_resolver.txt python -mgevent.tests --second-chance $G_USE_COV --ignore tests_that_dont_use_resolver.txt
- name: "Tests: dnspython resolver" - name: "Tests: dnspython resolver"
# This has known issues on PyPy3. Also, save mac minutes. # This has known issues on PyPy3. Also, save mac minutes.
if: (matrix.python-version == 2.7 || matrix.python-version == 3.9) && startsWith(runner.os, 'Linux') if: (matrix.python-version == 2.7 || matrix.python-version == 3.9) && startsWith(runner.os, 'Linux')
env: env:
GEVENT_RESOLVER: dnspython GEVENT_RESOLVER: dnspython
run: | run: |
python -mgevent.tests $G_USE_COV --ignore tests_that_dont_use_resolver.txt python -mgevent.tests --second-chance $G_USE_COV --ignore tests_that_dont_use_resolver.txt
- name: "Tests: leakchecks" - name: "Tests: leakchecks"
# Run the leaktests; this seems to be extremely slow on Python 3.7 # Run the leaktests; this seems to be extremely slow on Python 3.7
# XXX: Figure out why. Can we reproduce locally? # XXX: Figure out why. Can we reproduce locally?
...@@ -325,7 +322,7 @@ jobs: ...@@ -325,7 +322,7 @@ jobs:
env: env:
GEVENTTEST_LEAKCHECK: 1 GEVENTTEST_LEAKCHECK: 1
run: | run: |
python -m gevent.tests --ignore tests_that_dont_do_leakchecks.txt python -m gevent.tests --second-chance --ignore tests_that_dont_do_leakchecks.txt
- name: "Tests: PURE_PYTHON" - name: "Tests: PURE_PYTHON"
# No compiled cython modules on CPython, using the default backend. Get coverage here. # No compiled cython modules on CPython, using the default backend. Get coverage here.
# We should only need to run this for a single Python 2 and a Python 3 # We should only need to run this for a single Python 2 and a Python 3
...@@ -333,19 +330,19 @@ jobs: ...@@ -333,19 +330,19 @@ jobs:
env: env:
PURE_PYTHON: 1 PURE_PYTHON: 1
run: | run: |
python -mgevent.tests --coverage || python -m gevent.tests python -mgevent.tests --second-chance --coverage
- name: "Tests: libuv" - name: "Tests: libuv"
if: (matrix.python-version == 2.7 || matrix.python-version == 3.9) if: (matrix.python-version == 2.7 || matrix.python-version == 3.9)
env: env:
GEVENT_LOOP: libuv GEVENT_LOOP: libuv
run: | run: |
python -m gevent.tests $G_USE_COV || python -m gevent.tests python -m gevent.tests --second-chance $G_USE_COV
- name: "Tests: libev-cffi" - name: "Tests: libev-cffi"
if: (matrix.python-version == 2.7 || matrix.python-version == 3.9) && startsWith(runner.os, 'Linux') if: (matrix.python-version == 2.7 || matrix.python-version == 3.9) && startsWith(runner.os, 'Linux')
env: env:
GEVENT_LOOP: libev-cffi GEVENT_LOOP: libev-cffi
run: | run: |
python -m gevent.tests $G_USE_COV || python -m gevent.tests python -m gevent.tests --second-chance $G_USE_COV
- name: Report coverage - name: Report coverage
if: ${{ !startsWith(matrix.python-version, 'pypy') }} if: ${{ !startsWith(matrix.python-version, 'pypy') }}
run: | run: |
...@@ -460,7 +457,7 @@ jobs: ...@@ -460,7 +457,7 @@ jobs:
# Verify that we got non-embedded builds # Verify that we got non-embedded builds
python -c 'import gevent.libev.corecffi as CF; assert not CF.LIBEV_EMBED' python -c 'import gevent.libev.corecffi as CF; assert not CF.LIBEV_EMBED'
python -c 'import gevent.libuv.loop as CF; assert not CF.libuv.LIBUV_EMBED' python -c 'import gevent.libuv.loop as CF; assert not CF.libuv.LIBUV_EMBED'
python -mgevent.tests python -mgevent.tests --second-chance
manylinux_x86_64: manylinux_x86_64:
runs-on: ubuntu-latest runs-on: ubuntu-latest
......
...@@ -187,7 +187,7 @@ test_script: ...@@ -187,7 +187,7 @@ test_script:
- if not "%GWHEEL_ONLY%"=="true" %PYEXE% -c "import gevent.core; print(gevent.core.loop)" - if not "%GWHEEL_ONLY%"=="true" %PYEXE% -c "import gevent.core; print(gevent.core.loop)"
- if not "%GWHEEL_ONLY%"=="true" %PYEXE% -c "import gevent; print(gevent.config.settings['resolver'].get_options())" - if not "%GWHEEL_ONLY%"=="true" %PYEXE% -c "import gevent; print(gevent.config.settings['resolver'].get_options())"
- if not "%GWHEEL_ONLY%"=="true" %PYEXE% -c "from gevent._compat import get_clock_info; print(get_clock_info('perf_counter'))" - if not "%GWHEEL_ONLY%"=="true" %PYEXE% -c "from gevent._compat import get_clock_info; print(get_clock_info('perf_counter'))"
- if not "%GWHEEL_ONLY%"=="true" %PYEXE% -mgevent.tests --config known_failures.py --quiet - if not "%GWHEEL_ONLY%"=="true" %PYEXE% -mgevent.tests --second-chance --config known_failures.py --quiet
after_test: after_test:
# pycparser can't be built correctly in an isolated environment. # pycparser can't be built correctly in an isolated environment.
......
...@@ -134,9 +134,7 @@ if [ -d /gevent -a -d /opt/python ]; then ...@@ -134,9 +134,7 @@ if [ -d /gevent -a -d /opt/python ]; then
fi fi
if [ -z "$GEVENTTEST_SKIP_ALL" ]; then if [ -z "$GEVENTTEST_SKIP_ALL" ]; then
# TODO: Make the testrunner automatically repeat flaky tests. python -mgevent.tests --second-chance
# See the github action.
python -mgevent.tests || python -m gevent.tests
else else
# Allow skipping the bulk of the tests. If we're emulating Arm, # Allow skipping the bulk of the tests. If we're emulating Arm,
# running the whole thing takes forever. # running the whole thing takes forever.
......
...@@ -5,6 +5,7 @@ import re ...@@ -5,6 +5,7 @@ import re
import sys import sys
import os import os
import glob import glob
import operator
import traceback import traceback
import importlib import importlib
...@@ -80,8 +81,12 @@ class ResultCollector(object): ...@@ -80,8 +81,12 @@ class ResultCollector(object):
self.passed = {} self.passed = {}
self.total_cases = 0 self.total_cases = 0
self.total_skipped = 0 self.total_skipped = 0
# Every RunResult reported: failed, passed, rerun
self._all_results = []
def __iadd__(self, result): def __iadd__(self, result):
self._all_results.append(result)
if not result: if not result:
self.failed[result.name] = result #[cmd, kwargs] self.failed[result.name] = result #[cmd, kwargs]
else: else:
...@@ -90,6 +95,26 @@ class ResultCollector(object): ...@@ -90,6 +95,26 @@ class ResultCollector(object):
self.total_skipped += result.skipped_count self.total_skipped += result.skipped_count
return self return self
def __ilshift__(self, result):
"""
collector <<= result
Stores the result, but does not count it towards
the number of cases run, skipped, passed or failed.
"""
self._all_results.append(result)
return self
@property
def longest_running_tests(self):
"""
A new list of RunResult objects, sorted from longest running
to shortest running.
"""
return sorted(self._all_results,
key=operator.attrgetter('run_duration'),
reverse=True)
class FailFast(Exception): class FailFast(Exception):
pass pass
...@@ -105,7 +130,8 @@ class Runner(object): ...@@ -105,7 +130,8 @@ class Runner(object):
failfast=False, failfast=False,
quiet=False, quiet=False,
configured_run_alone_tests=(), configured_run_alone_tests=(),
worker_count=DEFAULT_NWORKERS): worker_count=DEFAULT_NWORKERS,
second_chance=False):
""" """
:keyword quiet: Set to True or False to explicitly choose. Set to :keyword quiet: Set to True or False to explicitly choose. Set to
`None` to use the default, which may come from the environment variable `None` to use the default, which may come from the environment variable
...@@ -113,10 +139,13 @@ class Runner(object): ...@@ -113,10 +139,13 @@ class Runner(object):
""" """
self._tests = tests self._tests = tests
self._configured_failing_tests = configured_failing_tests self._configured_failing_tests = configured_failing_tests
self._failfast = failfast
self._quiet = quiet self._quiet = quiet
self._configured_run_alone_tests = configured_run_alone_tests self._configured_run_alone_tests = configured_run_alone_tests
assert not (failfast and second_chance)
self._failfast = failfast
self._second_chance = second_chance
self.results = ResultCollector() self.results = ResultCollector()
self.results.total = len(self._tests) self.results.total = len(self._tests)
self._running_jobs = [] self._running_jobs = []
...@@ -127,6 +156,10 @@ class Runner(object): ...@@ -127,6 +156,10 @@ class Runner(object):
if self._quiet is not None: if self._quiet is not None:
kwargs['quiet'] = self._quiet kwargs['quiet'] = self._quiet
result = util.run(cmd, **kwargs) result = util.run(cmd, **kwargs)
if not result and self._second_chance:
self.results <<= result
util.log("> %s", result.name, color='warning')
result = util.run(cmd, **kwargs)
if not result and self._failfast: if not result and self._failfast:
# Under Python 3.9 (maybe older versions?), raising the # Under Python 3.9 (maybe older versions?), raising the
# SystemExit here (a background thread belonging to the # SystemExit here (a background thread belonging to the
...@@ -221,12 +254,10 @@ class Runner(object): ...@@ -221,12 +254,10 @@ class Runner(object):
def _report(self, elapsed_time, exit=False): def _report(self, elapsed_time, exit=False):
results = self.results results = self.results
report( report(
results.total, results.failed, results.passed, results,
exit=exit, exit=exit,
took=elapsed_time, took=elapsed_time,
configured_failing_tests=self._configured_failing_tests, configured_failing_tests=self._configured_failing_tests,
total_cases=results.total_cases,
total_skipped=results.total_skipped
) )
...@@ -482,7 +513,15 @@ class Discovery(object): ...@@ -482,7 +513,15 @@ class Discovery(object):
module_name = os.path.splitext(filename)[0] module_name = os.path.splitext(filename)[0]
qualified_name = self.package + '.' + module_name if self.package else module_name qualified_name = self.package + '.' + module_name if self.package else module_name
with open(os.path.abspath(filename), 'rb') as f: # Also allow just 'foo' as a shortcut for 'gevent.tests.foo'
abs_filename = os.path.abspath(filename)
if (
not os.path.exists(abs_filename)
and not filename.endswith('.py')
and os.path.exists(abs_filename + '.py') ):
abs_filename = abs_filename + '.py'
with open(abs_filename, 'rb') as f:
# Some of the test files (e.g., test__socket_dns) are # Some of the test files (e.g., test__socket_dns) are
# UTF8 encoded. Depending on the environment, Python 3 may # UTF8 encoded. Depending on the environment, Python 3 may
# try to decode those as ASCII, which fails with UnicodeDecodeError. # try to decode those as ASCII, which fails with UnicodeDecodeError.
...@@ -583,18 +622,38 @@ def format_seconds(seconds): ...@@ -583,18 +622,38 @@ def format_seconds(seconds):
return seconds return seconds
def report(total, failed, passed, exit=True, took=None, def _show_longest_running(result_collector, how_many=5):
configured_failing_tests=(), longest_running_tests = result_collector.longest_running_tests
total_cases=0, total_skipped=0): if not longest_running_tests:
# pylint:disable=redefined-builtin,too-many-branches,too-many-locals return
runtimelog = util.runtimelog # XXX: Global state! # The only tricky part is handling repeats. we want to show them,
if runtimelog: # but not count them as a distinct entry.
util.log('\nLongest-running tests:') util.log('\nLongest-running tests:')
runtimelog.sort() length_of_longest_formatted_decimal = len('%.1f' % longest_running_tests[0].run_duration)
length = len('%.1f' % -runtimelog[0][0])
frmt = '%' + str(length) + '.1f seconds: %s' frmt = '%' + str(length_of_longest_formatted_decimal) + '.1f seconds: %s'
for delta, name in runtimelog[:5]: seen_names = set()
util.log(frmt, -delta, name) for result in longest_running_tests:
util.log(frmt, result.run_duration, result.name)
seen_names.add(result.name)
if len(seen_names) >= how_many:
break
def report(result_collector, # type: ResultCollector
exit=True, took=None,
configured_failing_tests=()):
# pylint:disable=redefined-builtin,too-many-branches,too-many-locals
total = result_collector.total
failed = result_collector.failed
passed = result_collector.passed
total_cases = result_collector.total_cases
total_skipped = result_collector.total_skipped
_show_longest_running(result_collector)
if took: if took:
took = ' in %s' % format_seconds(took) took = ' in %s' % format_seconds(took)
else: else:
...@@ -745,11 +804,11 @@ def main(): ...@@ -745,11 +804,11 @@ def main():
parser.add_argument('--discover', action='store_true') parser.add_argument('--discover', action='store_true')
parser.add_argument('--full', action='store_true') parser.add_argument('--full', action='store_true')
parser.add_argument('--config', default='known_failures.py') parser.add_argument('--config', default='known_failures.py')
parser.add_argument('--failfast', '-x', action='store_true')
parser.add_argument("--coverage", action="store_true") parser.add_argument("--coverage", action="store_true")
parser.add_argument("--quiet", action="store_true", default=True) parser.add_argument("--quiet", action="store_true", default=True)
parser.add_argument("--verbose", action="store_false", dest='quiet') parser.add_argument("--verbose", action="store_false", dest='quiet')
parser.add_argument("--debug", action="store_true", default=False) parser.add_argument("--debug", action="store_true", default=False)
parser.add_argument("--package", default="gevent.tests") parser.add_argument("--package", default="gevent.tests")
parser.add_argument( parser.add_argument(
"--processes", "-j", default=DEFAULT_NWORKERS, type=int, "--processes", "-j", default=DEFAULT_NWORKERS, type=int,
...@@ -768,9 +827,17 @@ def main(): ...@@ -768,9 +827,17 @@ def main():
'For example, "-u-network". GEVENTTEST_USE_RESOURCES is used ' 'For example, "-u-network". GEVENTTEST_USE_RESOURCES is used '
'if no argument is given. To only use one resources, specify ' 'if no argument is given. To only use one resources, specify '
'"-unone,resource".') '"-unone,resource".')
parser.add_argument("--travis-fold", metavar="MSG", parser.add_argument("--travis-fold", metavar="MSG",
help="Emit Travis CI log fold markers around the output.") help="Emit Travis CI log fold markers around the output.")
fail_parser = parser.add_mutually_exclusive_group()
fail_parser.add_argument(
"--second-chance", action="store_true", default=False,
help="Give failed tests a second chance.")
fail_parser.add_argument(
'--failfast', '-x', action='store_true', default=False,
help="Stop running after the first failure.")
parser.add_argument('tests', nargs='*') parser.add_argument('tests', nargs='*')
options = parser.parse_args() options = parser.parse_args()
# options.use will be either None for not given, or a list # options.use will be either None for not given, or a list
...@@ -862,6 +929,7 @@ def main(): ...@@ -862,6 +929,7 @@ def main():
quiet=options.quiet, quiet=options.quiet,
configured_run_alone_tests=RUN_ALONE, configured_run_alone_tests=RUN_ALONE,
worker_count=options.processes, worker_count=options.processes,
second_chance=options.second_chance,
) )
if options.travis_fold: if options.travis_fold:
......
...@@ -15,8 +15,6 @@ from gevent.monkey import get_original ...@@ -15,8 +15,6 @@ from gevent.monkey import get_original
# pylint: disable=broad-except,attribute-defined-outside-init # pylint: disable=broad-except,attribute-defined-outside-init
runtimelog = []
MIN_RUNTIME = 1.0
BUFFER_OUTPUT = False BUFFER_OUTPUT = False
# This is set by the testrunner, defaulting to true (be quiet) # This is set by the testrunner, defaulting to true (be quiet)
# But if we're run standalone, default to false # But if we're run standalone, default to false
...@@ -272,7 +270,9 @@ class RunResult(object): ...@@ -272,7 +270,9 @@ class RunResult(object):
output=None, # type: str output=None, # type: str
error=None, # type: str error=None, # type: str
name=None, name=None,
run_count=0, skipped_count=0): run_count=0, skipped_count=0,
run_duration=0, # type: float
):
self.command = command self.command = command
self.run_kwargs = run_kwargs self.run_kwargs = run_kwargs
self.code = code self.code = code
...@@ -281,6 +281,7 @@ class RunResult(object): ...@@ -281,6 +281,7 @@ class RunResult(object):
self.name = name self.name = name
self.run_count = run_count self.run_count = run_count
self.skipped_count = skipped_count self.skipped_count = skipped_count
self.run_duration = run_duration
@property @property
def output_lines(self): def output_lines(self):
...@@ -383,7 +384,7 @@ def run(command, **kwargs): # pylint:disable=too-many-locals ...@@ -383,7 +384,7 @@ def run(command, **kwargs): # pylint:disable=too-many-locals
try: try:
time_start = perf_counter() time_start = perf_counter()
out, err = popen.communicate() out, err = popen.communicate()
took = perf_counter() - time_start duration = perf_counter() - time_start
if popen.was_killed or popen.poll() is None: if popen.was_killed or popen.poll() is None:
result = 'TIMEOUT' result = 'TIMEOUT'
else: else:
...@@ -402,19 +403,18 @@ def run(command, **kwargs): # pylint:disable=too-many-locals ...@@ -402,19 +403,18 @@ def run(command, **kwargs): # pylint:disable=too-many-locals
out = out.rstrip() out = out.rstrip()
out += '\n' out += '\n'
log('| %s\n%s', name, out) log('| %s\n%s', name, out)
status, run_count, skipped_count = _find_test_status(took, out) status, run_count, skipped_count = _find_test_status(duration, out)
if result: if result:
log('! %s [code %s] %s', name, result, status, color='error') log('! %s [code %s] %s', name, result, status, color='error')
elif not nested: elif not nested:
log('- %s %s', name, status) log('- %s %s', name, status)
if took >= MIN_RUNTIME:
runtimelog.append((-took, name))
return RunResult( return RunResult(
command, kwargs, result, command, kwargs, result,
output=out, error=err, output=out, error=err,
name=name, name=name,
run_count=run_count, run_count=run_count,
skipped_count=skipped_count skipped_count=skipped_count,
run_duration=duration,
) )
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment