diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b1f05cfbf330d9f3ce144a44b69a7905cd655af3..e8619951b17c23aba83fb055777fe67f0240b5b0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -284,11 +284,8 @@ jobs: pip install -U pylint python -m pylint --limit-inference-results=1 --rcfile=.pylintrc gevent - name: "Tests: Basic" - # Run them twice if they fail; this is to workaround the inability to re-run - # single jobs. Ideally the testrunner should be updated to do this for just failing - # tests. See also make-manylinux. run: | - python -m gevent.tests $G_USE_COV || python -m gevent.tests + python -m gevent.tests --second-chance $G_USE_COV # For the CPython interpreters, unless we have reason to expect # different behaviour across the versions (e.g., as measured by coverage) # it's sufficient to run the full suite on the current version @@ -303,21 +300,21 @@ jobs: env: GEVENT_FILE: thread run: | - python -m gevent.tests $G_USE_COV `(cd src/gevent/tests >/dev/null && ls test__*subprocess*.py)` + python -m gevent.tests --second-chance $G_USE_COV `(cd src/gevent/tests >/dev/null && ls test__*subprocess*.py)` - name: "Tests: c-ares resolver" # This sometimes fails on mac. Also, save mac minutes. if: (matrix.python-version == 2.7 || matrix.python-version == 3.9) && startsWith(runner.os, 'Linux') env: GEVENT_RESOLVER: ares run: | - python -mgevent.tests $G_USE_COV --ignore tests_that_dont_use_resolver.txt + python -mgevent.tests --second-chance $G_USE_COV --ignore tests_that_dont_use_resolver.txt - name: "Tests: dnspython resolver" # This has known issues on PyPy3. Also, save mac minutes. if: (matrix.python-version == 2.7 || matrix.python-version == 3.9) && startsWith(runner.os, 'Linux') env: GEVENT_RESOLVER: dnspython run: | - python -mgevent.tests $G_USE_COV --ignore tests_that_dont_use_resolver.txt + python -mgevent.tests --second-chance $G_USE_COV --ignore tests_that_dont_use_resolver.txt - name: "Tests: leakchecks" # Run the leaktests; this seems to be extremely slow on Python 3.7 # XXX: Figure out why. Can we reproduce locally? @@ -325,7 +322,7 @@ jobs: env: GEVENTTEST_LEAKCHECK: 1 run: | - python -m gevent.tests --ignore tests_that_dont_do_leakchecks.txt + python -m gevent.tests --second-chance --ignore tests_that_dont_do_leakchecks.txt - name: "Tests: PURE_PYTHON" # No compiled cython modules on CPython, using the default backend. Get coverage here. # We should only need to run this for a single Python 2 and a Python 3 @@ -333,19 +330,19 @@ jobs: env: PURE_PYTHON: 1 run: | - python -mgevent.tests --coverage || python -m gevent.tests + python -mgevent.tests --second-chance --coverage - name: "Tests: libuv" if: (matrix.python-version == 2.7 || matrix.python-version == 3.9) env: GEVENT_LOOP: libuv run: | - python -m gevent.tests $G_USE_COV || python -m gevent.tests + python -m gevent.tests --second-chance $G_USE_COV - name: "Tests: libev-cffi" if: (matrix.python-version == 2.7 || matrix.python-version == 3.9) && startsWith(runner.os, 'Linux') env: GEVENT_LOOP: libev-cffi run: | - python -m gevent.tests $G_USE_COV || python -m gevent.tests + python -m gevent.tests --second-chance $G_USE_COV - name: Report coverage if: ${{ !startsWith(matrix.python-version, 'pypy') }} run: | @@ -460,7 +457,7 @@ jobs: # Verify that we got non-embedded builds python -c 'import gevent.libev.corecffi as CF; assert not CF.LIBEV_EMBED' python -c 'import gevent.libuv.loop as CF; assert not CF.libuv.LIBUV_EMBED' - python -mgevent.tests + python -mgevent.tests --second-chance manylinux_x86_64: runs-on: ubuntu-latest diff --git a/appveyor.yml b/appveyor.yml index 47c8cec7d2656ddcdaf28458c673716e812f5d35..b39c8ed00725fcf56e22bebbadd1c225038c62f4 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -187,7 +187,7 @@ test_script: - if not "%GWHEEL_ONLY%"=="true" %PYEXE% -c "import gevent.core; print(gevent.core.loop)" - if not "%GWHEEL_ONLY%"=="true" %PYEXE% -c "import gevent; print(gevent.config.settings['resolver'].get_options())" - if not "%GWHEEL_ONLY%"=="true" %PYEXE% -c "from gevent._compat import get_clock_info; print(get_clock_info('perf_counter'))" - - if not "%GWHEEL_ONLY%"=="true" %PYEXE% -mgevent.tests --config known_failures.py --quiet + - if not "%GWHEEL_ONLY%"=="true" %PYEXE% -mgevent.tests --second-chance --config known_failures.py --quiet after_test: # pycparser can't be built correctly in an isolated environment. diff --git a/scripts/releases/make-manylinux b/scripts/releases/make-manylinux index aa9509d5c7a8fce4c00c0820cdc70c7ad7d1d7d3..8b4a83e5a12dd9de8ec61c04dee750ab9b1813d8 100755 --- a/scripts/releases/make-manylinux +++ b/scripts/releases/make-manylinux @@ -134,9 +134,7 @@ if [ -d /gevent -a -d /opt/python ]; then fi if [ -z "$GEVENTTEST_SKIP_ALL" ]; then - # TODO: Make the testrunner automatically repeat flaky tests. - # See the github action. - python -mgevent.tests || python -m gevent.tests + python -mgevent.tests --second-chance else # Allow skipping the bulk of the tests. If we're emulating Arm, # running the whole thing takes forever. diff --git a/src/gevent/testing/testrunner.py b/src/gevent/testing/testrunner.py index ac2a511b45a2ddf50a648e4533c6df0637cad7be..9d84feabda40489a5386c64c3dbf77335d2cc003 100644 --- a/src/gevent/testing/testrunner.py +++ b/src/gevent/testing/testrunner.py @@ -5,6 +5,7 @@ import re import sys import os import glob +import operator import traceback import importlib @@ -80,8 +81,12 @@ class ResultCollector(object): self.passed = {} self.total_cases = 0 self.total_skipped = 0 + # Every RunResult reported: failed, passed, rerun + self._all_results = [] def __iadd__(self, result): + self._all_results.append(result) + if not result: self.failed[result.name] = result #[cmd, kwargs] else: @@ -90,6 +95,26 @@ class ResultCollector(object): self.total_skipped += result.skipped_count return self + def __ilshift__(self, result): + """ + collector <<= result + + Stores the result, but does not count it towards + the number of cases run, skipped, passed or failed. + """ + self._all_results.append(result) + return self + + @property + def longest_running_tests(self): + """ + A new list of RunResult objects, sorted from longest running + to shortest running. + """ + return sorted(self._all_results, + key=operator.attrgetter('run_duration'), + reverse=True) + class FailFast(Exception): pass @@ -105,7 +130,8 @@ class Runner(object): failfast=False, quiet=False, configured_run_alone_tests=(), - worker_count=DEFAULT_NWORKERS): + worker_count=DEFAULT_NWORKERS, + second_chance=False): """ :keyword quiet: Set to True or False to explicitly choose. Set to `None` to use the default, which may come from the environment variable @@ -113,10 +139,13 @@ class Runner(object): """ self._tests = tests self._configured_failing_tests = configured_failing_tests - self._failfast = failfast self._quiet = quiet self._configured_run_alone_tests = configured_run_alone_tests + assert not (failfast and second_chance) + self._failfast = failfast + self._second_chance = second_chance + self.results = ResultCollector() self.results.total = len(self._tests) self._running_jobs = [] @@ -127,6 +156,10 @@ class Runner(object): if self._quiet is not None: kwargs['quiet'] = self._quiet result = util.run(cmd, **kwargs) + if not result and self._second_chance: + self.results <<= result + util.log("> %s", result.name, color='warning') + result = util.run(cmd, **kwargs) if not result and self._failfast: # Under Python 3.9 (maybe older versions?), raising the # SystemExit here (a background thread belonging to the @@ -221,12 +254,10 @@ class Runner(object): def _report(self, elapsed_time, exit=False): results = self.results report( - results.total, results.failed, results.passed, + results, exit=exit, took=elapsed_time, configured_failing_tests=self._configured_failing_tests, - total_cases=results.total_cases, - total_skipped=results.total_skipped ) @@ -482,7 +513,15 @@ class Discovery(object): module_name = os.path.splitext(filename)[0] qualified_name = self.package + '.' + module_name if self.package else module_name - with open(os.path.abspath(filename), 'rb') as f: + # Also allow just 'foo' as a shortcut for 'gevent.tests.foo' + abs_filename = os.path.abspath(filename) + if ( + not os.path.exists(abs_filename) + and not filename.endswith('.py') + and os.path.exists(abs_filename + '.py') ): + abs_filename = abs_filename + '.py' + + with open(abs_filename, 'rb') as f: # Some of the test files (e.g., test__socket_dns) are # UTF8 encoded. Depending on the environment, Python 3 may # try to decode those as ASCII, which fails with UnicodeDecodeError. @@ -583,18 +622,38 @@ def format_seconds(seconds): return seconds -def report(total, failed, passed, exit=True, took=None, - configured_failing_tests=(), - total_cases=0, total_skipped=0): +def _show_longest_running(result_collector, how_many=5): + longest_running_tests = result_collector.longest_running_tests + if not longest_running_tests: + return + # The only tricky part is handling repeats. we want to show them, + # but not count them as a distinct entry. + + util.log('\nLongest-running tests:') + length_of_longest_formatted_decimal = len('%.1f' % longest_running_tests[0].run_duration) + + frmt = '%' + str(length_of_longest_formatted_decimal) + '.1f seconds: %s' + seen_names = set() + for result in longest_running_tests: + util.log(frmt, result.run_duration, result.name) + seen_names.add(result.name) + if len(seen_names) >= how_many: + break + + + +def report(result_collector, # type: ResultCollector + exit=True, took=None, + configured_failing_tests=()): # pylint:disable=redefined-builtin,too-many-branches,too-many-locals - runtimelog = util.runtimelog # XXX: Global state! - if runtimelog: - util.log('\nLongest-running tests:') - runtimelog.sort() - length = len('%.1f' % -runtimelog[0][0]) - frmt = '%' + str(length) + '.1f seconds: %s' - for delta, name in runtimelog[:5]: - util.log(frmt, -delta, name) + total = result_collector.total + failed = result_collector.failed + passed = result_collector.passed + total_cases = result_collector.total_cases + total_skipped = result_collector.total_skipped + + _show_longest_running(result_collector) + if took: took = ' in %s' % format_seconds(took) else: @@ -745,11 +804,11 @@ def main(): parser.add_argument('--discover', action='store_true') parser.add_argument('--full', action='store_true') parser.add_argument('--config', default='known_failures.py') - parser.add_argument('--failfast', '-x', action='store_true') parser.add_argument("--coverage", action="store_true") parser.add_argument("--quiet", action="store_true", default=True) parser.add_argument("--verbose", action="store_false", dest='quiet') parser.add_argument("--debug", action="store_true", default=False) + parser.add_argument("--package", default="gevent.tests") parser.add_argument( "--processes", "-j", default=DEFAULT_NWORKERS, type=int, @@ -768,9 +827,17 @@ def main(): 'For example, "-u-network". GEVENTTEST_USE_RESOURCES is used ' 'if no argument is given. To only use one resources, specify ' '"-unone,resource".') - parser.add_argument("--travis-fold", metavar="MSG", help="Emit Travis CI log fold markers around the output.") + + fail_parser = parser.add_mutually_exclusive_group() + fail_parser.add_argument( + "--second-chance", action="store_true", default=False, + help="Give failed tests a second chance.") + fail_parser.add_argument( + '--failfast', '-x', action='store_true', default=False, + help="Stop running after the first failure.") + parser.add_argument('tests', nargs='*') options = parser.parse_args() # options.use will be either None for not given, or a list @@ -862,6 +929,7 @@ def main(): quiet=options.quiet, configured_run_alone_tests=RUN_ALONE, worker_count=options.processes, + second_chance=options.second_chance, ) if options.travis_fold: diff --git a/src/gevent/testing/util.py b/src/gevent/testing/util.py index 72233976b15b42f0690fd1fc2c0c73357112d6a0..7ecb87413885fac0dc2c6676387a4456bb4294f5 100644 --- a/src/gevent/testing/util.py +++ b/src/gevent/testing/util.py @@ -15,8 +15,6 @@ from gevent.monkey import get_original # pylint: disable=broad-except,attribute-defined-outside-init -runtimelog = [] -MIN_RUNTIME = 1.0 BUFFER_OUTPUT = False # This is set by the testrunner, defaulting to true (be quiet) # But if we're run standalone, default to false @@ -272,7 +270,9 @@ class RunResult(object): output=None, # type: str error=None, # type: str name=None, - run_count=0, skipped_count=0): + run_count=0, skipped_count=0, + run_duration=0, # type: float + ): self.command = command self.run_kwargs = run_kwargs self.code = code @@ -281,6 +281,7 @@ class RunResult(object): self.name = name self.run_count = run_count self.skipped_count = skipped_count + self.run_duration = run_duration @property def output_lines(self): @@ -383,7 +384,7 @@ def run(command, **kwargs): # pylint:disable=too-many-locals try: time_start = perf_counter() out, err = popen.communicate() - took = perf_counter() - time_start + duration = perf_counter() - time_start if popen.was_killed or popen.poll() is None: result = 'TIMEOUT' else: @@ -402,19 +403,18 @@ def run(command, **kwargs): # pylint:disable=too-many-locals out = out.rstrip() out += '\n' log('| %s\n%s', name, out) - status, run_count, skipped_count = _find_test_status(took, out) + status, run_count, skipped_count = _find_test_status(duration, out) if result: log('! %s [code %s] %s', name, result, status, color='error') elif not nested: log('- %s %s', name, status) - if took >= MIN_RUNTIME: - runtimelog.append((-took, name)) return RunResult( command, kwargs, result, output=out, error=err, name=name, run_count=run_count, - skipped_count=skipped_count + skipped_count=skipped_count, + run_duration=duration, )