diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b1f05cfbf330d9f3ce144a44b69a7905cd655af3..e8619951b17c23aba83fb055777fe67f0240b5b0 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -284,11 +284,8 @@ jobs:
           pip install -U pylint
           python -m pylint --limit-inference-results=1 --rcfile=.pylintrc gevent
       - name: "Tests: Basic"
-        # Run them twice if they fail; this is to workaround the inability to re-run
-        # single jobs. Ideally the testrunner should be updated to do this for just failing
-        # tests. See also make-manylinux.
         run: |
-          python -m gevent.tests $G_USE_COV || python -m gevent.tests
+          python -m gevent.tests --second-chance $G_USE_COV
       # For the CPython interpreters, unless we have reason to expect
       # different behaviour across the versions (e.g., as measured by coverage)
       # it's sufficient to run the full suite on the current version
@@ -303,21 +300,21 @@ jobs:
         env:
           GEVENT_FILE: thread
         run: |
-          python -m gevent.tests $G_USE_COV `(cd src/gevent/tests >/dev/null && ls test__*subprocess*.py)`
+          python -m gevent.tests --second-chance $G_USE_COV `(cd src/gevent/tests >/dev/null && ls test__*subprocess*.py)`
       - name: "Tests: c-ares resolver"
         # This sometimes fails on mac. Also, save mac minutes.
         if: (matrix.python-version == 2.7 || matrix.python-version == 3.9) && startsWith(runner.os, 'Linux')
         env:
           GEVENT_RESOLVER: ares
         run: |
-          python -mgevent.tests $G_USE_COV --ignore tests_that_dont_use_resolver.txt
+          python -mgevent.tests --second-chance $G_USE_COV --ignore tests_that_dont_use_resolver.txt
       - name: "Tests: dnspython resolver"
         # This has known issues on PyPy3. Also, save mac minutes.
         if: (matrix.python-version == 2.7 || matrix.python-version == 3.9) && startsWith(runner.os, 'Linux')
         env:
           GEVENT_RESOLVER: dnspython
         run: |
-          python -mgevent.tests $G_USE_COV --ignore tests_that_dont_use_resolver.txt
+          python -mgevent.tests --second-chance $G_USE_COV --ignore tests_that_dont_use_resolver.txt
       - name: "Tests: leakchecks"
         # Run the leaktests; this seems to be extremely slow on Python 3.7
         # XXX: Figure out why. Can we reproduce locally?
@@ -325,7 +322,7 @@ jobs:
         env:
           GEVENTTEST_LEAKCHECK: 1
         run: |
-          python -m gevent.tests --ignore tests_that_dont_do_leakchecks.txt
+          python -m gevent.tests --second-chance --ignore tests_that_dont_do_leakchecks.txt
       - name: "Tests: PURE_PYTHON"
         # No compiled cython modules on CPython, using the default backend. Get coverage here.
         # We should only need to run this for a single Python 2 and a Python 3
@@ -333,19 +330,19 @@ jobs:
         env:
           PURE_PYTHON: 1
         run: |
-          python -mgevent.tests --coverage || python -m gevent.tests
+          python -mgevent.tests --second-chance --coverage
       - name: "Tests: libuv"
         if: (matrix.python-version == 2.7 || matrix.python-version == 3.9)
         env:
           GEVENT_LOOP: libuv
         run: |
-          python -m gevent.tests $G_USE_COV || python -m gevent.tests
+          python -m gevent.tests --second-chance $G_USE_COV
       - name: "Tests: libev-cffi"
         if: (matrix.python-version == 2.7 || matrix.python-version == 3.9) && startsWith(runner.os, 'Linux')
         env:
           GEVENT_LOOP: libev-cffi
         run: |
-          python -m gevent.tests $G_USE_COV || python -m gevent.tests
+          python -m gevent.tests --second-chance $G_USE_COV
       - name: Report coverage
         if: ${{ !startsWith(matrix.python-version, 'pypy')  }}
         run: |
@@ -460,7 +457,7 @@ jobs:
           # Verify that we got non-embedded builds
           python -c 'import gevent.libev.corecffi as CF; assert not CF.LIBEV_EMBED'
           python -c 'import gevent.libuv.loop as CF; assert not CF.libuv.LIBUV_EMBED'
-          python -mgevent.tests
+          python -mgevent.tests --second-chance
 
   manylinux_x86_64:
     runs-on: ubuntu-latest
diff --git a/appveyor.yml b/appveyor.yml
index 47c8cec7d2656ddcdaf28458c673716e812f5d35..b39c8ed00725fcf56e22bebbadd1c225038c62f4 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -187,7 +187,7 @@ test_script:
   - if not "%GWHEEL_ONLY%"=="true" %PYEXE% -c "import gevent.core; print(gevent.core.loop)"
   - if not "%GWHEEL_ONLY%"=="true" %PYEXE% -c "import gevent; print(gevent.config.settings['resolver'].get_options())"
   - if not "%GWHEEL_ONLY%"=="true" %PYEXE% -c "from gevent._compat import get_clock_info; print(get_clock_info('perf_counter'))"
-  - if not "%GWHEEL_ONLY%"=="true" %PYEXE% -mgevent.tests --config known_failures.py --quiet
+  - if not "%GWHEEL_ONLY%"=="true" %PYEXE% -mgevent.tests --second-chance --config known_failures.py --quiet
 
 after_test:
   # pycparser can't be built correctly in an isolated environment.
diff --git a/scripts/releases/make-manylinux b/scripts/releases/make-manylinux
index aa9509d5c7a8fce4c00c0820cdc70c7ad7d1d7d3..8b4a83e5a12dd9de8ec61c04dee750ab9b1813d8 100755
--- a/scripts/releases/make-manylinux
+++ b/scripts/releases/make-manylinux
@@ -134,9 +134,7 @@ if [ -d /gevent -a -d /opt/python ]; then
         fi
 
         if [ -z "$GEVENTTEST_SKIP_ALL" ]; then
-            # TODO: Make the testrunner automatically repeat flaky tests.
-            # See the github action.
-            python -mgevent.tests || python -m gevent.tests
+            python -mgevent.tests --second-chance
         else
             # Allow skipping the bulk of the tests. If we're emulating Arm,
             # running the whole thing takes forever.
diff --git a/src/gevent/testing/testrunner.py b/src/gevent/testing/testrunner.py
index ac2a511b45a2ddf50a648e4533c6df0637cad7be..9d84feabda40489a5386c64c3dbf77335d2cc003 100644
--- a/src/gevent/testing/testrunner.py
+++ b/src/gevent/testing/testrunner.py
@@ -5,6 +5,7 @@ import re
 import sys
 import os
 import glob
+import operator
 import traceback
 import importlib
 
@@ -80,8 +81,12 @@ class ResultCollector(object):
         self.passed = {}
         self.total_cases = 0
         self.total_skipped = 0
+        # Every RunResult reported: failed, passed, rerun
+        self._all_results = []
 
     def __iadd__(self, result):
+        self._all_results.append(result)
+
         if not result:
             self.failed[result.name] = result #[cmd, kwargs]
         else:
@@ -90,6 +95,26 @@ class ResultCollector(object):
         self.total_skipped += result.skipped_count
         return self
 
+    def __ilshift__(self, result):
+        """
+        collector <<= result
+
+        Stores the result, but does not count it towards
+        the number of cases run, skipped, passed or failed.
+        """
+        self._all_results.append(result)
+        return self
+
+    @property
+    def longest_running_tests(self):
+        """
+        A new list of RunResult objects, sorted from longest running
+        to shortest running.
+        """
+        return sorted(self._all_results,
+                      key=operator.attrgetter('run_duration'),
+                      reverse=True)
+
 
 class FailFast(Exception):
     pass
@@ -105,7 +130,8 @@ class Runner(object):
                  failfast=False,
                  quiet=False,
                  configured_run_alone_tests=(),
-                 worker_count=DEFAULT_NWORKERS):
+                 worker_count=DEFAULT_NWORKERS,
+                 second_chance=False):
         """
         :keyword quiet: Set to True or False to explicitly choose. Set to
             `None` to use the default, which may come from the environment variable
@@ -113,10 +139,13 @@ class Runner(object):
         """
         self._tests = tests
         self._configured_failing_tests = configured_failing_tests
-        self._failfast = failfast
         self._quiet = quiet
         self._configured_run_alone_tests = configured_run_alone_tests
 
+        assert not (failfast and second_chance)
+        self._failfast = failfast
+        self._second_chance = second_chance
+
         self.results = ResultCollector()
         self.results.total = len(self._tests)
         self._running_jobs = []
@@ -127,6 +156,10 @@ class Runner(object):
         if self._quiet is not None:
             kwargs['quiet'] = self._quiet
         result = util.run(cmd, **kwargs)
+        if not result and self._second_chance:
+            self.results <<= result
+            util.log("> %s", result.name, color='warning')
+            result = util.run(cmd, **kwargs)
         if not result and self._failfast:
             # Under Python 3.9 (maybe older versions?), raising the
             # SystemExit here (a background thread belonging to the
@@ -221,12 +254,10 @@ class Runner(object):
     def _report(self, elapsed_time, exit=False):
         results = self.results
         report(
-            results.total, results.failed, results.passed,
+            results,
             exit=exit,
             took=elapsed_time,
             configured_failing_tests=self._configured_failing_tests,
-            total_cases=results.total_cases,
-            total_skipped=results.total_skipped
         )
 
 
@@ -482,7 +513,15 @@ class Discovery(object):
                 module_name = os.path.splitext(filename)[0]
                 qualified_name = self.package + '.' + module_name if self.package else module_name
 
-            with open(os.path.abspath(filename), 'rb') as f:
+            # Also allow just 'foo' as a shortcut for 'gevent.tests.foo'
+            abs_filename = os.path.abspath(filename)
+            if (
+                    not os.path.exists(abs_filename)
+                    and not filename.endswith('.py')
+                    and os.path.exists(abs_filename + '.py') ):
+                abs_filename = abs_filename + '.py'
+
+            with open(abs_filename, 'rb') as f:
                 # Some of the test files (e.g., test__socket_dns) are
                 # UTF8 encoded. Depending on the environment, Python 3 may
                 # try to decode those as ASCII, which fails with UnicodeDecodeError.
@@ -583,18 +622,38 @@ def format_seconds(seconds):
     return seconds
 
 
-def report(total, failed, passed, exit=True, took=None,
-           configured_failing_tests=(),
-           total_cases=0, total_skipped=0):
+def _show_longest_running(result_collector, how_many=5):
+    longest_running_tests = result_collector.longest_running_tests
+    if not longest_running_tests:
+        return
+    # The only tricky part is handling repeats. We want to show them,
+    # but not count them as distinct entries.
+
+    util.log('\nLongest-running tests:')
+    length_of_longest_formatted_decimal = len('%.1f' % longest_running_tests[0].run_duration)
+
+    frmt = '%' + str(length_of_longest_formatted_decimal) + '.1f seconds: %s'
+    seen_names = set()
+    for result in longest_running_tests:
+        util.log(frmt, result.run_duration, result.name)
+        seen_names.add(result.name)
+        if len(seen_names) >= how_many:
+            break
+
+
+
+def report(result_collector, # type: ResultCollector
+           exit=True, took=None,
+           configured_failing_tests=()):
     # pylint:disable=redefined-builtin,too-many-branches,too-many-locals
-    runtimelog = util.runtimelog # XXX: Global state!
-    if runtimelog:
-        util.log('\nLongest-running tests:')
-        runtimelog.sort()
-        length = len('%.1f' % -runtimelog[0][0])
-        frmt = '%' + str(length) + '.1f seconds: %s'
-        for delta, name in runtimelog[:5]:
-            util.log(frmt, -delta, name)
+    total = result_collector.total
+    failed = result_collector.failed
+    passed = result_collector.passed
+    total_cases = result_collector.total_cases
+    total_skipped = result_collector.total_skipped
+
+    _show_longest_running(result_collector)
+
     if took:
         took = ' in %s' % format_seconds(took)
     else:
@@ -745,11 +804,11 @@ def main():
     parser.add_argument('--discover', action='store_true')
     parser.add_argument('--full', action='store_true')
     parser.add_argument('--config', default='known_failures.py')
-    parser.add_argument('--failfast', '-x', action='store_true')
     parser.add_argument("--coverage", action="store_true")
     parser.add_argument("--quiet", action="store_true", default=True)
     parser.add_argument("--verbose", action="store_false", dest='quiet')
     parser.add_argument("--debug", action="store_true", default=False)
+
     parser.add_argument("--package", default="gevent.tests")
     parser.add_argument(
         "--processes", "-j", default=DEFAULT_NWORKERS, type=int,
@@ -768,9 +827,17 @@ def main():
                         'For example, "-u-network". GEVENTTEST_USE_RESOURCES is used '
                         'if no argument is given. To only use one resources, specify '
                         '"-unone,resource".')
-
     parser.add_argument("--travis-fold", metavar="MSG",
                         help="Emit Travis CI log fold markers around the output.")
+
+    fail_parser = parser.add_mutually_exclusive_group()
+    fail_parser.add_argument(
+        "--second-chance", action="store_true", default=False,
+        help="Give failed tests a second chance.")
+    fail_parser.add_argument(
+        '--failfast', '-x', action='store_true', default=False,
+        help="Stop running after the first failure.")
+
     parser.add_argument('tests', nargs='*')
     options = parser.parse_args()
     # options.use will be either None for not given, or a list
@@ -862,6 +929,7 @@ def main():
             quiet=options.quiet,
             configured_run_alone_tests=RUN_ALONE,
             worker_count=options.processes,
+            second_chance=options.second_chance,
         )
 
         if options.travis_fold:
diff --git a/src/gevent/testing/util.py b/src/gevent/testing/util.py
index 72233976b15b42f0690fd1fc2c0c73357112d6a0..7ecb87413885fac0dc2c6676387a4456bb4294f5 100644
--- a/src/gevent/testing/util.py
+++ b/src/gevent/testing/util.py
@@ -15,8 +15,6 @@ from gevent.monkey import get_original
 
 # pylint: disable=broad-except,attribute-defined-outside-init
 
-runtimelog = []
-MIN_RUNTIME = 1.0
 BUFFER_OUTPUT = False
 # This is set by the testrunner, defaulting to true (be quiet)
 # But if we're run standalone, default to false
@@ -272,7 +270,9 @@ class RunResult(object):
                  output=None, # type: str
                  error=None, # type: str
                  name=None,
-                 run_count=0, skipped_count=0):
+                 run_count=0, skipped_count=0,
+                 run_duration=0, # type: float
+                 ):
         self.command = command
         self.run_kwargs = run_kwargs
         self.code = code
@@ -281,6 +281,7 @@ class RunResult(object):
         self.name = name
         self.run_count = run_count
         self.skipped_count = skipped_count
+        self.run_duration = run_duration
 
     @property
     def output_lines(self):
@@ -383,7 +384,7 @@ def run(command, **kwargs): # pylint:disable=too-many-locals
     try:
         time_start = perf_counter()
         out, err = popen.communicate()
-        took = perf_counter() - time_start
+        duration = perf_counter() - time_start
         if popen.was_killed or popen.poll() is None:
             result = 'TIMEOUT'
         else:
@@ -402,19 +403,18 @@ def run(command, **kwargs): # pylint:disable=too-many-locals
         out = out.rstrip()
         out += '\n'
         log('| %s\n%s', name, out)
-    status, run_count, skipped_count = _find_test_status(took, out)
+    status, run_count, skipped_count = _find_test_status(duration, out)
     if result:
         log('! %s [code %s] %s', name, result, status, color='error')
     elif not nested:
         log('- %s %s', name, status)
-    if took >= MIN_RUNTIME:
-        runtimelog.append((-took, name))
     return RunResult(
         command, kwargs, result,
         output=out, error=err,
         name=name,
         run_count=run_count,
-        skipped_count=skipped_count
+        skipped_count=skipped_count,
+        run_duration=duration,
     )