Commit 5af38295 authored by Michael Arntzenius

update tester infrastructure to support new CPython tests

parent fd785dc1
@@ -187,6 +187,8 @@ add_test(NAME check-format COMMAND ${CMAKE_SOURCE_DIR}/tools/check_format.sh ${L
add_test(NAME gc_unittest COMMAND gc_unittest)
add_test(NAME analysis_unittest COMMAND analysis_unittest)
add_test(NAME pyston_defaults COMMAND ${PYTHON_EXE} ${CMAKE_SOURCE_DIR}/tools/tester.py -R ./pyston -j${TEST_THREADS} -a=-S -k ${CMAKE_SOURCE_DIR}/test/tests)
# we pass -I to cpython tests and skip failing ones b/c they are slooow otherwise
add_test(NAME pyston_defaults_cpython_tests COMMAND ${PYTHON_EXE} ${CMAKE_SOURCE_DIR}/tools/tester.py -R ./pyston -j${TEST_THREADS} -a=-S -a=-I -k --exit-code-only --skip-failing ${CMAKE_SOURCE_DIR}/test/cpython)
add_test(NAME pyston_max_compilation_tier COMMAND ${PYTHON_EXE} ${CMAKE_SOURCE_DIR}/tools/tester.py -R ./pyston -j${TEST_THREADS} -a=-O -a=-S -k ${CMAKE_SOURCE_DIR}/test/tests)
add_test(NAME pyston_experimental_pypa_parser COMMAND ${PYTHON_EXE} ${CMAKE_SOURCE_DIR}/tools/tester.py -a=-x -R ./pyston -j${TEST_THREADS} -a=-n -a=-S -k ${CMAKE_SOURCE_DIR}/test/tests)
......
@@ -490,6 +490,8 @@ check:
$(MAKE) ext_python ext_pyston pyston_dbg
$(PYTHON) $(TOOLS_DIR)/tester.py -R pyston_dbg -j$(TEST_THREADS) -k -a=-S $(TESTS_DIR) $(ARGS)
@# we pass -I to cpython tests & skip failing ones because they are sloooow otherwise
$(PYTHON) $(TOOLS_DIR)/tester.py -R pyston_dbg -j$(TEST_THREADS) -k -a=-S -a=-I --exit-code-only --skip-failing $(TEST_DIR)/cpython $(ARGS)
$(PYTHON) $(TOOLS_DIR)/tester.py -R pyston_dbg -j$(TEST_THREADS) -k -a=-n -a=-x -a=-S $(TESTS_DIR) $(ARGS)
@# skip -O for dbg
@@ -504,6 +506,8 @@ check:
@# It can be useful to test release mode, since it actually exposes different functionality
@# since we can make different decisions about which internal functions to inline or not.
$(PYTHON) $(TOOLS_DIR)/tester.py -R pyston_release -j$(TEST_THREADS) -k -a=-S $(TESTS_DIR) $(ARGS)
@# we pass -I to cpython tests and skip failing ones because they are sloooow otherwise
$(PYTHON) $(TOOLS_DIR)/tester.py -R pyston_release -j$(TEST_THREADS) -k -a=-S -a=-I --exit-code-only --skip-failing $(TEST_DIR)/cpython $(ARGS)
@# skip -n for dbg
$(PYTHON) $(TOOLS_DIR)/tester.py -R pyston_release -j$(TEST_THREADS) -k -a=-O -a=-x -a=-S $(TESTS_DIR) $(ARGS)
@@ -916,7 +920,8 @@ clean:
# ex instead of saying "make tests/run_1", I can just write "make run_1"
define make_search
$(eval \
$1: $(TEST_DIR)/tests/$1 ;
$1: $(TESTS_DIR)/$1 ;
$1: $(TEST_DIR)/cpython/$1 ;
$1: ./microbenchmarks/$1 ;
$1: ./minibenchmarks/$1 ;
$1: ./benchmarks/$1 ;
@@ -932,6 +937,8 @@ $(eval \
.PHONY: test$1 check$1
check$1 test$1: $(PYTHON_EXE_DEPS) pyston$1 ext_pyston
$(PYTHON) $(TOOLS_DIR)/tester.py -R pyston$1 -j$(TEST_THREADS) -a=-S -k $(TESTS_DIR) $(ARGS)
@# we pass -I to cpython tests and skip failing ones because they are sloooow otherwise
$(PYTHON) $(TOOLS_DIR)/tester.py -R pyston$1 -j$(TEST_THREADS) -a=-S -a=-I -k --exit-code-only --skip-failing $(TEST_DIR)/cpython $(ARGS)
$(PYTHON) $(TOOLS_DIR)/tester.py -a=-x -R pyston$1 -j$(TEST_THREADS) -a=-n -a=-S -k $(TESTS_DIR) $(ARGS)
$(PYTHON) $(TOOLS_DIR)/tester.py -R pyston$1 -j$(TEST_THREADS) -a=-O -a=-S -k $(TESTS_DIR) $(ARGS)
......
@@ -346,25 +346,19 @@ def _is_gui_available():
def is_resource_enabled(resource):
"""Test whether a resource is enabled. Known resources are set by
regrtest.py."""
# Pyston change: we assume that resources are not available in general
return use_resources is not None and resource in use_resources
def requires(resource, msg=None):
"""Raise ResourceDenied if the specified resource is not available.
If the caller's module is __main__ then automatically return True. The
possibility of False being returned occurs when regrtest.py is executing."""
"""Raise ResourceDenied if the specified resource is not available."""
if resource == 'gui' and not _is_gui_available():
raise ResourceDenied(_is_gui_available.reason)
# see if the caller's module is __main__ - if so, treat as if
# the resource was set
if sys._getframe(1).f_globals.get("__name__") == "__main__":
return
# Pyston change: we don't check if the caller's module is __main__ using sys._getframe() magic.
if not is_resource_enabled(resource):
if msg is None:
msg = "Use of the `%s' resource not enabled" % resource
raise ResourceDenied(msg)
# Don't use "localhost", since resolving it uses the DNS under recent
# Windows versions (see issue #18792).
HOST = "127.0.0.1"
@@ -506,6 +500,11 @@ try:
except NameError:
have_unicode = False
requires_unicode = unittest.skipUnless(have_unicode, 'no unicode support')
def u(s):
return unicode(s, 'unicode-escape')
is_jython = sys.platform.startswith('java')
# FS_NONASCII: non-ASCII Unicode character encodable by
@@ -749,42 +748,49 @@ class WarningsRecorder(object):
def _filterwarnings(filters, quiet=False):
"""Catch the warnings, then check if all the expected # Pyston change:
warnings have been raised and re-raise unexpected warnings. # this bare yield seems to work for now, but we might need to yield up a WarningsRecorder in some cases?
If 'quiet' is True, only re-raise the unexpected warnings. yield
"""
# TODO: Frame introspection in Pyston?
# old code follows:
# """Catch the warnings, then check if all the expected
# warnings have been raised and re-raise unexpected warnings.
# If 'quiet' is True, only re-raise the unexpected warnings.
# """
# Clear the warning registry of the calling module
# in order to re-raise the warnings.
frame = sys._getframe(2)
registry = frame.f_globals.get('__warningregistry__')
if registry:
registry.clear()
with warnings.catch_warnings(record=True) as w:
# Set filter "always" to record all warnings. Because
# test_warnings swap the module, we need to look up in
# the sys.modules dictionary.
sys.modules['warnings'].simplefilter("always")
yield WarningsRecorder(w)
# Filter the recorded warnings
reraise = [warning.message for warning in w]
missing = []
for msg, cat in filters:
seen = False
for exc in reraise[:]:
message = str(exc)
# Filter out the matching messages
if (re.match(msg, message, re.I) and
issubclass(exc.__class__, cat)):
seen = True
reraise.remove(exc)
if not seen and not quiet:
# This filter caught nothing
missing.append((msg, cat.__name__))
if reraise:
raise AssertionError("unhandled warning %r" % reraise[0])
if missing:
raise AssertionError("filter (%r, %s) did not catch any warning" %
missing[0])
# frame = sys._getframe(2)
# registry = frame.f_globals.get('__warningregistry__')
# if registry:
# registry.clear()
# with warnings.catch_warnings(record=True) as w:
# # Set filter "always" to record all warnings. Because
# # test_warnings swap the module, we need to look up in
# # the sys.modules dictionary.
# sys.modules['warnings'].simplefilter("always")
# yield WarningsRecorder(w)
# # Filter the recorded warnings
# reraise = [warning.message for warning in w]
# missing = []
# for msg, cat in filters:
# seen = False
# for exc in reraise[:]:
# message = str(exc)
# # Filter out the matching messages
# if (re.match(msg, message, re.I) and
# issubclass(exc.__class__, cat)):
# seen = True
# reraise.remove(exc)
# if not seen and not quiet:
# # This filter caught nothing
# missing.append((msg, cat.__name__))
# if reraise:
# raise AssertionError("unhandled warning %r" % reraise[0])
# if missing:
# raise AssertionError("filter (%r, %s) did not catch any warning" %
# missing[0])
@contextlib.contextmanager @contextlib.contextmanager
......
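A note on the _filterwarnings change above: the Pyston version keeps the @contextlib.contextmanager decorator but reduces the body to a bare yield. A minimal sketch (the _noop_check_warnings name is hypothetical) of why that is enough for existing "with check_warnings():" blocks to keep running, as long as callers ignore the yielded recorder:

import contextlib
import warnings

@contextlib.contextmanager
def _noop_check_warnings():
    # No setup or teardown around the yield: the with-body runs normally,
    # but nothing is recorded, filtered, or re-raised afterwards.
    yield

with _noop_check_warnings():
    warnings.warn("not captured or verified")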
@@ -139,7 +139,10 @@ class TestLoader(object):
hasattr(getattr(testCaseClass, attrname), '__call__')
testFnNames = filter(isTestMethod, dir(testCaseClass))
if self.sortTestMethodsUsing:
testFnNames.sort(key=_CmpToKey(self.sortTestMethodsUsing))
# Pyston change:
# TODO(rntz): needs builtin `cmp` to work
#testFnNames.sort(key=_CmpToKey(self.sortTestMethodsUsing))
testFnNames.sort()
return testFnNames
def discover(self, start_dir, pattern='test*.py', top_level_dir=None):
......
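On the sort change above: _CmpToKey (defined elsewhere in unittest, not shown in this diff) adapts a cmp-style comparator into a key class, roughly like functools.cmp_to_key; with the default comparator (the builtin cmp) the order is alphabetical anyway, so plain sort() usually gives the same result. A rough sketch with hypothetical names:

def cmp_to_key_sketch(mycmp):
    # Wrap a cmp-style function so it can be used as a sort key.
    class K(object):
        def __init__(self, obj):
            self.obj = obj
        def __lt__(self, other):
            # list.sort() only needs __lt__
            return mycmp(self.obj, other.obj) < 0
    return K

names = ["test_two", "test_one"]
names.sort(key=cmp_to_key_sketch(lambda a, b: (a > b) - (a < b)))
print(names)  # ['test_one', 'test_two'], same as plain names.sort() here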
@@ -153,15 +153,18 @@ class TestResult(object):
"""Converts a sys.exc_info()-style tuple of values into a string."""
exctype, value, tb = err
# Skip test runner traceback levels
while tb and self._is_relevant_tb_level(tb):
tb = tb.tb_next
if exctype is test.failureException:
# Skip assert*() traceback levels
length = self._count_relevant_tb_levels(tb)
msgLines = traceback.format_exception(exctype, value, tb, length)
else:
msgLines = traceback.format_exception(exctype, value, tb)
# Pyston change: I've commented this out for now. - rntz
# TODO(rntz): needs traceback stuff to work
# while tb and self._is_relevant_tb_level(tb):
# tb = tb.tb_next
# if exctype is test.failureException:
# # Skip assert*() traceback levels
# length = self._count_relevant_tb_levels(tb)
# msgLines = traceback.format_exception(exctype, value, tb, length)
# else:
# msgLines = traceback.format_exception(exctype, value, tb)
msgLines = traceback.format_exception(exctype, value, tb)
if self.buffer:
output = sys.stdout.getvalue()
......
# skip-if: True
# This is a test to make sure that the stack space we allocate
# for "long arg" calls (ie calls that take more than 4 arguments)
# gets restored.
......
# skip-if: True
# I was worried about using a recursive parser for obscenely-nested source code,
# but based off this example it looks like that's what cPython and pypy both use as well.
......
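Both test files above gain a "# skip-if: True" header. A small sketch of how tester.py (changed below in this commit) reacts to that directive: the expression after the colon is passed to eval(), and a truthy result records a skip reason instead of running the test.

header = "# skip-if: True"
skip_if = header[len("# skip-if:"):].strip()
skip_reason = None
if eval(skip_if):
    # mirrors opts.skip = "skip-if: %s" % skip_if[:30] in get_test_options below
    skip_reason = "skip-if: %s" % skip_if[:30]
print(skip_reason)  # prints: skip-if: True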
# Copyright (c) 2014-2015 Dropbox, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -37,6 +37,9 @@ KEEP_GOING = False
FN_JUST_SIZE = 20
EXTRA_JIT_ARGS = []
TIME_LIMIT = 25
TESTS_TO_SKIP = []
EXIT_CODE_ONLY = False
SKIP_FAILING_TESTS = False
# For fun, can test pypy.
# Tough because the tester will check to see if the error messages are exactly the
@@ -55,6 +58,7 @@ def set_ulimits():
EXTMODULE_DIR = os.path.abspath(os.path.dirname(os.path.realpath(__file__)) + "/../test/test_extension/build/lib.linux-x86_64-2.7/")
THIS_FILE = os.path.abspath(__file__)
_global_mtime = None
def get_global_mtime():
global _global_mtime
@@ -124,19 +128,39 @@ def canonicalize_stderr(stderr):
return stderr
failed = []
class Options(object): pass
# returns a single string, or a tuple of strings that are spliced together (with spaces between) by our caller
def run_test(fn, check_stats, run_memcheck):
r = os.path.basename(fn).rjust(FN_JUST_SIZE)
test_base_name = os.path.basename(fn).split('.')[0]
if test_base_name in TESTS_TO_SKIP:
return r + " (skipped due to command line option)"
statchecks = []
jit_args = ["-rq"] + EXTRA_JIT_ARGS
collect_stats = True
expected = "success"
should_error = False
allow_warnings = []
opts = get_test_options(fn, check_stats, run_memcheck)
del check_stats, run_memcheck
if opts.skip:
return "(skipped: %s)" % opts.skip
run_args = [os.path.abspath(IMAGE)] + opts.jit_args + [fn]
start = time.time()
p = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=open("/dev/null"), preexec_fn=set_ulimits)
out, stderr = p.communicate()
code = p.wait()
elapsed = time.time() - start
return determine_test_result(fn, opts, code, out, stderr, elapsed)
def get_test_options(fn, check_stats, run_memcheck):
opts = Options()
opts.check_stats = check_stats
opts.run_memcheck = run_memcheck
opts.statchecks = []
opts.jit_args = ["-rq"] + EXTRA_JIT_ARGS
opts.collect_stats = True
opts.expected = "success"
opts.should_error = False
opts.allow_warnings = []
opts.skip = None
for l in open(fn):
l = l.strip()
if not l:
@@ -145,56 +169,54 @@ def run_test(fn, check_stats, run_memcheck):
break
if l.startswith("# statcheck:"):
l = l[len("# statcheck:"):].strip()
statchecks.append(l)
opts.statchecks.append(l)
elif l.startswith("# run_args:"):
l = l[len("# run_args:"):].split()
jit_args += l
opts.jit_args += l
elif l.startswith("# expected:"):
expected = l[len("# expected:"):].strip()
opts.expected = l[len("# expected:"):].strip()
elif l.startswith("# should_error"):
should_error = True
opts.should_error = True
elif l.startswith("# fail-if:"):
condition = l.split(':', 1)[1].strip()
if eval(condition):
expected = "fail"
opts.expected = "fail"
elif l.startswith("# skip-if:"):
skip_if = l[len("# skip-if:"):].strip()
skip = eval(skip_if)
if skip:
return r + " (skipped due to 'skip-if: %s')" % skip_if[:30]
elif fn.split('.')[0] in TESTS_TO_SKIP:
return r + " (skipped due to command line option)"
if eval(skip_if):
opts.skip = "skip-if: %s" % skip_if[:30]
elif l.startswith("# allow-warning:"): elif l.startswith("# allow-warning:"):
allow_warnings.append("Warning: " + l.split(':', 1)[1].strip()) opts.allow_warnings.append("Warning: " + l.split(':', 1)[1].strip())
elif l.startswith("# no-collect-stats"): elif l.startswith("# no-collect-stats"):
collect_stats = False opts.collect_stats = False
if TEST_PYPY: if not opts.skip:
collect_stats = False # consider other reasons for skipping file
if SKIP_FAILING_TESTS and opts.expected == 'fail':
opts.skip = 'expected to fail'
elif os.path.basename(fn).split('.')[0] in TESTS_TO_SKIP:
opts.skip = 'command line option'
if collect_stats:
if opts.collect_stats:
jit_args = ['-s'] + jit_args
opts.jit_args = ['-s'] + opts.jit_args
assert expected in ("success", "fail", "statfail"), expected
assert opts.expected in ("success", "fail", "statfail"), opts.expected
if TEST_PYPY:
jit_args = []
check_stats = False
expected = "success"
run_args = [os.path.abspath(IMAGE)] + jit_args + [fn]
start = time.time()
p = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=open("/dev/null"), preexec_fn=set_ulimits)
out, stderr = p.communicate()
last_stderr_line = stderr.strip().split('\n')[-1]
code = p.wait()
elapsed = time.time() - start
opts.jit_args = []
opts.collect_stats = False
opts.check_stats = False
opts.expected = "success"
return opts
def determine_test_result(fn, opts, code, out, stderr, elapsed):
last_stderr_line = stderr.strip().split('\n')[-1]
if allow_warnings:
if opts.allow_warnings:
out_lines = []
for l in out.split('\n'):
for regex in allow_warnings:
for regex in opts.allow_warnings:
if re.match(regex, l):
break
else:
@@ -202,7 +224,7 @@ def run_test(fn, check_stats, run_memcheck):
out = "\n".join(out_lines)
stats = None
if code >= 0 and collect_stats:
if code >= 0 and opts.collect_stats:
stats = {}
assert out.count("Stats:") == 1
out, stats_str = out.split("Stats:")
@@ -210,7 +232,14 @@
k, v = l.split(':')
stats[k.strip()] = int(v)
expected_code, expected_out, expected_err = get_expected_output(fn)
if EXIT_CODE_ONLY:
# fools the rest of this function into thinking the output is OK & just checking the exit code.
# there oughtta be a cleaner way to do this.
expected_code, expected_out, expected_err = 0, out, stderr
else:
# run CPython to get the expected output
expected_code, expected_out, expected_err = get_expected_output(fn)
if code != expected_code:
color = 31 # red
@@ -227,17 +256,15 @@
else:
msg = "Exited with code %d (expected code %d)" % (code, expected_code)
if expected == "fail": if opts.expected == "fail":
r += " Expected failure (got code %d, should be %d)" % (code, expected_code) return "Expected failure (got code %d, should be %d)" % (code, expected_code)
return r
elif KEEP_GOING: elif KEEP_GOING:
r += " \033[%dmFAILED\033[0m (%s)" % (color, msg)
failed.append(fn) failed.append(fn)
return r return "\033[%dmFAILED\033[0m (%s)" % (color, msg)
else: else:
raise Exception("%s\n%s\n%s" % (msg, err, stderr)) raise Exception("%s\n%s\n%s" % (msg, err, stderr))
elif should_error == (code == 0): elif opts.should_error == (code == 0):
color = 31 # red color = 31 # red
if code == 0: if code == 0:
msg = "Exited successfully; remove '# should_error' if this is expected" msg = "Exited successfully; remove '# should_error' if this is expected"
@@ -245,23 +272,20 @@
msg = "Exited with code %d; add '# should_error' if this is expected" % code
if KEEP_GOING:
r += " \033[%dmFAILED\033[0m (%s)" % (color, msg)
failed.append(fn)
return r
return "\033[%dmFAILED\033[0m (%s)" % (color, msg)
else:
# show last line of stderr so we have some idea went wrong
print "Last line of stderr: " + last_stderr_line
raise Exception(msg)
elif out != expected_out:
if expected == "fail":
r += " Expected failure (bad output)"
return r
if opts.expected == "fail":
return "Expected failure (bad output)"
else:
if KEEP_GOING:
r += " \033[31mFAILED\033[0m (bad output)"
failed.append(fn)
return r
return "\033[31mFAILED\033[0m (bad output)"
exp_fd, exp_fn = tempfile.mkstemp(prefix="expected_")
out_fd, out_fn = tempfile.mkstemp(prefix="received_")
os.fdopen(exp_fd, 'w').write(expected_out)
@@ -273,38 +297,34 @@
os.unlink(out_fn)
raise Exception("Failed on %s:\n%s" % (fn, diff))
elif not TEST_PYPY and canonicalize_stderr(stderr) != canonicalize_stderr(expected_err):
if expected == "fail":
r += " Expected failure (bad stderr)"
return r
if opts.expected == "fail":
return "Expected failure (bad stderr)"
elif KEEP_GOING:
r += " \033[31mFAILED\033[0m (bad stderr)"
failed.append(fn)
return r
return "\033[31mFAILED\033[0m (bad stderr)"
else:
raise Exception((canonicalize_stderr(stderr), canonicalize_stderr(expected_err)))
elif expected == "fail": elif opts.expected == "fail":
if KEEP_GOING: if KEEP_GOING:
r += " \033[31mFAILED\033[0m (unexpected success)"
failed.append(fn) failed.append(fn)
return r return "\033[31mFAILED\033[0m (unexpected success)"
raise Exception("Unexpected success on %s" % fn) raise Exception("Unexpected success on %s" % fn)
r += " Correct output (%5.1fms)" % (elapsed * 1000,) r = ("Correct output (%5.1fms)" % (elapsed * 1000,),)
if check_stats:
if opts.check_stats:
def noninit_count(s):
return stats.get(s, 0) - stats.get("_init_" + s, 0)
for l in statchecks:
for l in opts.statchecks:
test = eval(l)
if not test:
if expected == "statfail":
if opts.expected == "statfail":
r += " (expected statfailure)"
r += ("(expected statfailure)",)
break
elif KEEP_GOING:
r += " \033[31mFailed statcheck\033[0m"
failed.append(fn)
return r
return r + ("\033[31mFailed statcheck\033[0m",)
else:
m = re.match("""stats\[['"]([\w_]+)['"]]""", l)
if m:
@@ -319,17 +339,16 @@
raise Exception((l, stats))
else:
# only can get here if all statchecks passed
if expected == "statfail":
if opts.expected == "statfail":
if KEEP_GOING:
r += " \033[31mUnexpected statcheck success\033[0m"
failed.append(fn)
return r
return r + ("\033[31mUnexpected statcheck success\033[0m",)
else:
raise Exception(("Unexpected statcheck success!", statchecks, stats))
else:
r += " (ignoring stats)"
r += ("(ignoring stats)",)
if run_memcheck:
if opts.run_memcheck:
if code == 0:
start = time.time()
p = subprocess.Popen(["valgrind", "--tool=memcheck", "--leak-check=no"] + run_args, stdout=open("/dev/null", 'w'), stderr=subprocess.PIPE, stdin=open("/dev/null"))
@@ -337,16 +356,15 @@
assert p.wait() == 0
if "Invalid read" not in err:
elapsed = (time.time() - start)
r += " Memcheck passed (%4.1fs)" % (elapsed,)
r += ("Memcheck passed (%4.1fs)" % (elapsed,),)
else:
if KEEP_GOING:
r += " \033[31mMEMCHECKS FAILED\033[0m"
failed.append(fn)
return r
return r + ("\033[31mMEMCHECKS FAILED\033[0m",)
else:
raise Exception(err)
else:
r += " (Skipping memchecks) "
r += ("(Skipping memchecks)",)
return r
@@ -394,9 +412,13 @@ parser.add_argument('-t', '--time-limit', type=int, default=TIME_LIMIT,
help='set time limit in seconds for each test')
parser.add_argument('-s', '--skip-tests', type=str, default='',
help='tests to skip (comma-separated)')
parser.add_argument('-e', '--exit-code-only', action='store_true',
help="only check exit code; don't run CPython to get expected output to compare against")
parser.add_argument('--skip-failing', action='store_true',
help="skip tests expected to fail")
parser.add_argument('test_dir')
parser.add_argument('patterns', nargs='*')
parser.add_argument('pattern', nargs='*')
def main(orig_dir):
global KEEP_GOING
@@ -406,6 +428,8 @@ def main(orig_dir):
global TEST_DIR
global FN_JUST_SIZE
global TESTS_TO_SKIP
global EXIT_CODE_ONLY
global SKIP_FAILING_TESTS
run_memcheck = False
start = 1
@@ -418,47 +442,21 @@
EXTRA_JIT_ARGS += opts.extra_args
TIME_LIMIT = opts.time_limit
TESTS_TO_SKIP = opts.skip_tests.split(',')
EXIT_CODE_ONLY = opts.exit_code_only
SKIP_FAILING_TESTS = opts.skip_failing
TEST_DIR = os.path.join(orig_dir, opts.test_dir)
patterns = opts.patterns
patterns = opts.pattern
assert os.path.isdir(TEST_DIR), "%s doesn't look like a directory with tests in it" % TEST_DIR
TOSKIP = ["%s/%s.py" % (TEST_DIR, i) for i in (
"tuple_depth",
"longargs_stackusage",
)]
IGNORE_STATS = ["%s/%d.py" % (TEST_DIR, i) for i in (
)] + [
]
def _addto(l, tests):
if isinstance(tests, str):
tests = [tests]
for t in tests:
l.append("%s/%s.py" % (TEST_DIR, t))
skip = functools.partial(_addto, TOSKIP)
if not patterns:
skip(["t", "t2"])
def tryParse(s):
if s.isdigit():
return int(s)
return s
def key(name):
i = tryParse(name)
if i < start:
return i + 100000
return i
tests = sorted([t for t in glob.glob("%s/*.py" % TEST_DIR)], key=lambda t:key(t[6:-3]))
tests += [
]
big_tests = [
]
tests += big_tests
if TEST_DIR.rstrip('/').endswith("cpython") and not EXIT_CODE_ONLY:
print >>sys.stderr, "Test directory name ends in cpython; are you sure you don't want --exit-code-only?"
# do we need this any more?
IGNORE_STATS = ["%s/%d.py" % (TEST_DIR, i) for i in ()] + []
tests = [t for t in glob.glob("%s/*.py" % TEST_DIR)]
LIB_DIR = os.path.join(sys.prefix, "lib/python2.7")
for t in tests:
@@ -471,18 +469,16 @@
module_name in sys.builtin_module_names:
raise Exception("Error: %s hides builtin module '%s'" % (t, module_name))
for t in TOSKIP:
assert t in ("%s/t.py" % TEST_DIR, "%s/t2.py" % TEST_DIR) or t in tests, t
if patterns:
filtered_tests = []
for t in tests:
if any(re.match("%s/%s.*\.py" % (TEST_DIR, p), t) for p in patterns):
if any(re.match(os.path.join(TEST_DIR, p) + ".*\.py", t) for p in patterns):
filtered_tests.append(t)
tests = filtered_tests
if not tests:
print >>sys.stderr, "No tests specified!"
sys.exit(1)
print >>sys.stderr, "No tests matched the given patterns. OK by me!"
# this can happen legitimately in e.g. `make check_test_foo` if test_foo.py is a CPython regression test.
sys.exit(0)
FN_JUST_SIZE = max(20, 2 + max(len(os.path.basename(fn)) for fn in tests))
@@ -493,8 +489,6 @@
tests.sort(key=fileSize)
for fn in tests:
if fn in TOSKIP:
continue
check_stats = fn not in IGNORE_STATS
q.put((fn, check_stats, run_memcheck))
@@ -507,11 +501,6 @@
q.put(None)
for fn in tests:
if fn in TOSKIP:
print os.path.basename(fn).rjust(FN_JUST_SIZE),
print " Skipping"
continue
with cv:
while fn not in results:
try:
@@ -527,7 +516,11 @@
print "(%s also failed)" % fn
sys.exit(1)
break
print results[fn]
name = os.path.basename(fn).rjust(FN_JUST_SIZE)
msgs = results[fn]
if isinstance(msgs,str):
msgs = [msgs]
print ' '.join([name] + list(msgs))
for t in threads:
t.join()
......
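To summarize the tester.py refactor above: run_test now delegates directive parsing to get_test_options and verdict formatting to determine_test_result, which returns either a single string or a tuple of message fragments. A small self-contained sketch (format_result and the file name are hypothetical) of how the main loop right-justifies the test name and splices those fragments together with spaces:

import os

FN_JUST_SIZE = 20

def format_result(fn, msgs):
    # determine_test_result may return a str or a tuple of strings;
    # normalize to a list and join with spaces, as main() does above.
    name = os.path.basename(fn).rjust(FN_JUST_SIZE)
    if isinstance(msgs, str):
        msgs = [msgs]
    return ' '.join([name] + list(msgs))

print(format_result("test/tests/example.py",
                    ("Correct output (  4.2ms)", "(ignoring stats)")))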