Merge pull request #542 from rntz/unwinder

unwinder

Merge pull request #542 from rntz/unwinder
unwinder
9de31610 · Kevin Modzelewski · 2c33f2ee · 1552ac34 · 9de31610 · 9de31610
Commit 9de31610 authored May 22, 2015 by Kevin Modzelewski
35 changed files
--- a/.gitmodules
+++ b/.gitmodules
 [submodule "libunwind"]
 	path = libunwind
 	url = git://git.sv.gnu.org/libunwind.git
+        ignore = all
 [submodule "libpypa"]
 	path = libpypa
 	url = git://github.com/vinzenz/libpypa.git

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -71,15 +71,22 @@ endif()

 execute_process(COMMAND cat llvm_revision.txt WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE LLVMREV OUTPUT_STRIP_TRAILING_WHITESPACE)

-# llvm and clang patches
+# llvm, clang, and libunwind patches
 add_custom_target(llvm_gotorev python ${CMAKE_SOURCE_DIR}/tools/git_svn_gotorev.py ${DEPS_DIR}/llvm-trunk ${LLVMREV} llvm_patches WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
 add_custom_target(clang_gotorev python ${CMAKE_SOURCE_DIR}/tools/git_svn_gotorev.py ${DEPS_DIR}/llvm-trunk/tools/clang ${LLVMREV} clang_patches WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
 add_custom_target(llvm_up DEPENDS llvm_gotorev clang_gotorev)

+set(LIBUNWIND_PATCHES
+  ${CMAKE_SOURCE_DIR}/libunwind_patches/0001-pyston-add-lots-of-comments.patch
+  ${CMAKE_SOURCE_DIR}/libunwind_patches/0002-pyston-stop-x86_64-setcontext-restoring-uninitialize.patch)
+
 add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/gitmodules
-                   COMMAND git submodule update --init WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+                   COMMAND git submodule update --init
+                   COMMAND python ${CMAKE_SOURCE_DIR}/tools/git_am_automated.py libunwind ${LIBUNWIND_PATCHES}
                   COMMAND cmake -E touch ${CMAKE_BINARY_DIR}/gitmodules
-                   DEPENDS ${CMAKE_SOURCE_DIR}/.gitmodules)
+                   WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+                   DEPENDS ${CMAKE_SOURCE_DIR}/.gitmodules
+                   DEPENDS ${LIBUNWIND_PATCHES})
 add_custom_target(gitsubmodules DEPENDS ${CMAKE_BINARY_DIR}/gitmodules)

 # llvm
@@ -100,13 +107,17 @@ if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
  set(LIBUNWIND_DEBUG_CFLAGS "CFLAGS=-O0 -g")
  set(LIBUNWIND_DEBUG "--enable-debug")
  set(LIBUNWIND_DEBUG_FRAME "--enable-debug-frame")
+  set(LIBUNWIND_CONSERVATIVE_CHECKS "--enable-conservative-checks")
+else()
+  set(LIBUNWIND_CONSERVATIVE_CHECKS "--disable-conservative-checks")
 endif()
 ExternalProject_Add(libunwind
                    PREFIX libunwind
                    SOURCE_DIR ${CMAKE_SOURCE_DIR}/libunwind
+                    # TODO: more accurate DEPENDS - should depend on *contents* of libunwind/ source directory
                    DEPENDS gitsubmodules
                    UPDATE_COMMAND autoreconf -i
-                    CONFIGURE_COMMAND ${CMAKE_SOURCE_DIR}/libunwind/configure ${LIBUNWIND_DEBUG_CFLAGS} --prefix=${CMAKE_BINARY_DIR}/libunwind --enable-shared=0 ${LIBUNWIND_DEBUG} ${LIBUNWIND_DEBUG_FRAME}
+                    CONFIGURE_COMMAND ${CMAKE_SOURCE_DIR}/libunwind/configure ${LIBUNWIND_DEBUG_CFLAGS} --prefix=${CMAKE_BINARY_DIR}/libunwind --enable-shared=0 --disable-block-signals ${LIBUNWIND_CONSERVATIVE_CHECKS} ${LIBUNWIND_DEBUG} ${LIBUNWIND_DEBUG_FRAME}
                    LOG_UPDATE ON
                    LOG_CONFIGURE ON
                    LOG_BUILD ON

--- a/Makefile
+++ b/Makefile
+# prints variables for debugging
+print-%: ; @echo $($*)
+
 # Disable builtin rules:
 .SUFFIXES:

@@ -955,11 +958,11 @@ CMAKE_SHAREDMODS := sharedmods ext_pyston

 .PHONY: pyston_dbg pyston_release
 pyston_dbg: $(CMAKE_SETUP_DBG)
-	$(NINJA) -C $(HOME)/pyston-build-dbg pyston copy_stdlib copy_libpyston $(CMAKE_SHAREDMODS) ext_cpython $(NINJAFLAGS)
-	ln -sf $(HOME)/pyston-build-dbg/pyston pyston_dbg
+	$(NINJA) -C $(CMAKE_DIR_DBG) pyston copy_stdlib copy_libpyston $(CMAKE_SHAREDMODS) ext_cpython $(NINJAFLAGS)
+	ln -sf $(CMAKE_DIR_DBG)/pyston pyston_dbg
 pyston_release: $(CMAKE_SETUP_RELEASE)
-	$(NINJA) -C $(HOME)/pyston-build-release pyston copy_stdlib copy_libpyston $(CMAKE_SHAREDMODS) ext_cpython $(NINJAFLAGS)
-	ln -sf $(HOME)/pyston-build-release/pyston pyston_release
+	$(NINJA) -C $(CMAKE_DIR_RELEASE) pyston copy_stdlib copy_libpyston $(CMAKE_SHAREDMODS) ext_cpython $(NINJAFLAGS)
+	ln -sf $(CMAKE_DIR_RELEASE)/pyston pyston_release
 endif
 CMAKE_DIR_GCC := $(HOME)/pyston-build-gcc
 CMAKE_SETUP_GCC := $(CMAKE_DIR_GCC)/build.ninja

--- a/docs/EXCEPTION-SAFETY.md
+++ b/docs/EXCEPTION-SAFETY.md
+# Using exceptions safely in Pyston
+
+In addition to following general best practices for writing exception-safe C++, when writing Pyston there are a few special rules (because it has a custom unwinder):
+
+1. **Only throw `ExcInfo` values.** All Pyston exceptions are of type `ExcInfo`, which represents a Python exception. In fact, usually you should never `throw`; instead, call `raiseRaw`, `raiseExc`, `raise3`, or similar.
+
+2. **Always catch by value.** That is, always write:
+
+   ```c++
+   try { ... } catch (ExcInfo e) { ... } // Do this!
+   ```
+
+   And **never** write:
+
+   ```c++
+   try { ... } catch (ExcInfo& e) { ... } // DO NOT DO THIS!
+   ```
+
+   The reason for this has to do with the way exceptions are stored in thread-local storage in Pyston; see `docs/UNWINDING.md` for the gory details.
+
+3. **Never rethrow with bare `throw;`.** Instead, write `throw e;`, where `e` is the exception you caught previously.
+
+4. **Never invoke the GC from a destructor.** The GC is not currently aware of where the exception being unwound is stored. Invoking the GC from a destructor might collect the exception, producing a use-after-free bug!
+
+5. **Never throw an exception inside a destructor.** This is a general rule in C++ anyway, but worth reiterating here. In fact, don't even invoke code that *throws an exception but handles it*! This, again, has to do with the way exceptions are stored.
+
+6. **Don't throw exceptions inside signal handlers.** It should be okay if you throw an exception and *always* catch it inside the handler, but I haven't tested this. In theory the exception should just unwind through the signal frame, and libunwind will take care of resetting the signal mask. However, as this codepath hasn't been tested, it's best avoided.
+
+Most of these restrictions could be eliminated in principle. See `docs/UNWINDING.md` for the gory details.
--- a/docs/UNWINDING.md
+++ b/docs/UNWINDING.md
--- a/from_cpython/Include/dictobject.h
+++ b/from_cpython/Include/dictobject.h
@@ -94,7 +94,8 @@ struct _dictobject {
 #endif
 typedef struct {
    PyObject_HEAD;
-    char _filler[48];
+    char _filler[48];           // gcc 4.8
+    // char _filler[56];           // gcc 4.9
 } PyDictObject;

 // Pyston change: these are no longer static objects:

--- a/from_cpython/Include/object.h
+++ b/from_cpython/Include/object.h
@@ -454,7 +454,9 @@ struct _typeobject {

    void* _hcls;
    void* _hcattrs;
-    char _dep_getattrs[56]; // FIXME: this is hardcoding the size of this particular implementation of std::unordered_map
+    // FIXME: this is hardcoding the size of this particular implementation of std::unordered_map
+    char _dep_getattrs[56];     // gcc 4.8
+    // char _dep_getattrs[64];     // gcc 4.9
    char _ics[32];
    void* _gcvisit_func;
    void* _dtor;

--- a/libunwind_patches/0001-Change-the-RBP-validation-heuristic-to-allow-size-0-.patch
+++ b/libunwind_patches/0001-Change-the-RBP-validation-heuristic-to-allow-size-0-.patch
-From e1d7c78d95e4b73a311f10149d0a54547d307d5d Mon Sep 17 00:00:00 2001
-From: Kevin Modzelewski <kmod@dropbox.com>
-Date: Tue, 22 Apr 2014 15:50:40 -0700
-Subject: [PATCH] Change the RBP-validation heuristic to allow size-0 call frames
-
---
- include/libunwind.h |    3 +++
- src/x86_64/Gstep.c  |    2 +-
- 2 files changed, 4 insertions(+), 1 deletions(-)
-
-diff --git a/include/libunwind.h b/include/libunwind.h
-index d11c823..d9a5f03 100644
--- a/include/libunwind.h
-+++ b/include/libunwind.h
-@@ -30,3 +30,6 @@
- # include "libunwind-x86_64.h"
- 
- #endif /* UNW_REMOTE_ONLY */
-+
-+#define LIBUNWIND_PYSTON_PATCH_VERSION 0x01
-+
-diff --git a/src/x86_64/Gstep.c b/src/x86_64/Gstep.c
-index 9fa0967..809d60b 100644
--- a/src/x86_64/Gstep.c
-+++ b/src/x86_64/Gstep.c
-@@ -173,7 +173,7 @@ unw_step (unw_cursor_t *cursor)
- 		 anything about new RBP (rbp1) since it may not be a frame
- 		 pointer in the frame above.  Just check we get the value. */
-               if (ret < 0
-		  || rbp <= c->dwarf.cfa
-+		  || rbp < c->dwarf.cfa
- 		  || (rbp - c->dwarf.cfa) > 0x4000)
- 	        {
-                   rip_loc = DWARF_NULL_LOC;
-- 
-1.7.4.1
-
--- a/libunwind_patches/0001-pyston-add-lots-of-comments.patch
+++ b/libunwind_patches/0001-pyston-add-lots-of-comments.patch
+From 851b35ec5f1e27273fcf271e94364ced31baa2b5 Mon Sep 17 00:00:00 2001
+From: Michael Arntzenius <daekharel@gmail.com>
+Date: Mon, 18 May 2015 17:47:38 -0700
+Subject: [PATCH 1/2] pyston: add lots of comments
+
+---
+ src/dwarf/Gparser.c     | 3 +++
+ src/mi/Gdyn-extract.c   | 3 ++-
+ src/mi/Gget_proc_name.c | 3 +++
+ src/x86_64/Gstep.c      | 2 +-
+ 4 files changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/src/dwarf/Gparser.c b/src/dwarf/Gparser.c
+index fefd809..a5dd943 100644
+--- a/src/dwarf/Gparser.c
+++ b/src/dwarf/Gparser.c
+@@ -429,6 +429,7 @@ fetch_proc_info (struct dwarf_cursor *c, unw_word_t ip, int need_unwind_info)
+   memset (&c->pi, 0, sizeof (c->pi));
+ 
+   /* check dynamic info first --- it overrides everything else */
+  /* TODO rntz: this might be slow when there are lots of jitted functions */
+   ret = unwi_find_dynamic_proc_info (c->as, ip, &c->pi, need_unwind_info,
+ 				     c->as_arg);
+   if (ret == -UNW_ENOINFO)
+@@ -543,6 +544,7 @@ get_rs_cache (unw_addr_space_t as, intrmask_t *saved_maskp)
+       lock_acquire (&cache->lock, *saved_maskp);
+     }
+ 
+  /* XXX rntz: this looks dangerous. why does it need to be atomic? */
+   if (atomic_read (&as->cache_generation) != atomic_read (&cache->generation))
+     {
+       flush_rs_cache (cache);
+@@ -921,6 +923,7 @@ dwarf_create_state_record (struct dwarf_cursor *c, dwarf_state_record_t *sr)
+ HIDDEN int
+ dwarf_make_proc_info (struct dwarf_cursor *c)
+ {
+    /* TODO rntz: why is this #if 0'd? does the cache not work? check upstream. */
+ #if 0
+   if (c->as->caching_policy == UNW_CACHE_NONE
+       || get_cached_proc_info (c) < 0)
+diff --git a/src/mi/Gdyn-extract.c b/src/mi/Gdyn-extract.c
+index 5258839..12ba31f 100644
+--- a/src/mi/Gdyn-extract.c
+++ b/src/mi/Gdyn-extract.c
+@@ -33,7 +33,8 @@ unwi_extract_dynamic_proc_info (unw_addr_space_t as, unw_word_t ip,
+   pi->start_ip = di->start_ip;
+   pi->end_ip = di->end_ip;
+   pi->gp = di->gp;
+-  pi->format = di->format;
+  pi->format = di->format;      /* XXX rntz: is this wrong? */
+  /* This is the point at which we can end up knowing we'll return a non-UNW_INFO_FORMAT_DYNAMIC unwind_info field. */
+   switch (di->format)
+     {
+     case UNW_INFO_FORMAT_DYNAMIC:
+diff --git a/src/mi/Gget_proc_name.c b/src/mi/Gget_proc_name.c
+index 7251c59..485346c 100644
+--- a/src/mi/Gget_proc_name.c
+++ b/src/mi/Gget_proc_name.c
+@@ -55,9 +55,12 @@ get_proc_name (unw_addr_space_t as, unw_word_t ip,
+ 
+   buf[0] = '\0';	/* always return a valid string, even if it's empty */
+ 
+  /* FIXME rntz: this ends up copying a dwarf_cie_info pi.unwind_info,
+   * and then reading it back as an unw_dyn_info_t! */
+   ret = unwi_find_dynamic_proc_info (as, ip, &pi, 1, arg);
+   if (ret == 0)
+     {
+      assert(pi.format == UNW_INFO_FORMAT_DYNAMIC); /* FIXME rntz: handle this being false. */
+       unw_dyn_info_t *di = pi.unwind_info;
+ 
+       if (offp)
+diff --git a/src/x86_64/Gstep.c b/src/x86_64/Gstep.c
+index 809d60b..e4312af 100644
+--- a/src/x86_64/Gstep.c
+++ b/src/x86_64/Gstep.c
+@@ -158,7 +158,7 @@ unw_step (unw_cursor_t *cursor)
+ 	    }
+ 	  else
+ 	    {
+-	      unw_word_t rbp1 = 0;
+	      unw_word_t rbp1 = 0; /* might want to put an assert here to check for guessing */
+ 	      rbp_loc = DWARF_LOC(rbp, 0);
+ 	      rsp_loc = DWARF_NULL_LOC;
+ 	      rip_loc = DWARF_LOC (rbp + 8, 0);
+-- 
+2.1.0
+
--- a/libunwind_patches/0002-pyston-stop-x86_64-setcontext-restoring-uninitialize.patch
+++ b/libunwind_patches/0002-pyston-stop-x86_64-setcontext-restoring-uninitialize.patch
+From 3faf9111fa09e26209eb01091a8ad61c28ae6197 Mon Sep 17 00:00:00 2001
+From: Michael Arntzenius <daekharel@gmail.com>
+Date: Tue, 19 May 2015 14:11:27 -0700
+Subject: [PATCH 2/2] pyston: stop x86_64 setcontext() restoring
+ (uninitialized) signal mask
+
+---
+ src/x86_64/setcontext.S | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+diff --git a/src/x86_64/setcontext.S b/src/x86_64/setcontext.S
+index 1af8b67..7bee005 100644
+--- a/src/x86_64/setcontext.S
+++ b/src/x86_64/setcontext.S
+@@ -47,6 +47,14 @@ _Ux86_64_setcontext:
+ #if defined __linux__
+ 	/* restore signal mask
+            sigprocmask(SIG_SETMASK, ucp->uc_sigmask, NULL, sizeof(sigset_t)) */
+
+	/* PYSTON CHANGE: for some reason, libunwind restores the signal mask
+	 * in _Ux86_64_setcontext() even though _Ux86_64_getcontext doesn't
+	 * initialize it! This sets our signal mask to random stack garbage,
+	 * so I've commented it out. - rntz
+	 */
+
+	/*
+ 	push %rdi
+ 	mov $__NR_rt_sigprocmask, %rax
+ 	lea UC_SIGMASK(%rdi), %rsi
+@@ -55,21 +63,29 @@ _Ux86_64_setcontext:
+ 	mov $SIGSET_BYTE_SIZE, %r10
+ 	syscall
+ 	pop %rdi
+	*/
+ 
+         /* restore fp state */
+ 	mov    UC_MCONTEXT_FPREGS_PTR(%rdi),%r8
+ 	fldenv (%r8)
+ 	ldmxcsr FPREGS_OFFSET_MXCSR(%r8)
+ #elif defined __FreeBSD__
+	/* PYSTON CHANGE */
+#error Pyston doesn't support FreeBSD yet.
+ 	/* restore signal mask */
+	/* PYSTON CHANGE: Commented out for same reason as the linux code
+	 * above, but I haven't tested this one. Use at your own risk. - rntz
+	 */
+	/*
+ 	pushq	%rdi
+ 	xorl	%edx,%edx
+ 	leaq	UC_SIGMASK(%rdi),%rsi
+-	movl	$3,%edi/* SIG_SETMASK */
+	movl	$3,%edi/\* SIG_SETMASK *\/
+ 	movl	$SYS_sigprocmask,%eax
+ 	movq	%rcx,%r10
+ 	syscall
+ 	popq	%rdi
+	*/
+ 
+ 	/* restore fp state */
+ 	cmpq $UC_MCONTEXT_FPOWNED_FPU,UC_MCONTEXT_OWNEDFP(%rdi)
+-- 
+2.1.0
+
--- a/microbenchmarks/exceptions_2_ubench.py
+++ b/microbenchmarks/exceptions_2_ubench.py
+NUM_ITERS = 100 * 1000
+WRAPPER_DEPTH = 10
+RECURSE_DEPTH = 0
+TRACEBACK_DEPTH = 0
+
+counter = 0
+
+def gtor():
+    yield 1
+    raise Exception('bad wrong')
+    yield 2
+
+def wrapper(n=WRAPPER_DEPTH):
+    global counter
+    if n:
+        try:
+            wrapper(n-1)
+        finally:
+            counter += 1
+    else:
+        for x in gtor():
+            pass
+
+def recurser(n=RECURSE_DEPTH):
+    if n:
+        return recurser(n-1)
+    else:
+        return wrapper()
+
+def f(niters, traceback_depth=TRACEBACK_DEPTH):
+    global counter
+    if traceback_depth:
+        f(niters, traceback_depth - 1)
+    else:
+        for i in xrange(niters):
+            try:
+                recurser()
+            except Exception:
+                counter = 0
+
+f(NUM_ITERS)
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -68,6 +68,7 @@ add_library(PYSTON_OBJECTS OBJECT ${OPTIONAL_SRCS}
 		core/stats.cpp
 		core/threading.cpp
 		core/util.cpp
+		deadlock_debug_helper.cpp
 		gc/collector.cpp
 		gc/gc_alloc.cpp
 		gc/heap.cpp
@@ -83,6 +84,7 @@ add_library(PYSTON_OBJECTS OBJECT ${OPTIONAL_SRCS}
 		runtime/code.cpp
 		runtime/complex.cpp
 		runtime/ctxswitching.S
+		runtime/cxx_unwind.cpp
 		runtime/descr.cpp
 		runtime/dict.cpp
 		runtime/file.cpp

--- a/src/asm_writing/icinfo.h
+++ b/src/asm_writing/icinfo.h
@@ -90,6 +90,7 @@ private:
    // This is probably a bunch worse than LRU, but it's also
    // probably a bunch better than the "always evict slot #0" policy
    // that it's replacing.
+    // TODO: experiment with different IC eviction strategies.
    int next_slot_to_try;

    const StackInfo stack_info;

--- a/src/codegen/ast_interpreter.cpp
+++ b/src/codegen/ast_interpreter.cpp
@@ -76,7 +76,12 @@ public:

    void initArguments(int nargs, BoxedClosure* closure, BoxedGenerator* generator, Box* arg1, Box* arg2, Box* arg3,
                       Box** args);
-    static Value execute(ASTInterpreter& interpreter, CFGBlock* start_block = NULL, AST_stmt* start_at = NULL);
+
+    // This must not be inlined, because we rely on being able to detect when we're inside of it (by checking whether
+    // %rip is inside its instruction range) during a stack-trace in order to produce tracebacks inside interpreted
+    // code. The attribute must be spelled `__noinline__`; unrecognized attribute names are ignored with a warning.
+    __attribute__((__noinline__)) static Value
+        execute(ASTInterpreter& interpreter, CFGBlock* start_block = NULL, AST_stmt* start_at = NULL);

 private:
    Box* createFunction(AST* node, AST_arguments* args, const std::vector<AST_stmt*>& body);
@@ -274,6 +279,9 @@ void ASTInterpreter::initArguments(int nargs, BoxedClosure* _closure, BoxedGener
    }
 }

+// Map from stack frame pointers for frames corresponding to ASTInterpreter::execute() to the ASTInterpreter handling
+// them. Used to look up information about that frame. This is used for getting tracebacks, for CPython introspection
+// (sys._getframe & co), and for GC scanning.
 static std::unordered_map<void*, ASTInterpreter*> s_interpreterMap;
 static_assert(THREADING_USE_GIL, "have to make the interpreter map thread safe!");


--- a/src/codegen/irgen/irgenerator.cpp
+++ b/src/codegen/irgen/irgenerator.cpp
@@ -1855,6 +1855,7 @@ private:
        static const std::string newline_str("\n");
        static const std::string space_str(" ");

+        // TODO: why are we inline-generating all this code instead of just emitting a call to some runtime function?
        int nvals = node->values.size();
        for (int i = 0; i < nvals; i++) {
            CompilerVariable* var = evalExpr(node->values[i], unw_info);

--- a/src/codegen/unwinding.cpp
+++ b/src/codegen/unwinding.cpp
@@ -59,14 +59,18 @@ namespace pyston {

 // Parse an .eh_frame section, and construct a "binary search table" such as you would find in a .eh_frame_hdr section.
 // Currently only supports .eh_frame sections with exactly one fde.
-void parseEhFrame(uint64_t start_addr, uint64_t size, uint64_t* out_data, uint64_t* out_len) {
+// See http://www.airs.com/blog/archives/460 for some useful info.
+void parseEhFrame(uint64_t start_addr, uint64_t size, uint64_t func_addr, uint64_t* out_data, uint64_t* out_len) {
+    // NB. according to sully@msully.net, this is not legal C++ b/c type-punning through unions isn't allowed.
+    // But I can't find a compiler flag that warns on it, and it seems to work.
    union {
        uint8_t* u8;
        uint32_t* u32;
    };
    u32 = (uint32_t*)start_addr;

-    int cie_length = *u32;
+    int32_t cie_length = *u32;
+    assert(cie_length != 0xffffffff); // 0xffffffff would indicate a 64-bit DWARF format
    u32++;

    assert(*u32 == 0); // CIE ID
@@ -80,13 +84,37 @@ void parseEhFrame(uint64_t start_addr, uint64_t size, uint64_t* out_data, uint64

    int nentries = 1;
    uw_table_entry* table_data = new uw_table_entry[nentries];
-    table_data->start_ip_offset = 0;
+    table_data->start_ip_offset = func_addr - start_addr;
    table_data->fde_offset = 4 + cie_length;

    *out_data = (uintptr_t)table_data;
    *out_len = nentries;
 }

+void registerDynamicEhFrame(uint64_t code_addr, size_t code_size, uint64_t eh_frame_addr, size_t eh_frame_size) {
+    unw_dyn_info_t* dyn_info = new unw_dyn_info_t();
+    dyn_info->start_ip = code_addr;
+    dyn_info->end_ip = code_addr + code_size;
+    // TODO: It's not clear why we use UNW_INFO_FORMAT_REMOTE_TABLE instead of UNW_INFO_FORMAT_TABLE. kmod reports that
+    // he tried FORMAT_TABLE and it didn't work, but it wasn't clear why. However, using FORMAT_REMOTE_TABLE forces
+    // indirection through an access_mem() callback, and indeed, a function named access_mem() shows up in our `perf`
+    // results! So it's possible there's a performance win lurking here.
+    dyn_info->format = UNW_INFO_FORMAT_REMOTE_TABLE;
+
+    dyn_info->u.rti.name_ptr = 0;
+    dyn_info->u.rti.segbase = eh_frame_addr;
+    parseEhFrame(eh_frame_addr, eh_frame_size, code_addr, &dyn_info->u.rti.table_data, &dyn_info->u.rti.table_len);
+
+    if (VERBOSITY() >= 2)
+        printf("dyn_info = %p, table_data = %p\n", dyn_info, (void*)dyn_info->u.rti.table_data);
+    _U_dyn_register(dyn_info);
+
+    // TODO: it looks like libunwind does a linear search over anything dynamically registered,
+    // as opposed to the binary search it can do within a dyn_info.
+    // If we're registering a lot of dyn_info's, it might make sense to coalesce them into a single
+    // dyn_info that contains a binary search table.
+}
+
 class CFRegistry {
 private:
    std::vector<CompiledFunction*> cfs;
@@ -156,55 +184,59 @@ public:

        assert(g.cur_cf);

-        llvm_error_code ec;
+        uint64_t func_addr = 0; // remains 0 until we find a function
+
+        // Search through the symbols to find the function that got JIT'ed.
+        // (We only JIT one function at a time.)
        for (const auto& sym : Obj.symbols()) {
            llvm::object::SymbolRef::Type SymType;
-            if (sym.getType(SymType))
+            if (sym.getType(SymType) || SymType != llvm::object::SymbolRef::ST_Function)
+                continue;
+
+            llvm::StringRef Name;
+            uint64_t Size;
+            if (sym.getName(Name) || sym.getSize(Size))
                continue;
-            if (SymType == llvm::object::SymbolRef::ST_Function) {
-                llvm::StringRef Name;
-                uint64_t Addr;
-                uint64_t Size;
-                if (sym.getName(Name))
-                    continue;
-                Addr = L.getSymbolLoadAddress(Name);
-                assert(Addr);
-                if (sym.getSize(Size))
-                    continue;
+
+            // Found a function!
+            assert(!func_addr);
+            func_addr = L.getSymbolLoadAddress(Name);
+            assert(func_addr);

 // TODO this should be the Python name, not the C name:
 #if LLVMREV < 208921
-                llvm::DILineInfoTable lines = Context->getLineInfoForAddressRange(
-                    Addr, Size, llvm::DILineInfoSpecifier::FunctionName | llvm::DILineInfoSpecifier::FileLineInfo
-                                    | llvm::DILineInfoSpecifier::AbsoluteFilePath);
+            llvm::DILineInfoTable lines = Context->getLineInfoForAddressRange(
+                func_addr, Size, llvm::DILineInfoSpecifier::FunctionName | llvm::DILineInfoSpecifier::FileLineInfo
+                                     | llvm::DILineInfoSpecifier::AbsoluteFilePath);
 #else
-                llvm::DILineInfoTable lines = Context->getLineInfoForAddressRange(
-                    Addr, Size, llvm::DILineInfoSpecifier(llvm::DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
-                                                          llvm::DILineInfoSpecifier::FunctionNameKind::LinkageName));
+            llvm::DILineInfoTable lines = Context->getLineInfoForAddressRange(
+                func_addr, Size,
+                llvm::DILineInfoSpecifier(llvm::DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
+                                          llvm::DILineInfoSpecifier::FunctionNameKind::LinkageName));
 #endif
-                if (VERBOSITY() >= 3) {
-                    for (int i = 0; i < lines.size(); i++) {
-                        printf("%s:%d, %s: %lx\n", lines[i].second.FileName.c_str(), lines[i].second.Line,
-                               lines[i].second.FunctionName.c_str(), lines[i].first);
-                    }
+            if (VERBOSITY() >= 3) {
+                for (int i = 0; i < lines.size(); i++) {
+                    printf("%s:%d, %s: %lx\n", lines[i].second.FileName.c_str(), lines[i].second.Line,
+                           lines[i].second.FunctionName.c_str(), lines[i].first);
                }
-
-                assert(g.cur_cf->code_start == 0);
-                g.cur_cf->code_start = Addr;
-                g.cur_cf->code_size = Size;
-                cf_registry.registerCF(g.cur_cf);
            }
+
+            assert(g.cur_cf->code_start == 0);
+            g.cur_cf->code_start = func_addr;
+            g.cur_cf->code_size = Size;
+            cf_registry.registerCF(g.cur_cf);
        }

-        // Currently-unused libunwind support:
-        llvm_error_code code;
+        assert(func_addr);
+
+        // Libunwind support:
        bool found_text = false, found_eh_frame = false;
        uint64_t text_addr = -1, text_size = -1;
        uint64_t eh_frame_addr = -1, eh_frame_size = -1;

        for (const auto& sec : Obj.sections()) {
            llvm::StringRef name;
-            code = sec.getName(name);
+            llvm_error_code code = sec.getName(name);
            assert(!code);

            uint64_t addr, size;
@@ -229,24 +261,9 @@ public:

        assert(found_text);
        assert(found_eh_frame);
+        assert(text_addr == func_addr);

-        unw_dyn_info_t* dyn_info = new unw_dyn_info_t();
-        dyn_info->start_ip = text_addr;
-        dyn_info->end_ip = text_addr + text_size;
-        dyn_info->format = UNW_INFO_FORMAT_REMOTE_TABLE;
-
-        dyn_info->u.rti.name_ptr = 0;
-        dyn_info->u.rti.segbase = eh_frame_addr;
-        parseEhFrame(eh_frame_addr, eh_frame_size, &dyn_info->u.rti.table_data, &dyn_info->u.rti.table_len);
-
-        if (VERBOSITY() >= 2)
-            printf("dyn_info = %p, table_data = %p\n", dyn_info, (void*)dyn_info->u.rti.table_data);
-        _U_dyn_register(dyn_info);
-
-        // TODO: it looks like libunwind does a linear search over anything dynamically registered,
-        // as opposed to the binary search it can do within a dyn_info.
-        // If we're registering a lot of dyn_info's, it might make sense to coalesce them into a single
-        // dyn_info that contains a binary search table.
+        registerDynamicEhFrame(text_addr, text_size, eh_frame_addr, eh_frame_size);
    }
 };

@@ -513,6 +530,36 @@ static const LineInfo* lineInfoForFrame(PythonFrameIteratorImpl& frame_it) {
    return new LineInfo(current_stmt->lineno, current_stmt->col_offset, source->fn, source->getName());
 }

+// To produce a traceback, we:
+//
+// 1. Use libunwind to produce a cursor into our stack.
+//
+// 2. Grab the next frame in the stack and check what function it is from. There are four options:
+//
+//    (a) A JIT-compiled Python function.
+//    (b) ASTInterpreter::execute() in codegen/ast_interpreter.cpp.
+//    (c) generatorEntry() in runtime/generator.cpp.
+//    (d) Something else.
+//
+//    By cases:
+//
+//    (2a, 2b) If the previous frame we visited was an OSR frame (which we know from its CompiledFunction*), then we
+//    skip this frame (it's the frame we replaced on-stack) and keep unwinding. (FIXME: Why are we guaranteed that we
+//    on-stack-replaced at most one frame?) Otherwise, we found a frame for our traceback! Proceed to step 3.
+//
+//    (2c) Continue unwinding in the stack of whatever called the generator. This involves some hairy munging of
+//    undocumented fields in libunwind structs to swap the context.
+//
+//    (2d) Ignore it and keep unwinding. It's some C or C++ function that we don't want in our traceback.
+//
+// 3. We've found a frame for our traceback, along with a CompiledFunction* and some other information about it.
+//
+//    We grab the current statement it is in (as an AST_stmt*) and use it and the CompiledFunction*'s source info to
+//    produce the line information for the traceback. For JIT-compiled functions, getting the statement involves the
+//    CF's location_map.
+//
+// 4. Unless we've hit the end of the stack, go to 2 and keep unwinding.
+//
 static StatCounter us_gettraceback("us_gettraceback");
 BoxedTraceback* getTraceback() {
    STAT_TIMER(t0, "us_timer_gettraceback");

--- a/src/codegen/unwinding.h
+++ b/src/codegen/unwinding.h
@@ -27,9 +27,12 @@ class BoxedModule;
 class BoxedTraceback;
 struct FrameInfo;

+void registerDynamicEhFrame(uint64_t code_addr, size_t code_size, uint64_t eh_frame_addr, size_t eh_frame_size);
+
 BoxedModule* getCurrentModule();
 Box* getGlobals();     // returns either the module or a globals dict
 Box* getGlobalsDict(); // always returns a dict-like object
+CompiledFunction* getCFForAddress(uint64_t addr);

 BoxedTraceback* getTraceback();


--- a/src/core/common.h
+++ b/src/core/common.h
@@ -41,17 +41,6 @@

 #define ARRAY_LEN(arr) (sizeof(arr) / sizeof((arr)[0]))

-// GCC and clang handle always_inline very differently;
-// we mostly only care about it for the stdlib, so just remove the attributes
-// if we're not in clang
-#ifdef __clang__
-#define ALWAYSINLINE __attribute__((always_inline))
-#define NOINLINE __attribute__((noinline))
-#else
-#define ALWAYSINLINE
-#define NOINLINE
-#endif
-
 #if LLVMREV < 210783
 #define llvm_error_code llvm::error_code
 #else

--- a/src/core/options.h
+++ b/src/core/options.h
@@ -20,7 +20,7 @@ namespace pyston {
 extern "C" {

 extern int GLOBAL_VERBOSITY;
-#define VERBOSITY(x) GLOBAL_VERBOSITY
+#define VERBOSITY(x) pyston::GLOBAL_VERBOSITY
 extern int PYSTON_VERSION_MAJOR, PYSTON_VERSION_MINOR;
 // Version number we're targeting:
 extern int PYTHON_VERSION_MAJOR, PYTHON_VERSION_MINOR, PYTHON_VERSION_MICRO, PYTHON_VERSION_HEX;

--- a/src/core/thread_utils.h
+++ b/src/core/thread_utils.h
@@ -52,11 +52,20 @@ public:

 class PthreadFastMutex {
 private:
+    // NB. I tried using error-checking mutexes (PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP) here in debug-mode but got
+    // some funky errors. I think we might be deliberately locking/unlocking mutexes on different threads in some
+    // circumstances. - rntz
    pthread_mutex_t mutex = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP;

 public:
-    void lock() { pthread_mutex_lock(&mutex); }
-    void unlock() { pthread_mutex_unlock(&mutex); }
+    void lock() {
+        int err = pthread_mutex_lock(&mutex);
+        ASSERT(!err, "pthread_mutex_lock failed, error code %d", err);
+    }
+    void unlock() {
+        int err = pthread_mutex_unlock(&mutex);
+        ASSERT(!err, "pthread_mutex_unlock failed, error code %d", err);
+    }

    PthreadFastMutex* asRead() { return this; }
    PthreadFastMutex* asWrite() { return this; }
@@ -64,11 +73,18 @@ public:

 class PthreadMutex {
 private:
+    // Ditto comment in PthreadFastMutex re error-checking mutexes. - rntz
    pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

 public:
-    void lock() { pthread_mutex_lock(&mutex); }
-    void unlock() { pthread_mutex_unlock(&mutex); }
+    void lock() {
+        int err = pthread_mutex_lock(&mutex);
+        ASSERT(!err, "pthread_mutex_lock failed, error code %d", err);
+    }
+    void unlock() {
+        int err = pthread_mutex_unlock(&mutex);
+        ASSERT(!err, "pthread_mutex_unlock failed, error code %d", err);
+    }

    PthreadMutex* asRead() { return this; }
    PthreadMutex* asWrite() { return this; }

--- a/src/core/threading.cpp
+++ b/src/core/threading.cpp
@@ -489,6 +489,13 @@ extern "C" void PyEval_ReInitThreads() noexcept {
        }
    }

+    // We need to make sure the threading lock is released, so we unconditionally unlock it. After a fork, we are the
+    // only thread, so this won't race; and since it's a "fast" mutex (see `man pthread_mutex_lock`), this works even
+    // if it isn't locked. If we needed to avoid unlocking a non-locked mutex, though, we could trylock it first:
+    //
+    //     int err = pthread_mutex_trylock(&threading_lock.mutex);
+    //     ASSERT(!err || err == EBUSY, "pthread_mutex_trylock failed, but not with EBUSY");
+    //
    threading_lock.unlock();

    num_starting_threads = 0;

--- a/src/core/types.h
+++ b/src/core/types.h
@@ -689,6 +689,8 @@ struct FrameInfo {
    // In Pyston, exc is the frame-local value of sys.exc_info.
    // - This makes frame entering+leaving faster at the expense of slower exceptions.
    //
+    // TODO: do we want exceptions to be slower? benchmark this!
+    //
    // exc.type is initialized to NULL at function entry, and exc.value and exc.tb are left
    // uninitialized.  When one wants to access any of the values, you need to check if exc.type
    // is NULL, and if so crawl up the stack looking for the first frame with a non-null exc.type

--- a/src/core/util.cpp
+++ b/src/core/util.cpp
@@ -41,8 +41,13 @@ uint64_t getCPUTicks() {
    return rdtsc();
 }

+#if !DISABLE_TIMERS
+
 int Timer::level = 0;

+// Constructs a Timer without starting it: only min_usec is recorded and the timer is marked as already ended.
+// Unlike the (desc, min_usec) constructor, restart() is not called here.
+Timer::Timer(long min_usec) : min_usec(min_usec), ended(true) {
+}
+
 Timer::Timer(const char* desc, long min_usec) : min_usec(min_usec), ended(true) {
    restart(desc);
 }
@@ -101,6 +106,8 @@ Timer::~Timer() {
    }
 }

+#endif // !DISABLE_TIMERS
+
 bool startswith(const std::string& s, const std::string& pattern) {
    if (pattern.size() > s.size())
        return false;

--- a/src/core/util.h
+++ b/src/core/util.h
@@ -26,6 +26,9 @@ namespace pyston {

 uint64_t getCPUTicks();

+#define DISABLE_TIMERS 0
+
+#if !DISABLE_TIMERS
 class Timer {
 private:
    static int level;
@@ -36,7 +39,9 @@ private:
    std::function<void(uint64_t)> exit_callback;

 public:
+    // Timers with non-NULL desc will print times longer than min_usec for debugging when VERBOSITY("time") >= 2
    Timer(const char* desc = NULL, long min_usec = -1);
+    Timer(long min_usec); // doesn't start the timer
    ~Timer();

    void setExitCallback(std::function<void(uint64_t)> _exit_callback) { exit_callback = _exit_callback; }
@@ -56,6 +61,23 @@ public:
    uint64_t getStartTime() const { return start_time; }
 };

+#else // DISABLE_TIMERS
+// No-op stand-in for Timer, compiled when DISABLE_TIMERS is non-zero.
+//
+// It is intended to expose the same public members as the real Timer so that call sites compile unchanged;
+// every method ignores its arguments, does no timing work, and returns 0 where a value is expected.
+class Timer {
+public:
+    Timer(const char* desc = NULL, long min_usec = -1) {}
+    Timer(long min_usec) {}
+
+    // The callback is dropped: a disabled timer never ends, so it would never be invoked.
+    void setExitCallback(std::function<void(uint64_t)> _exit_callback) {}
+
+    void restart(const char* newdesc, long new_min_usec) {}
+    void restart(const char* newdesc = NULL) {}
+
+    // Nothing is measured, so the reported elapsed time is always 0.
+    long end() { return 0; }
+    long split(const char* newdesc = NULL) { return 0; }
+
+    // Present for parity with the real Timer's public getStartTime(); without it, callers of getStartTime()
+    // would stop compiling as soon as DISABLE_TIMERS is set to 1.
+    uint64_t getStartTime() const { return 0; }
+};
+
+#endif // #else DISABLE_TIMERS
+
 bool startswith(const std::string& s, const std::string& pattern);
 bool endswith(const std::string& s, const std::string& pattern);


--- a/src/deadlock_debug_helper.cpp
+++ b/src/deadlock_debug_helper.cpp
+// Copyright (c) 2014-2015 Dropbox, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file is a hack for debugging deadlocks. It makes pthread_mutex_lock() complain if it takes more than a given
+// time (TIMEOUT_S seconds) to grab a lock. Perhaps it will be useful in the future.
+
+#if 0 // set to 1 to enable
+
+#include <errno.h>
+#include <pthread.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "core/common.h"
+
+#define TIMEOUT_S 2
+
+// Debugging replacement for pthread_mutex_lock(): blocks until `lock` is acquired, but prints a line containing
+// the calling thread's TID and the mutex address every time another TIMEOUT_S seconds pass without getting it.
+//
+// Returns 0 once the mutex is held. Any error other than a timeout trips the RELEASE_ASSERT below.
+extern "C" int pthread_mutex_lock(pthread_mutex_t* lock) {
+    pid_t tid = syscall(SYS_gettid);
+    RELEASE_ASSERT(tid > 1, "negative or invalid TID");
+
+    int err;
+    for (;;) {
+        // pthread_mutex_timedlock() takes an *absolute* CLOCK_REALTIME deadline, not a relative duration.
+        // Passing a bare { TIMEOUT_S, 0 } names an instant just after the epoch, so every contended attempt
+        // would time out immediately and this loop would busy-spin. Recompute the deadline for each attempt.
+        struct timespec deadline;
+        int rc = clock_gettime(CLOCK_REALTIME, &deadline);
+        RELEASE_ASSERT(rc == 0, "could not get time, errno %d", errno);
+        deadline.tv_sec += TIMEOUT_S;
+
+        err = pthread_mutex_timedlock(lock, &deadline);
+        if (err != ETIMEDOUT)
+            break;
+
+        // A full TIMEOUT_S elapsed without acquiring the lock: report it and keep waiting.
+        printf("%d: mutex %p TIMED OUT\n", tid, (void*)lock);
+    }
+    RELEASE_ASSERT(!err, "could not lock mutex, error %d", err);
+    return err;
+}
+
+#endif
--- a/src/runtime/builtin_modules/builtins.cpp
+++ b/src/runtime/builtin_modules/builtins.cpp
@@ -213,7 +213,7 @@ extern "C" Box* next(Box* iterator, Box* _default) {
    } catch (ExcInfo e) {
        if (_default && e.matches(StopIteration))
            return _default;
-        throw;
+        throw e;
    }
 }

@@ -877,6 +877,7 @@ Box* print(BoxedTuple* args, BoxedDict* kwargs) {
    Box* space_box = boxStrConstant(" ");

    // TODO softspace handling?
+    // TODO: duplicates code with ASTInterpreter::visit_print()
    bool first = true;
    for (auto e : *args) {
        BoxedString* s = str(e);

--- a/src/runtime/builtin_modules/thread.cpp
+++ b/src/runtime/builtin_modules/thread.cpp
@@ -88,6 +88,9 @@ Box* startNewThread(Box* target, Box* args, Box* kw) {
 * codes in the return value of the sem_ calls (like the pthread_ functions).
 * Correct implementations return -1 and put the code in errno. This supports
 * either.
+ *
+ * NOTE (2015-05-14): According to `man pthread_mutex_lock` on my system (Ubuntu
+ * 14.10), returning the error code is expected behavior. - rntz
 */
 static int fix_status(int status) {
    return (status == -1) ? errno : status;
@@ -134,6 +137,7 @@ public:

        success = (status == 0) ? 1 : 0;

+        RELEASE_ASSERT(status == 0 || !waitflag, "could not lock mutex! error %d", status);
        return boxBool(status == 0);
    }


--- a/src/runtime/cxx_unwind.cpp
+++ b/src/runtime/cxx_unwind.cpp
--- a/src/runtime/descr.cpp
+++ b/src/runtime/descr.cpp
@@ -42,7 +42,7 @@ static void propertyDocCopy(BoxedProperty* prop, Box* fget) {
        get_doc = getattrInternal(fget, "__doc__", NULL);
    } catch (ExcInfo e) {
        if (!e.matches(Exception)) {
-            throw;
+            throw e;
        }
        get_doc = NULL;
    }

--- a/src/runtime/ics.cpp
+++ b/src/runtime/ics.cpp
@@ -20,6 +20,7 @@
 #include "codegen/memmgr.h"
 #include "codegen/patchpoints.h"
 #include "codegen/stackmaps.h"
+#include "codegen/unwinding.h" // registerDynamicEhFrame
 #include "core/common.h"
 #include "core/options.h"
 #include "core/stats.h"
@@ -142,10 +143,12 @@ static const char _eh_frame_template[] =
    "\x00\x00\x00\x00" // terminator
    ;
 #endif
-#define EH_FRAME_SIZE sizeof(_eh_frame_template)
+#define EH_FRAME_SIZE (sizeof(_eh_frame_template) - 1) // omit string-terminating null byte
+
+static_assert(sizeof("") == 1, "strings are null-terminated");

 static void writeTrivialEhFrame(void* eh_frame_addr, void* func_addr, uint64_t func_size) {
-    memcpy(eh_frame_addr, _eh_frame_template, sizeof(_eh_frame_template));
+    memcpy(eh_frame_addr, _eh_frame_template, EH_FRAME_SIZE);

    int32_t* offset_ptr = (int32_t*)((uint8_t*)eh_frame_addr + 0x20);
    int32_t* size_ptr = (int32_t*)((uint8_t*)eh_frame_addr + 0x24);
@@ -162,6 +165,9 @@ void EHFrameManager::writeAndRegister(void* func_addr, uint64_t func_size) {
    assert(eh_frame_addr == NULL);
    eh_frame_addr = malloc(EH_FRAME_SIZE);
    writeTrivialEhFrame(eh_frame_addr, func_addr, func_size);
+    // (EH_FRAME_SIZE - 4) to omit the 4-byte null terminator, otherwise we trip an assert in parseEhFrame.
+    // TODO: can we omit the terminator in general?
+    registerDynamicEhFrame((uint64_t)func_addr, func_size, (uint64_t)eh_frame_addr, EH_FRAME_SIZE - 4);
    registerEHFrames((uint8_t*)eh_frame_addr, (uint64_t)eh_frame_addr, EH_FRAME_SIZE);
 }


--- a/src/runtime/objmodel.cpp
+++ b/src/runtime/objmodel.cpp
@@ -4968,7 +4968,7 @@ extern "C" Box* boxedLocalsGet(Box* boxedLocals, const char* attr, Box* globals)
            // If it throws a KeyError, then the variable doesn't exist so move on
            // and check the globals (below); otherwise, just propogate the exception.
            if (!isSubclass(e.value->cls, KeyError)) {
-                throw;
+                throw e;
            }
        }
    }

--- a/src/runtime/stacktrace.cpp
+++ b/src/runtime/stacktrace.cpp
@@ -45,59 +45,10 @@ void showBacktrace() {
    }
 }

-// Currently-unused libunwind-based unwinding:
-void unwindExc(Box* exc_obj) __attribute__((noreturn));
-void unwindExc(Box* exc_obj) {
-    unw_cursor_t cursor;
-    unw_context_t uc;
-    unw_word_t ip, sp;
-
-    unw_getcontext(&uc);
-    unw_init_local(&cursor, &uc);
-
-    int code;
-    unw_proc_info_t pip;
-
-    while (unw_step(&cursor) > 0) {
-        unw_get_reg(&cursor, UNW_REG_IP, &ip);
-        unw_get_reg(&cursor, UNW_REG_SP, &sp);
-        printf("ip = %lx, sp = %lx\n", (long)ip, (long)sp);
-
-        code = unw_get_proc_info(&cursor, &pip);
-        RELEASE_ASSERT(code == 0, "");
-
-        // printf("%lx %lx %lx %lx %lx %lx %d %d %p\n", pip.start_ip, pip.end_ip, pip.lsda, pip.handler, pip.gp,
-        // pip.flags, pip.format, pip.unwind_info_size, pip.unwind_info);
-
-        assert((pip.lsda == 0) == (pip.handler == 0));
-        assert(pip.flags == 0);
-
-        if (pip.handler == 0) {
-            if (VERBOSITY())
-                printf("Skipping frame without handler\n");
-
-            continue;
-        }
-
-        printf("%lx %lx %lx\n", pip.lsda, pip.handler, pip.flags);
-        // assert(pip.handler == (uintptr_t)__gxx_personality_v0 || pip.handler == (uintptr_t)__py_personality_v0);
-
-        // auto handler_fn = (int (*)(int, int, uint64_t, void*, void*))pip.handler;
-        ////handler_fn(1, 1 /* _UA_SEARCH_PHASE */, 0 /* exc_class */, NULL, NULL);
-        // handler_fn(2, 2 /* _UA_SEARCH_PHASE */, 0 /* exc_class */, NULL, NULL);
-        unw_set_reg(&cursor, UNW_REG_IP, 1);
-
-        // TODO testing:
-        // unw_resume(&cursor);
-    }
-
-    abort();
-}
-
 void raiseRaw(const ExcInfo& e) __attribute__((__noreturn__));
 void raiseRaw(const ExcInfo& e) {
    STAT_TIMER(t0, "us_timer_raiseraw");
-    // Should set these to None before getting here:
+    // Should set these to None rather than null before getting here:
    assert(e.type);
    assert(e.value);
    assert(e.traceback);
@@ -105,11 +56,7 @@ void raiseRaw(const ExcInfo& e) {
    assert(gc::isValidGCObject(e.value));
    assert(gc::isValidGCObject(e.traceback));

-    // Using libgcc:
    throw e;
-
-    // Using libunwind
-    // unwindExc(exc_obj);
 }

 void raiseExc(Box* exc_obj) {

--- a/test/tests/dash_c.py
+++ b/test/tests/dash_c.py
@@ -7,7 +7,10 @@ with open('/dev/null')as ignore:
    # We don't (yet?) require exact stderr or return code compatibility w/
    # python. So we just check that we succeed or fail as appropriate.
    def run(args):
-        print subprocess.call([me] + args, stderr=ignore)
+        code = 0 == subprocess.call([me] + args, stderr=ignore)
+        sys.stdout.flush()
+        print code
+        sys.stdout.flush()

    run(["-c", "print 2 + 2"])
    run(["-c", "import sys; print sys.argv", "hello", "world"])

--- a/test/tests/raise_2arg.py
+++ b/test/tests/raise_2arg.py
+# Two-argument `raise' statements where the second argument is itself an exception
+# instance, but of a class unrelated to the first argument.
+class A(Exception): pass
+class B(Exception): pass
+
+def f():
+    # Checks which handler catches `raise A, B(2)` and how the caught exception prints.
+    # NOTE(review): under Python 2 semantics B(2) is not an instance of A, so it should be passed to A's
+    # constructor and the `except A` branch should run, printing "A 2" -- confirm against CPython.
+    try: raise A, B(2)
+    except A as e:
+        print 'A', e
+    except B as e:
+        print 'B', e
+f()
--- a/tools/git_am_automated.py
+++ b/tools/git_am_automated.py
+import os, os.path
+import subprocess
+import sys
+
+def main():
+    """Apply patch files to a git checkout with `git am`, rolling back on failure.
+
+    Usage: git_am_automated.py REPO PATCH [PATCH ...]
+
+    REPO is the working tree to patch (it must contain a `.git` entry); each PATCH is a file or
+    directory accepted by `git am`. Exits with status 0 if every patch applied (or none were given),
+    1 if `git am` failed and was aborted, and 2 on a usage error.
+    """
+    if len(sys.argv) < 2:
+        print >>sys.stderr, "usage: %s REPO [PATCH ...]" % (sys.argv[0],)
+        sys.exit(2)
+
+    repo = sys.argv[1]
+    # Resolve the patch paths now: we chdir into the repo below, which would otherwise make any path
+    # given relative to the caller's working directory point at the wrong place.
+    patches = [os.path.abspath(fn) for fn in sys.argv[2:]]
+    gitfile = os.path.join(repo, '.git')
+
+    assert os.path.isdir(repo), "Expected to find repo at %s" % (repo,)
+    assert os.path.exists(gitfile), "Expected %s to exist" % (gitfile,)
+    for fn in patches:
+        assert os.path.exists(fn), "Expected a patch file/dir at %s" % (fn,)
+
+    if not patches:
+        # With no mailbox arguments `git am` reads patches from stdin, which is never what an
+        # automated build wants; there is simply nothing to apply.
+        sys.exit(0)
+
+    os.chdir(repo)
+    code = subprocess.call(["git", "am", "--"] + patches)
+    if not code:
+        sys.exit(0)
+
+    # git am errored. recover by unconditionally aborting.
+    print >>sys.stderr, "----- Running `git am --abort' -----"
+    subprocess.check_call(["git", "am", "--abort"])
+    sys.exit(1)
+
+if __name__ == '__main__':
+    main()