bpo-33608: Factor out a private, per-interpreter _Py_AddPendingCall(). (gh-12360)

This is effectively an un-revert of #11617 and #12024 (reverted in #12159). Portions of those were merged in other PRs (with lower risk) and this represents the remainder. Note that I found 3 different bugs in the original PRs and have fixed them here.

bpo-33608: Factor out a private, per-interpreter _Py_AddPendingCall(). (gh-12360)
This is effectively an un-revert of #11617 and #12024 (reverted in #12159). Portions of those were merged in other PRs (with lower risk) and this represents the remainder. Note that I found 3 different bugs in the original PRs and have fixed them here.
f13c5c8b · Eric Snow · GitHub · 44235041 · f13c5c8b · f13c5c8b
Commit f13c5c8b authored Apr 12, 2019 by Eric Snow Committed by GitHub Apr 12, 2019
11 changed files
--- a/Include/ceval.h
+++ b/Include/ceval.h
@@ -221,7 +221,7 @@ PyAPI_FUNC(Py_ssize_t) _PyEval_RequestCodeExtraIndex(freefunc);
 #ifndef Py_LIMITED_API
 PyAPI_FUNC(int) _PyEval_SliceIndex(PyObject *, Py_ssize_t *);
 PyAPI_FUNC(int) _PyEval_SliceIndexNotNone(PyObject *, Py_ssize_t *);
-PyAPI_FUNC(void) _PyEval_SignalAsyncExc(void);
+PyAPI_FUNC(void) _PyEval_SignalAsyncExc(PyInterpreterState *);
 #endif

 /* Masks and values used by FORMAT_VALUE opcode. */

--- a/Include/internal/pycore_ceval.h
+++ b/Include/internal/pycore_ceval.h
@@ -11,7 +11,11 @@ extern "C" {
 #include "pycore_atomic.h"
 #include "pythread.h"

-PyAPI_FUNC(void) _Py_FinishPendingCalls(void);
+struct _is;  // See PyInterpreterState in cpython/pystate.h.
+
+PyAPI_FUNC(int) _Py_AddPendingCall(struct _is*, unsigned long, int (*)(void *), void *);
+PyAPI_FUNC(int) _Py_MakePendingCalls(struct _is*);
+PyAPI_FUNC(void) _Py_FinishPendingCalls(struct _is*);

 struct _pending_calls {
    int finishing;
@@ -24,6 +28,7 @@ struct _pending_calls {
    int async_exc;
 #define NPENDINGCALLS 32
    struct {
+        unsigned long thread_id;
        int (*func)(void *);
        void *arg;
    } calls[NPENDINGCALLS];
@@ -31,6 +36,13 @@ struct _pending_calls {
    int last;
 };

+struct _ceval_interpreter_state {
+    /* This single variable consolidates all requests to break out of
+       the fast path in the eval loop. */
+    _Py_atomic_int eval_breaker;
+    struct _pending_calls pending;
+};
+
 #include "pycore_gil.h"

 struct _ceval_runtime_state {
@@ -41,12 +53,8 @@ struct _ceval_runtime_state {
       c_tracefunc.  This speeds up the if statement in
       PyEval_EvalFrameEx() after fast_next_opcode. */
    int tracing_possible;
-    /* This single variable consolidates all requests to break out of
-       the fast path in the eval loop. */
-    _Py_atomic_int eval_breaker;
    /* Request for dropping the GIL */
    _Py_atomic_int gil_drop_request;
-    struct _pending_calls pending;
    /* Request for checking signals. */
    _Py_atomic_int signals_pending;
    struct _gil_runtime_state gil;

--- a/Include/internal/pycore_pystate.h
+++ b/Include/internal/pycore_pystate.h
@@ -12,6 +12,7 @@ extern "C" {
 #include "pystate.h"
 #include "pythread.h"

+#include "pycore_atomic.h"
 #include "pycore_ceval.h"
 #include "pycore_pathconfig.h"
 #include "pycore_pymem.h"
@@ -83,6 +84,8 @@ struct _is {
    PyObject *pyexitmodule;

    uint64_t tstate_next_unique_id;
+
+    struct _ceval_interpreter_state ceval;
 };

 PyAPI_FUNC(struct _is*) _PyInterpreterState_LookUpID(PY_INT64_T);

--- a/Lib/test/test_capi.py
+++ b/Lib/test/test_capi.py
@@ -373,7 +373,7 @@ class TestPendingCalls(unittest.TestCase):
    def test_pendingcalls_threaded(self):

        #do every callback on a separate thread
-        n = 32 #total callbacks
+        n = 32 #total callbacks (see NPENDINGCALLS in pycore_ceval.h)
        threads = []
        class foo(object):pass
        context = foo()

--- a/Misc/NEWS.d/next/Core and Builtins/2018-09-15-12-13-46.bpo-33608.avmvVP.rst
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-09-15-12-13-46.bpo-33608.avmvVP.rst
+We added a new internal _Py_AddPendingCall() that operates relative to the
+provided interpreter.  This allows us to use the existing implementation to
+ask another interpreter to do work that cannot be done in the current
+interpreter, such as decref'ing an object the other interpreter owns.  The
+existing Py_AddPendingCall() only operates relative to the main interpreter.
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -2445,6 +2445,7 @@ pending_threadfunc(PyObject *self, PyObject *arg)
    Py_INCREF(callable);

    Py_BEGIN_ALLOW_THREADS
+    /* XXX Use the internal _Py_AddPendingCall(). */
    r = Py_AddPendingCall(&_pending_callback, callable);
    Py_END_ALLOW_THREADS


--- a/Modules/signalmodule.c
+++ b/Modules/signalmodule.c
@@ -19,6 +19,7 @@
 #include <process.h>
 #endif
 #endif
+#include "internal/pycore_pystate.h"

 #ifdef HAVE_SIGNAL_H
 #include <signal.h>
@@ -295,8 +296,10 @@ trip_signal(int sig_num)
                {
                    /* Py_AddPendingCall() isn't signal-safe, but we
                       still use it for this exceptional case. */
-                    Py_AddPendingCall(report_wakeup_send_error,
-                                      (void *)(intptr_t) last_error);
+                    _Py_AddPendingCall(_PyRuntime.interpreters.main,
+                                       main_thread,
+                                       report_wakeup_send_error,
+                                       (void *)(intptr_t) last_error);
                }
            }
        }
@@ -313,8 +316,10 @@ trip_signal(int sig_num)
                {
                    /* Py_AddPendingCall() isn't signal-safe, but we
                       still use it for this exceptional case. */
-                    Py_AddPendingCall(report_wakeup_write_error,
-                                      (void *)(intptr_t)errno);
+                    _Py_AddPendingCall(_PyRuntime.interpreters.main,
+                                       main_thread,
+                                       report_wakeup_write_error,
+                                       (void *)(intptr_t)errno);
                }
            }
        }

--- a/Python/ceval.c
+++ b/Python/ceval.c
--- a/Python/ceval_gil.h
+++ b/Python/ceval_gil.h
@@ -176,7 +176,7 @@ static void drop_gil(PyThreadState *tstate)
                    &_PyRuntime.ceval.gil.last_holder)
            ) == tstate)
        {
-        RESET_GIL_DROP_REQUEST();
+        RESET_GIL_DROP_REQUEST(tstate->interp);
            /* NOTE: if COND_WAIT does not atomically start waiting when
               releasing the mutex, another thread can run through, take
               the GIL and drop it again, and reset the condition
@@ -213,7 +213,7 @@ static void take_gil(PyThreadState *tstate)
        if (timed_out &&
            _Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked) &&
            _PyRuntime.ceval.gil.switch_number == saved_switchnum) {
-            SET_GIL_DROP_REQUEST();
+            SET_GIL_DROP_REQUEST(tstate->interp);
        }
    }
 _ready:
@@ -239,10 +239,10 @@ _ready:
    MUTEX_UNLOCK(_PyRuntime.ceval.gil.switch_mutex);
 #endif
    if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request)) {
-        RESET_GIL_DROP_REQUEST();
+        RESET_GIL_DROP_REQUEST(tstate->interp);
    }
    if (tstate->async_exc != NULL) {
-        _PyEval_SignalAsyncExc();
+        _PyEval_SignalAsyncExc(tstate->interp);
    }

    MUTEX_UNLOCK(_PyRuntime.ceval.gil.mutex);

--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -1146,7 +1146,7 @@ Py_FinalizeEx(void)
    interp = tstate->interp;

    // Make any remaining pending calls.
-    _Py_FinishPendingCalls();
+    _Py_FinishPendingCalls(interp);

    /* The interpreter is still entirely intact at this point, and the
     * exit funcs may be relying on that.  In particular, if some thread
@@ -1552,6 +1552,9 @@ Py_EndInterpreter(PyThreadState *tstate)
    // Wrap up existing "threading"-module-created, non-daemon threads.
    wait_for_thread_shutdown();

+    // Make any remaining pending calls.
+    _Py_FinishPendingCalls(interp);
+
    call_py_exitfuncs(interp);

    if (tstate != interp->tstate_head || tstate->next != NULL)

--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -173,6 +173,14 @@ PyInterpreterState_New(void)
    memset(interp, 0, sizeof(*interp));
    interp->id_refcount = -1;
    interp->check_interval = 100;
+
+    interp->ceval.pending.lock = PyThread_allocate_lock();
+    if (interp->ceval.pending.lock == NULL) {
+        PyErr_SetString(PyExc_RuntimeError,
+                        "failed to create interpreter ceval pending mutex");
+        return NULL;
+    }
+
    interp->core_config = _PyCoreConfig_INIT;
    interp->eval_frame = _PyEval_EvalFrameDefault;
 #ifdef HAVE_DLOPEN
@@ -279,6 +287,9 @@ PyInterpreterState_Delete(PyInterpreterState *interp)
    if (interp->id_mutex != NULL) {
        PyThread_free_lock(interp->id_mutex);
    }
+    if (interp->ceval.pending.lock != NULL) {
+        PyThread_free_lock(interp->ceval.pending.lock);
+    }
    PyMem_RawFree(interp);
 }

@@ -928,7 +939,7 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc)
            p->async_exc = exc;
            HEAD_UNLOCK();
            Py_XDECREF(old_exc);
-            _PyEval_SignalAsyncExc();
+            _PyEval_SignalAsyncExc(interp);
            return 1;
        }
    }
@@ -1342,7 +1353,7 @@ _PyObject_GetCrossInterpreterData(PyObject *obj, _PyCrossInterpreterData *data)
    return 0;
 }

-static void
+static int
 _release_xidata(void *arg)
 {
    _PyCrossInterpreterData *data = (_PyCrossInterpreterData *)arg;
@@ -1350,30 +1361,8 @@ _release_xidata(void *arg)
        data->free(data->data);
    }
    Py_XDECREF(data->obj);
-}
-
-static void
-_call_in_interpreter(PyInterpreterState *interp,
-                     void (*func)(void *), void *arg)
-{
-    /* We would use Py_AddPendingCall() if it weren't specific to the
-     * main interpreter (see bpo-33608).  In the meantime we take a
-     * naive approach.
-     */
-    PyThreadState *save_tstate = NULL;
-    if (interp != _PyInterpreterState_Get()) {
-        // XXX Using the "head" thread isn't strictly correct.
-        PyThreadState *tstate = PyInterpreterState_ThreadHead(interp);
-        // XXX Possible GILState issues?
-        save_tstate = PyThreadState_Swap(tstate);
-    }
-
-    func(arg);
-
-    // Switch back.
-    if (save_tstate != NULL) {
-        PyThreadState_Swap(save_tstate);
-    }
+    PyMem_Free(data);
+    return 0;
 }

 void
@@ -1384,7 +1373,7 @@ _PyCrossInterpreterData_Release(_PyCrossInterpreterData *data)
        return;
    }

-    // Switch to the original interpreter.
+    // Get the original interpreter.
    PyInterpreterState *interp = _PyInterpreterState_LookUpID(data->interp);
    if (interp == NULL) {
        // The intepreter was already destroyed.
@@ -1393,9 +1382,24 @@ _PyCrossInterpreterData_Release(_PyCrossInterpreterData *data)
        }
        return;
    }
+    // XXX There's an ever-so-slight race here...
+    if (interp->finalizing) {
+        // XXX Someone leaked some memory...
+        return;
+    }

    // "Release" the data and/or the object.
-    _call_in_interpreter(interp, _release_xidata, data);
+    _PyCrossInterpreterData *copied = PyMem_Malloc(sizeof(_PyCrossInterpreterData));
+    if (copied == NULL) {
+        PyErr_SetString(PyExc_MemoryError,
+                        "Not enough memory to preserve cross-interpreter data");
+        PyErr_Print();
+        return;
+    }
+    memcpy(copied, data, sizeof(_PyCrossInterpreterData));
+    if (_Py_AddPendingCall(interp, 0, _release_xidata, copied) != 0) {
+        // XXX Queue full or couldn't get lock.  Try again somehow?
+    }
 }

 PyObject *