Commit 7fd78399 authored by Kevin Modzelewski

Try to encourage the allowGLReadPreemption (AGRP) fast-path to get inlined

Put the common-case (where we don't do any work) in an
inline-able function, and keep the slow stuff hidden.
parent 544d93c7
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
.text._ZN6pyston15objectNewNoArgsEPNS_10BoxedClassE .text._ZN6pyston15objectNewNoArgsEPNS_10BoxedClassE
.text._PyIndex_Check .text._PyIndex_Check
.text._ZN6pyston9threading21allowGLReadPreemptionEv .text._ZN6pyston9threading21allowGLReadPreemptionEv
.text._ZN6pyston9threading22_allowGLReadPreemptionEv
.text._ZN6pyston9getOpNameEi .text._ZN6pyston9getOpNameEi
.text._ZN6pyston8callFuncEPNS_17BoxedFunctionBaseEPNS_15CallRewriteArgsENS_11ArgPassSpecEPNS_3BoxES6_S6_PS6_PKSt6vectorIPKSsSaISA_EE .text._ZN6pyston8callFuncEPNS_17BoxedFunctionBaseEPNS_15CallRewriteArgsENS_11ArgPassSpecEPNS_3BoxES6_S6_PS6_PKSt6vectorIPKSsSaISA_EE
.text._ZN6pyston2gc9GCVisitor5visitEPv .text._ZN6pyston2gc9GCVisitor5visitEPv
......
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
#include "core/threading.h" #include "core/threading.h"
#include <atomic>
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <err.h> #include <err.h>
...@@ -481,7 +480,7 @@ extern "C" void endAllowThreads() noexcept { ...@@ -481,7 +480,7 @@ extern "C" void endAllowThreads() noexcept {
static pthread_mutex_t gil = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t gil = PTHREAD_MUTEX_INITIALIZER;
static std::atomic<int> threads_waiting_on_gil(0); std::atomic<int> threads_waiting_on_gil(0);
static pthread_cond_t gil_acquired = PTHREAD_COND_INITIALIZER; static pthread_cond_t gil_acquired = PTHREAD_COND_INITIALIZER;
extern "C" void PyEval_ReInitThreads() noexcept { extern "C" void PyEval_ReInitThreads() noexcept {
...@@ -524,9 +523,6 @@ void releaseGLWrite() { ...@@ -524,9 +523,6 @@ void releaseGLWrite() {
pthread_mutex_unlock(&gil); pthread_mutex_unlock(&gil);
} }
#define GIL_CHECK_INTERVAL 1000
// Note: this doesn't need to be an atomic, since it should
// only be accessed by the thread that holds the gil:
int gil_check_count = 0; int gil_check_count = 0;
// TODO: this function is fair in that it forces a thread to give up the GIL // TODO: this function is fair in that it forces a thread to give up the GIL
...@@ -535,37 +531,19 @@ int gil_check_count = 0; ...@@ -535,37 +531,19 @@ int gil_check_count = 0;
// switching back and forth, and a third that never gets run. // switching back and forth, and a third that never gets run.
// We could enforce fairness by having a FIFO of events (implemented with mutexes?) // We could enforce fairness by having a FIFO of events (implemented with mutexes?)
// and make sure to always wake up the longest-waiting one. // and make sure to always wake up the longest-waiting one.
void allowGLReadPreemption() { void _allowGLReadPreemption() {
#if ENABLE_SAMPLING_PROFILER assert(gil_check_count >= GIL_CHECK_INTERVAL);
if (unlikely(sigprof_pending)) { gil_check_count = 0;
// Output multiple stacktraces if we received multiple signals
// between being able to handle it (such as being in LLVM or the GC),
// to try to fully account for that time.
while (sigprof_pending) {
_printStacktrace();
sigprof_pending--;
}
}
#endif
// Double-checked locking: first read with no ordering constraint: // Double check this, since if we are wrong about there being a thread waiting on the gil,
if (!threads_waiting_on_gil.load(std::memory_order_relaxed)) // we're going to get stuck in the following pthread_cond_wait:
if (!threads_waiting_on_gil.load(std::memory_order_seq_cst))
return; return;
gil_check_count++; threads_waiting_on_gil++;
if (gil_check_count >= GIL_CHECK_INTERVAL) { pthread_cond_wait(&gil_acquired, &gil);
gil_check_count = 0; threads_waiting_on_gil--;
pthread_cond_signal(&gil_acquired);
// Double check this, since if we are wrong about there being a thread waiting on the gil,
// we're going to get stuck in the following pthread_cond_wait:
if (!threads_waiting_on_gil.load(std::memory_order_seq_cst))
return;
threads_waiting_on_gil++;
pthread_cond_wait(&gil_acquired, &gil);
threads_waiting_on_gil--;
pthread_cond_signal(&gil_acquired);
}
} }
#elif THREADING_USE_GRWL #elif THREADING_USE_GRWL
static pthread_rwlock_t grwl = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP; static pthread_rwlock_t grwl = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP;
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#ifndef PYSTON_CORE_THREADING_H #ifndef PYSTON_CORE_THREADING_H
#define PYSTON_CORE_THREADING_H #define PYSTON_CORE_THREADING_H
#include <atomic>
#include <cstdint> #include <cstdint>
#include <cstring> #include <cstring>
#include <ucontext.h> #include <ucontext.h>
...@@ -81,7 +82,36 @@ void acquireGLRead(); ...@@ -81,7 +82,36 @@ void acquireGLRead();
void releaseGLRead(); void releaseGLRead();
void acquireGLWrite(); void acquireGLWrite();
void releaseGLWrite(); void releaseGLWrite();
void allowGLReadPreemption(); void _allowGLReadPreemption();
#define GIL_CHECK_INTERVAL 1000
// Note: this doesn't need to be an atomic, since it should
// only be accessed by the thread that holds the gil:
extern int gil_check_count;
extern std::atomic<int> threads_waiting_on_gil;
inline void allowGLReadPreemption() {
#if ENABLE_SAMPLING_PROFILER
if (unlikely(sigprof_pending)) {
// Output multiple stacktraces if we received multiple signals
// between being able to handle it (such as being in LLVM or the GC),
// to try to fully account for that time.
while (sigprof_pending) {
_printStacktrace();
sigprof_pending--;
}
}
#endif
// Double-checked locking: first read with no ordering constraint:
if (!threads_waiting_on_gil.load(std::memory_order_relaxed))
return;
gil_check_count++;
if (likely(gil_check_count < GIL_CHECK_INTERVAL))
return;
_allowGLReadPreemption();
}
// Note: promoteGL is free to drop the lock and then reacquire // Note: promoteGL is free to drop the lock and then reacquire
void promoteGL(); void promoteGL();
void demoteGL(); void demoteGL();
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "codegen/irgen/hooks.h" #include "codegen/irgen/hooks.h"
#include "core/ast.h" #include "core/ast.h"
#include "core/threading.h"
#include "core/types.h" #include "core/types.h"
#include "gc/heap.h" #include "gc/heap.h"
#include "runtime/complex.h" #include "runtime/complex.h"
...@@ -138,6 +139,8 @@ void force() { ...@@ -138,6 +139,8 @@ void force() {
FORCE(boxedLocalsGet); FORCE(boxedLocalsGet);
FORCE(boxedLocalsDel); FORCE(boxedLocalsDel);
FORCE(threading::allowGLReadPreemption);
// FORCE(listIter); // FORCE(listIter);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment