Commit de36c6be authored by Kevin Modzelewski

Merge pull request #676 from kmod/perf

An assortment of misc small perf changes
parents 2e9a9e35 ac3dedc2
...@@ -108,11 +108,7 @@ else ...@@ -108,11 +108,7 @@ else
LLVM_BUILD := $(LLVM_TRUNK_BUILD) LLVM_BUILD := $(LLVM_TRUNK_BUILD)
endif endif
ifeq ($(FORCE_TRUNK_BINARIES),1) LLVM_BIN := ./build/Release/llvm/bin
LLVM_BIN := $(LLVM_TRUNK_BUILD)/Release/bin
else
LLVM_BIN := $(LLVM_BUILD)/Release/bin
endif
LLVM_LINK_LIBS := core mcjit native bitreader bitwriter ipo irreader debuginfodwarf instrumentation LLVM_LINK_LIBS := core mcjit native bitreader bitwriter ipo irreader debuginfodwarf instrumentation
ifneq ($(ENABLE_INTEL_JIT_EVENTS),0) ifneq ($(ENABLE_INTEL_JIT_EVENTS),0)
......
# Microbenchmark: repeatedly read an instance attribute through the getattr
# builtin, with the builtin bound to a local name before the loop.
class C(object):
    pass


def f():
    lookup = getattr
    obj = C()
    obj.o = 1
    # The result is discarded on purpose; only the lookup itself is exercised.
    for _ in xrange(10000000):
        lookup(obj, "o")


f()
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
.text._ZN6pyston15objectNewNoArgsEPNS_10BoxedClassE .text._ZN6pyston15objectNewNoArgsEPNS_10BoxedClassE
.text._PyIndex_Check .text._PyIndex_Check
.text._ZN6pyston9threading21allowGLReadPreemptionEv .text._ZN6pyston9threading21allowGLReadPreemptionEv
.text._ZN6pyston9threading22_allowGLReadPreemptionEv
.text._ZN6pyston9getOpNameEi .text._ZN6pyston9getOpNameEi
.text._ZN6pyston8callFuncEPNS_17BoxedFunctionBaseEPNS_15CallRewriteArgsENS_11ArgPassSpecEPNS_3BoxES6_S6_PS6_PKSt6vectorIPKSsSaISA_EE .text._ZN6pyston8callFuncEPNS_17BoxedFunctionBaseEPNS_15CallRewriteArgsENS_11ArgPassSpecEPNS_3BoxES6_S6_PS6_PKSt6vectorIPKSsSaISA_EE
.text._ZN6pyston2gc9GCVisitor5visitEPv .text._ZN6pyston2gc9GCVisitor5visitEPv
......
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
#ifndef PYSTON_ANALYSIS_SCOPINGANALYSIS_H #ifndef PYSTON_ANALYSIS_SCOPINGANALYSIS_H
#define PYSTON_ANALYSIS_SCOPINGANALYSIS_H #define PYSTON_ANALYSIS_SCOPINGANALYSIS_H
#include "llvm/ADT/DenseMap.h"
#include "core/common.h" #include "core/common.h"
#include "core/stringpool.h" #include "core/stringpool.h"
...@@ -146,14 +148,14 @@ public: ...@@ -146,14 +148,14 @@ public:
class ScopingAnalysis { class ScopingAnalysis {
public: public:
struct ScopeNameUsage; struct ScopeNameUsage;
typedef std::unordered_map<AST*, ScopeNameUsage*> NameUsageMap; typedef llvm::DenseMap<AST*, ScopeNameUsage*> NameUsageMap;
private: private:
std::unordered_map<AST*, ScopeInfo*> scopes; llvm::DenseMap<AST*, ScopeInfo*> scopes;
AST_Module* parent_module; AST_Module* parent_module;
InternedStringPool* interned_strings; InternedStringPool* interned_strings;
std::unordered_map<AST*, AST*> scope_replacements; llvm::DenseMap<AST*, AST*> scope_replacements;
ScopeInfo* analyzeSubtree(AST* node); ScopeInfo* analyzeSubtree(AST* node);
void processNameUsages(NameUsageMap* usages); void processNameUsages(NameUsageMap* usages);
......
...@@ -186,8 +186,9 @@ void Rewriter::ConstLoader::moveImmediate(uint64_t val, assembler::Register dst_ ...@@ -186,8 +186,9 @@ void Rewriter::ConstLoader::moveImmediate(uint64_t val, assembler::Register dst_
assembler::Register Rewriter::ConstLoader::findConst(uint64_t val, bool& found_value) { assembler::Register Rewriter::ConstLoader::findConst(uint64_t val, bool& found_value) {
assert(rewriter->phase_emitting); assert(rewriter->phase_emitting);
if (constToVar.count(val) > 0) { auto it = constToVar.find(val);
RewriterVar* var = constToVar[val]; if (it != constToVar.end()) {
RewriterVar* var = it->second;
for (Location l : var->locations) { for (Location l : var->locations) {
if (l.type == Location::Register) { if (l.type == Location::Register) {
found_value = true; found_value = true;
......
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
#include "core/threading.h" #include "core/threading.h"
#include <atomic>
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <err.h> #include <err.h>
...@@ -481,7 +480,7 @@ extern "C" void endAllowThreads() noexcept { ...@@ -481,7 +480,7 @@ extern "C" void endAllowThreads() noexcept {
static pthread_mutex_t gil = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t gil = PTHREAD_MUTEX_INITIALIZER;
static std::atomic<int> threads_waiting_on_gil(0); std::atomic<int> threads_waiting_on_gil(0);
static pthread_cond_t gil_acquired = PTHREAD_COND_INITIALIZER; static pthread_cond_t gil_acquired = PTHREAD_COND_INITIALIZER;
extern "C" void PyEval_ReInitThreads() noexcept { extern "C" void PyEval_ReInitThreads() noexcept {
...@@ -524,9 +523,6 @@ void releaseGLWrite() { ...@@ -524,9 +523,6 @@ void releaseGLWrite() {
pthread_mutex_unlock(&gil); pthread_mutex_unlock(&gil);
} }
#define GIL_CHECK_INTERVAL 1000
// Note: this doesn't need to be an atomic, since it should
// only be accessed by the thread that holds the gil:
int gil_check_count = 0; int gil_check_count = 0;
// TODO: this function is fair in that it forces a thread to give up the GIL // TODO: this function is fair in that it forces a thread to give up the GIL
...@@ -535,37 +531,19 @@ int gil_check_count = 0; ...@@ -535,37 +531,19 @@ int gil_check_count = 0;
// switching back and forth, and a third that never gets run. // switching back and forth, and a third that never gets run.
// We could enforce fairness by having a FIFO of events (implemented with mutexes?) // We could enforce fairness by having a FIFO of events (implemented with mutexes?)
// and make sure to always wake up the longest-waiting one. // and make sure to always wake up the longest-waiting one.
void allowGLReadPreemption() { void _allowGLReadPreemption() {
#if ENABLE_SAMPLING_PROFILER assert(gil_check_count >= GIL_CHECK_INTERVAL);
if (unlikely(sigprof_pending)) { gil_check_count = 0;
// Output multiple stacktraces if we received multiple signals
// between being able to handle it (such as being in LLVM or the GC),
// to try to fully account for that time.
while (sigprof_pending) {
_printStacktrace();
sigprof_pending--;
}
}
#endif
// Double-checked locking: first read with no ordering constraint: // Double check this, since if we are wrong about there being a thread waiting on the gil,
if (!threads_waiting_on_gil.load(std::memory_order_relaxed)) // we're going to get stuck in the following pthread_cond_wait:
if (!threads_waiting_on_gil.load(std::memory_order_seq_cst))
return; return;
gil_check_count++; threads_waiting_on_gil++;
if (gil_check_count >= GIL_CHECK_INTERVAL) { pthread_cond_wait(&gil_acquired, &gil);
gil_check_count = 0; threads_waiting_on_gil--;
pthread_cond_signal(&gil_acquired);
// Double check this, since if we are wrong about there being a thread waiting on the gil,
// we're going to get stuck in the following pthread_cond_wait:
if (!threads_waiting_on_gil.load(std::memory_order_seq_cst))
return;
threads_waiting_on_gil++;
pthread_cond_wait(&gil_acquired, &gil);
threads_waiting_on_gil--;
pthread_cond_signal(&gil_acquired);
}
} }
#elif THREADING_USE_GRWL #elif THREADING_USE_GRWL
static pthread_rwlock_t grwl = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP; static pthread_rwlock_t grwl = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP;
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#ifndef PYSTON_CORE_THREADING_H #ifndef PYSTON_CORE_THREADING_H
#define PYSTON_CORE_THREADING_H #define PYSTON_CORE_THREADING_H
#include <atomic>
#include <cstdint> #include <cstdint>
#include <cstring> #include <cstring>
#include <ucontext.h> #include <ucontext.h>
...@@ -81,7 +82,36 @@ void acquireGLRead(); ...@@ -81,7 +82,36 @@ void acquireGLRead();
void releaseGLRead(); void releaseGLRead();
void acquireGLWrite(); void acquireGLWrite();
void releaseGLWrite(); void releaseGLWrite();
void allowGLReadPreemption(); void _allowGLReadPreemption();
#define GIL_CHECK_INTERVAL 1000
// Note: this doesn't need to be an atomic, since it should
// only be accessed by the thread that holds the gil:
extern int gil_check_count;
extern std::atomic<int> threads_waiting_on_gil;
// Inline front end for GIL preemption checks.  Returns straight away when no
// thread appears to be waiting on the GIL; otherwise it counts the check and,
// once the count has reached GIL_CHECK_INTERVAL, hands off to
// _allowGLReadPreemption().
inline void allowGLReadPreemption() {
#if ENABLE_SAMPLING_PROFILER
    // Several profiler signals can pile up while we were unable to service
    // them (such as while inside LLVM or the GC).  Print one stacktrace per
    // pending signal so that time is still fully accounted for.
    if (unlikely(sigprof_pending)) {
        for (; sigprof_pending; sigprof_pending--) {
            _printStacktrace();
        }
    }
#endif

    // First look at the waiter count uses a relaxed load, i.e. no ordering
    // constraint is requested for this read.
    const bool has_waiters = threads_waiting_on_gil.load(std::memory_order_relaxed) != 0;
    if (!has_waiters)
        return;

    // Record this check; below the interval there is nothing more to do.
    gil_check_count += 1;
    if (likely(gil_check_count < GIL_CHECK_INTERVAL))
        return;

    _allowGLReadPreemption();
}
// Note: promoteGL is free to drop the lock and then reacquire // Note: promoteGL is free to drop the lock and then reacquire
void promoteGL(); void promoteGL();
void demoteGL(); void demoteGL();
......
...@@ -393,9 +393,11 @@ Box* BoxedWrapperDescriptor::descr_get(Box* _self, Box* inst, Box* owner) noexce ...@@ -393,9 +393,11 @@ Box* BoxedWrapperDescriptor::descr_get(Box* _self, Box* inst, Box* owner) noexce
if (inst == None) if (inst == None)
return self; return self;
if (!isSubclass(inst->cls, self->type)) if (!isSubclass(inst->cls, self->type)) {
PyErr_Format(TypeError, "Descriptor '' for '%s' objects doesn't apply to '%s' object", PyErr_Format(TypeError, "Descriptor '' for '%s' objects doesn't apply to '%s' object",
getFullNameOfClass(self->type).c_str(), getFullTypeName(inst).c_str()); getFullNameOfClass(self->type).c_str(), getFullTypeName(inst).c_str());
return NULL;
}
return new BoxedWrapperObject(self, inst); return new BoxedWrapperObject(self, inst);
} }
......
...@@ -249,6 +249,8 @@ extern "C" PyObject* PyDict_GetItem(PyObject* dict, PyObject* key) noexcept { ...@@ -249,6 +249,8 @@ extern "C" PyObject* PyDict_GetItem(PyObject* dict, PyObject* key) noexcept {
return d->getOrNull(key); return d->getOrNull(key);
} }
// XXX this would be easy to make much faster.
// This path doesn't exist in CPython; we have it to support extension modules that do // This path doesn't exist in CPython; we have it to support extension modules that do
// something along the lines of PyDict_GetItem(PyModule_GetDict()): // something along the lines of PyDict_GetItem(PyModule_GetDict()):
try { try {
...@@ -304,6 +306,9 @@ extern "C" int PyDict_Next(PyObject* op, Py_ssize_t* ppos, PyObject** pkey, PyOb ...@@ -304,6 +306,9 @@ extern "C" int PyDict_Next(PyObject* op, Py_ssize_t* ppos, PyObject** pkey, PyOb
} }
extern "C" PyObject* PyDict_GetItemString(PyObject* dict, const char* key) noexcept { extern "C" PyObject* PyDict_GetItemString(PyObject* dict, const char* key) noexcept {
if (dict->cls == attrwrapper_cls)
return unwrapAttrWrapper(dict)->getattr(key);
Box* key_s; Box* key_s;
try { try {
key_s = boxString(key); key_s = boxString(key);
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "codegen/irgen/hooks.h" #include "codegen/irgen/hooks.h"
#include "core/ast.h" #include "core/ast.h"
#include "core/threading.h"
#include "core/types.h" #include "core/types.h"
#include "gc/heap.h" #include "gc/heap.h"
#include "runtime/complex.h" #include "runtime/complex.h"
...@@ -138,6 +139,8 @@ void force() { ...@@ -138,6 +139,8 @@ void force() {
FORCE(boxedLocalsGet); FORCE(boxedLocalsGet);
FORCE(boxedLocalsDel); FORCE(boxedLocalsDel);
FORCE(threading::allowGLReadPreemption);
// FORCE(listIter); // FORCE(listIter);
} }
} }
......
...@@ -594,6 +594,10 @@ Box* listIAdd(BoxedList* self, Box* _rhs) { ...@@ -594,6 +594,10 @@ Box* listIAdd(BoxedList* self, Box* _rhs) {
int s1 = self->size; int s1 = self->size;
int s2 = rhs->size; int s2 = rhs->size;
if (s2 == 0)
return self;
self->ensure(s1 + s2); self->ensure(s1 + s2);
memcpy(self->elts->elts + s1, rhs->elts->elts, sizeof(rhs->elts->elts[0]) * s2); memcpy(self->elts->elts + s1, rhs->elts->elts, sizeof(rhs->elts->elts[0]) * s2);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment