Commit b9daab7a authored by Kevin Modzelewski

Merge pull request #677 from kmod/perf

More small improvements
parents de36c6be 180192b5
...@@ -3,7 +3,11 @@ ...@@ -3,7 +3,11 @@
.text.boxBool .text.boxBool
.text._ZN6pyston2gc10SmallArena6_allocEmi .text._ZN6pyston2gc10SmallArena6_allocEmi
.text._ZNSt17_Function_handlerIFvPN6pyston2gc10SmallArena16ThreadBlockCacheEEZNS2_12freeUnmarkedERSt6vectorIPNS0_3BoxESaIS8_EEE3$_0E9_M_invokeERKSt9_Any_dataS4_ .text._ZNSt17_Function_handlerIFvPN6pyston2gc10SmallArena16ThreadBlockCacheEEZNS2_12freeUnmarkedERSt6vectorIPNS0_3BoxESaIS8_EEE3$_0E9_M_invokeERKSt9_Any_dataS4_
.text._ZN6pyston2gc10SmallArena10_freeChainEPPNS1_5BlockERSt6vectorIPNS_3BoxESaIS7_EERS5_IPNS_10BoxedClassESaISC_EE
.text._ZN6pyston9getICInfoEPv
.text.memset .text.memset
.text.__memcmp_sse4_1
.text.__memcpy_sse2_unaligned
.text.gc_alloc .text.gc_alloc
.text.gc_realloc .text.gc_realloc
.text._ZN6pyston9BoxedList6ensureEi .text._ZN6pyston9BoxedList6ensureEi
...@@ -19,3 +23,13 @@ ...@@ -19,3 +23,13 @@
.text.intModInt .text.intModInt
.text.intAddInt .text.intAddInt
.text.intMulInt .text.intMulInt
.text._ZNK4llvm9StringRef4findES0_m
.text._ZNK4llvm13StringMapImpl7FindKeyENS_9StringRefE
.text._int_malloc
.text._int_free
.text.malloc
.text.free
.text._ULx86_64_dwarf_extract_proc_info_from_fde
.text.createTuple
.text._ZN6pyston18rearrangeArgumentsENS_16ParamReceiveSpecEPKNS_10ParamNamesEPKcPPNS_3BoxEPNS_15CallRewriteArgsERbNS_11ArgPassSpecES7_S7_S7_S8_PKSt6vectorIPNS_11BoxedStringESaISF_EERS7_SK_SK_S8_
.text.sre_match
...@@ -1006,7 +1006,7 @@ PyObject* slot_sq_item(PyObject* self, Py_ssize_t i) noexcept { ...@@ -1006,7 +1006,7 @@ PyObject* slot_sq_item(PyObject* self, Py_ssize_t i) noexcept {
} }
} }
static Py_ssize_t slot_sq_length(PyObject* self) noexcept { /* Pyston change: static */ Py_ssize_t slot_sq_length(PyObject* self) noexcept {
STAT_TIMER(t0, "us_timer_slot_sqlength", SLOT_AVOIDABILITY(self)); STAT_TIMER(t0, "us_timer_slot_sqlength", SLOT_AVOIDABILITY(self));
static PyObject* len_str; static PyObject* len_str;
...@@ -1078,7 +1078,7 @@ static int slot_sq_ass_slice(PyObject* self, Py_ssize_t i, Py_ssize_t j, PyObjec ...@@ -1078,7 +1078,7 @@ static int slot_sq_ass_slice(PyObject* self, Py_ssize_t i, Py_ssize_t j, PyObjec
return 0; return 0;
} }
static int slot_sq_contains(PyObject* self, PyObject* value) noexcept { /* Pyston change: static*/ int slot_sq_contains(PyObject* self, PyObject* value) noexcept {
STAT_TIMER(t0, "us_timer_slot_sqcontains", SLOT_AVOIDABILITY(self)); STAT_TIMER(t0, "us_timer_slot_sqcontains", SLOT_AVOIDABILITY(self));
PyObject* func, *res, *args; PyObject* func, *res, *args;
...@@ -1116,7 +1116,7 @@ static int slot_sq_contains(PyObject* self, PyObject* value) noexcept { ...@@ -1116,7 +1116,7 @@ static int slot_sq_contains(PyObject* self, PyObject* value) noexcept {
} }
#define SLOT1(FUNCNAME, OPSTR, ARG1TYPE, ARGCODES) \ #define SLOT1(FUNCNAME, OPSTR, ARG1TYPE, ARGCODES) \
static PyObject* FUNCNAME(PyObject* self, ARG1TYPE arg1) noexcept { \ /* Pyston change: static */ PyObject* FUNCNAME(PyObject* self, ARG1TYPE arg1) noexcept { \
static PyObject* cache_str; \ static PyObject* cache_str; \
return call_method(self, OPSTR, &cache_str, "(" ARGCODES ")", arg1); \ return call_method(self, OPSTR, &cache_str, "(" ARGCODES ")", arg1); \
} }
......
...@@ -37,6 +37,9 @@ int type_set_bases(PyTypeObject* type, PyObject* value, void* context) noexcept; ...@@ -37,6 +37,9 @@ int type_set_bases(PyTypeObject* type, PyObject* value, void* context) noexcept;
PyObject* slot_tp_richcompare(PyObject* self, PyObject* other, int op) noexcept; PyObject* slot_tp_richcompare(PyObject* self, PyObject* other, int op) noexcept;
PyObject* slot_tp_iternext(PyObject* self) noexcept; PyObject* slot_tp_iternext(PyObject* self) noexcept;
PyObject* slot_tp_new(PyTypeObject* self, PyObject* args, PyObject* kwds) noexcept; PyObject* slot_tp_new(PyTypeObject* self, PyObject* args, PyObject* kwds) noexcept;
PyObject* slot_mp_subscript(PyObject* self, PyObject* arg1) noexcept;
int slot_sq_contains(PyObject* self, PyObject* value) noexcept;
Py_ssize_t slot_sq_length(PyObject* self) noexcept;
} }
#endif #endif
...@@ -56,7 +56,7 @@ JitCodeBlock::JitCodeBlock(llvm::StringRef name) ...@@ -56,7 +56,7 @@ JitCodeBlock::JitCodeBlock(llvm::StringRef name)
// generate eh frame... // generate eh frame...
frame_manager.writeAndRegister(code.get(), code_size); frame_manager.writeAndRegister(code.get(), code_size);
g.func_addr_registry.registerFunction(("bjit: " + name).str(), code.get(), code_size, NULL); g.func_addr_registry.registerFunction(("bjit_" + name).str(), code.get(), code_size, NULL);
} }
std::unique_ptr<JitFragmentWriter> JitCodeBlock::newFragment(CFGBlock* block, int patch_jump_offset) { std::unique_ptr<JitFragmentWriter> JitCodeBlock::newFragment(CFGBlock* block, int patch_jump_offset) {
...@@ -176,6 +176,8 @@ RewriterVar* JitFragmentWriter::emitCallattr(RewriterVar* obj, BoxedString* attr ...@@ -176,6 +176,8 @@ RewriterVar* JitFragmentWriter::emitCallattr(RewriterVar* obj, BoxedString* attr
} }
RewriterVar* JitFragmentWriter::emitCompare(RewriterVar* lhs, RewriterVar* rhs, int op_type) { RewriterVar* JitFragmentWriter::emitCompare(RewriterVar* lhs, RewriterVar* rhs, int op_type) {
// TODO: can directly emit the assembly for Is/IsNot
#if ENABLE_BASELINEJIT_ICS #if ENABLE_BASELINEJIT_ICS
return call(false, (void*)compareICHelper, imm(new CompareIC), lhs, rhs, imm(op_type)); return call(false, (void*)compareICHelper, imm(new CompareIC), lhs, rhs, imm(op_type));
#else #else
......
...@@ -811,6 +811,20 @@ private: ...@@ -811,6 +811,20 @@ private:
assert(left); assert(left);
assert(right); assert(right);
// Fast path for identity comparisons (`is` / `is not`): instead of going through the
// generic _evalBinExp compare path below, convert both operands to UNKNOWN and emit a
// single integer equality / inequality compare on their values.
// NOTE(review): the fall-through path below calls left->decvref(emitter) and
// right->decvref(emitter) before returning; this early return does not, and it also does
// not decvref converted_left / converted_right. Confirm whether those vrefs need to be
// released here.
if (node->ops[0] == AST_TYPE::Is || node->ops[0] == AST_TYPE::IsNot) {
// TODO: I think we can do better here and not force the types to box themselves
ConcreteCompilerVariable* converted_left = left->makeConverted(emitter, UNKNOWN);
ConcreteCompilerVariable* converted_right = right->makeConverted(emitter, UNKNOWN);
llvm::Value* cmp;
// `is` lowers to an equality compare, `is not` to an inequality compare.
if (node->ops[0] == AST_TYPE::Is)
cmp = emitter.getBuilder()->CreateICmpEQ(converted_left->getValue(), converted_right->getValue());
else
cmp = emitter.getBuilder()->CreateICmpNE(converted_left->getValue(), converted_right->getValue());
// Wrap the i1 comparison result as a bool variable for the caller.
return boolFromI1(emitter, cmp);
}
CompilerVariable* rtn = _evalBinExp(node, left, right, node->ops[0], Compare, unw_info); CompilerVariable* rtn = _evalBinExp(node, left, right, node->ops[0], Compare, unw_info);
left->decvref(emitter); left->decvref(emitter);
right->decvref(emitter); right->decvref(emitter);
......
...@@ -37,6 +37,12 @@ extern "C" inline Box* boxBool(bool b) { ...@@ -37,6 +37,12 @@ extern "C" inline Box* boxBool(bool b) {
return rtn; return rtn;
} }
extern "C" inline Box* boxBoolNegated(bool b) __attribute__((visibility("default")));
// Boxes the logical negation of `b`: yields False when `b` is true and True otherwise.
extern "C" inline Box* boxBoolNegated(bool b) {
    if (b)
        return False;
    return True;
}
extern "C" inline bool unboxBool(Box* b) __attribute__((visibility("default"))); extern "C" inline bool unboxBool(Box* b) __attribute__((visibility("default")));
extern "C" inline bool unboxBool(Box* b) { extern "C" inline bool unboxBool(Box* b) {
assert(b->cls == bool_cls); assert(b->cls == bool_cls);
......
...@@ -57,6 +57,7 @@ void force() { ...@@ -57,6 +57,7 @@ void force() {
FORCE(unboxCLFunction); FORCE(unboxCLFunction);
FORCE(boxInstanceMethod); FORCE(boxInstanceMethod);
FORCE(boxBool); FORCE(boxBool);
FORCE(boxBoolNegated);
FORCE(unboxBool); FORCE(unboxBool);
FORCE(createTuple); FORCE(createTuple);
FORCE(createDict); FORCE(createDict);
......
...@@ -2254,6 +2254,16 @@ extern "C" bool nonzero(Box* obj) { ...@@ -2254,6 +2254,16 @@ extern "C" bool nonzero(Box* obj) {
rewriter->commitReturning(r_rtn); rewriter->commitReturning(r_rtn);
} }
return r; return r;
} else if (obj->cls == unicode_cls) {
PyUnicodeObject* unicode_obj = reinterpret_cast<PyUnicodeObject*>(obj);
bool r = (unicode_obj->length != 0);
if (rewriter.get()) {
RewriterVar* r_rtn
= r_obj->getAttr(offsetof(PyUnicodeObject, length))->toBool(rewriter->getReturnDestination());
rewriter->commitReturning(r_rtn);
}
return r;
} }
// TODO: rewrite these. // TODO: rewrite these.
...@@ -2412,6 +2422,35 @@ extern "C" BoxedInt* hash(Box* obj) { ...@@ -2412,6 +2422,35 @@ extern "C" BoxedInt* hash(Box* obj) {
extern "C" BoxedInt* lenInternal(Box* obj, LenRewriteArgs* rewrite_args) { extern "C" BoxedInt* lenInternal(Box* obj, LenRewriteArgs* rewrite_args) {
static BoxedString* len_str = static_cast<BoxedString*>(PyString_InternFromString("__len__")); static BoxedString* len_str = static_cast<BoxedString*>(PyString_InternFromString("__len__"));
// Corresponds to the first part of PyObject_Size:
// if the class has a sequence-methods table with an sq_length slot other than the generic
// slot_sq_length, call that slot directly; otherwise fall through to the code below.
PySequenceMethods* m = obj->cls->tp_as_sequence;
if (m != NULL && m->sq_length != NULL && m->sq_length != slot_sq_length) {
    if (rewrite_args) {
        RewriterVar* r_obj = rewrite_args->obj;
        RewriterVar* r_cls = r_obj->getAttr(offsetof(Box, cls));
        RewriterVar* r_m = r_cls->getAttr(offsetof(BoxedClass, tp_as_sequence));
        r_m->addGuardNotEq(0);

        // Currently, guard that the value of sq_length didn't change, and then
        // emit a call to the current function address.
        // It might be better to just load the current value of sq_length and call it
        // (after guarding it's not null), or maybe not. But the rewriter doesn't currently
        // support calling a RewriterVar (can only call fixed function addresses).
        r_m->addAttrGuard(offsetof(PySequenceMethods, sq_length), (intptr_t)m->sq_length);
        RewriterVar* r_n = rewrite_args->rewriter->call(true, (void*)m->sq_length, r_obj);
        rewrite_args->rewriter->call(true, (void*)checkAndThrowCAPIException);
        RewriterVar* r_r = rewrite_args->rewriter->call(false, (void*)boxInt, r_n);

        rewrite_args->out_success = true;
        rewrite_args->out_rtn = r_r;
    }

    // sq_length returns Py_ssize_t. Keep the full width: narrowing to int would truncate
    // lengths >= 2^31, could turn a large valid length into the -1 error sentinel, and
    // would disagree with the rewritten path above, which hands the untruncated result
    // to boxInt.
    Py_ssize_t r = (*m->sq_length)(obj);
    if (r == -1)
        throwCAPIException();
    return (BoxedInt*)boxInt(r);
}
Box* rtn; Box* rtn;
if (rewrite_args) { if (rewrite_args) {
CallRewriteArgs crewrite_args(rewrite_args->rewriter, rewrite_args->obj, rewrite_args->destination); CallRewriteArgs crewrite_args(rewrite_args->rewriter, rewrite_args->obj, rewrite_args->destination);
...@@ -4008,6 +4047,46 @@ Box* compareInternal(Box* lhs, Box* rhs, int op_type, CompareRewriteArgs* rewrit ...@@ -4008,6 +4047,46 @@ Box* compareInternal(Box* lhs, Box* rhs, int op_type, CompareRewriteArgs* rewrit
if (op_type == AST_TYPE::In || op_type == AST_TYPE::NotIn) { if (op_type == AST_TYPE::In || op_type == AST_TYPE::NotIn) {
static BoxedString* contains_str = static_cast<BoxedString*>(PyString_InternFromString("__contains__")); static BoxedString* contains_str = static_cast<BoxedString*>(PyString_InternFromString("__contains__"));
// The checks for this branch are taken from CPython's PySequence_Contains
if (PyType_HasFeature(rhs->cls, Py_TPFLAGS_HAVE_SEQUENCE_IN)) {
PySequenceMethods* sqm = rhs->cls->tp_as_sequence;
if (sqm != NULL && sqm->sq_contains != NULL && sqm->sq_contains != slot_sq_contains) {
if (rewrite_args) {
RewriterVar* r_lhs = rewrite_args->lhs;
RewriterVar* r_rhs = rewrite_args->rhs;
RewriterVar* r_cls = r_rhs->getAttr(offsetof(Box, cls));
RewriterVar* r_sqm = r_cls->getAttr(offsetof(BoxedClass, tp_as_sequence));
r_sqm->addGuardNotEq(0);
// We might need to guard on tp_flags if they can change?
// Currently, guard that the value of sq_contains didn't change, and then
// emit a call to the current function address.
// It might be better to just load the current value of sq_contains and call it
// (after guarding it's not null), or maybe not. But the rewriter doesn't currently
// support calling a RewriterVar (can only call fixed function addresses).
r_sqm->addAttrGuard(offsetof(PySequenceMethods, sq_contains), (intptr_t)sqm->sq_contains);
RewriterVar* r_b = rewrite_args->rewriter->call(true, (void*)sqm->sq_contains, r_rhs, r_lhs);
rewrite_args->rewriter->call(true, (void*)checkAndThrowCAPIException);
// This could be inlined:
RewriterVar* r_r;
if (op_type == AST_TYPE::NotIn)
r_r = rewrite_args->rewriter->call(false, (void*)boxBoolNegated, r_b);
else
r_r = rewrite_args->rewriter->call(false, (void*)boxBool, r_b);
rewrite_args->out_success = true;
rewrite_args->out_rtn = r_r;
}
int r = (*sqm->sq_contains)(rhs, lhs);
if (r == -1)
throwCAPIException();
if (op_type == AST_TYPE::NotIn)
r = !r;
return boxBool(r);
}
}
Box* contained; Box* contained;
RewriterVar* r_contained; RewriterVar* r_contained;
if (rewrite_args) { if (rewrite_args) {
...@@ -4267,6 +4346,39 @@ extern "C" Box* getitem(Box* value, Box* slice) { ...@@ -4267,6 +4346,39 @@ extern "C" Box* getitem(Box* value, Box* slice) {
std::unique_ptr<Rewriter> rewriter( std::unique_ptr<Rewriter> rewriter(
Rewriter::createRewriter(__builtin_extract_return_addr(__builtin_return_address(0)), 2, "getitem")); Rewriter::createRewriter(__builtin_extract_return_addr(__builtin_return_address(0)), 2, "getitem"));
// The PyObject_GetItem logic is:
// - call mp_subscript if it exists
// - if tp_as_sequence exists, try using that (with a number of conditions)
// - else throw an exception.
//
// For now, just use the first clause: call mp_subscript if it exists.
// And only if we think it's better than calling __getitem__, which should
// exist if mp_subscript exists.
PyMappingMethods* m = value->cls->tp_as_mapping;
if (m && m->mp_subscript && m->mp_subscript != slot_mp_subscript) {
if (rewriter.get()) {
RewriterVar* r_obj = rewriter->getArg(0);
RewriterVar* r_slice = rewriter->getArg(1);
RewriterVar* r_cls = r_obj->getAttr(offsetof(Box, cls));
RewriterVar* r_m = r_cls->getAttr(offsetof(BoxedClass, tp_as_mapping));
r_m->addGuardNotEq(0);
// Currently, guard that the value of mp_subscript didn't change, and then
// emit a call to the current function address.
// It might be better to just load the current value of mp_subscript and call it
// (after guarding it's not null), or maybe not. But the rewriter doesn't currently
// support calling a RewriterVar (can only call fixed function addresses).
r_m->addAttrGuard(offsetof(PyMappingMethods, mp_subscript), (intptr_t)m->mp_subscript);
RewriterVar* r_rtn = rewriter->call(true, (void*)m->mp_subscript, r_obj, r_slice);
rewriter->call(true, (void*)checkAndThrowCAPIException);
rewriter->commitReturning(r_rtn);
}
Box* r = m->mp_subscript(value, slice);
if (!r)
throwCAPIException();
return r;
}
static BoxedString* getitem_str = static_cast<BoxedString*>(PyString_InternFromString("__getitem__")); static BoxedString* getitem_str = static_cast<BoxedString*>(PyString_InternFromString("__getitem__"));
Box* rtn; Box* rtn;
if (rewriter.get()) { if (rewriter.get()) {
......
...@@ -106,6 +106,7 @@ extern BoxedModule* sys_module, *builtins_module, *math_module, *time_module, *t ...@@ -106,6 +106,7 @@ extern BoxedModule* sys_module, *builtins_module, *math_module, *time_module, *t
} }
extern "C" Box* boxBool(bool); extern "C" Box* boxBool(bool);
extern "C" Box* boxBoolNegated(bool);
extern "C" Box* boxInt(i64) __attribute__((visibility("default"))); extern "C" Box* boxInt(i64) __attribute__((visibility("default")));
extern "C" i64 unboxInt(Box*); extern "C" i64 unboxInt(Box*);
extern "C" Box* boxFloat(double d); extern "C" Box* boxFloat(double d);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment