Commit 584e85e4 authored by Chris Toshok

reduce mallocs by using llvm::SmallVector in a lot of hot paths

There were a lot of std::vectors in the rewriter and in the invoke
machinery (callFunc and friends), and every std::vector usage involves
a call to malloc (and to free when it is destroyed).  We should be using
llvm::SmallVector wherever we can in performance-sensitive code, since it
allows a configurable stack-allocated buffer.  It reverts to malloc/free
if you blow the buffer's capacity, but as long as things are tuned well,
we can get a pretty significant speedup.

There is more work to be done, but this change gets us ~3% on geomean.
parent f58caf8c
...@@ -511,20 +511,23 @@ void Rewriter::_loadConst(RewriterVar* result, int64_t val, Location dest) { ...@@ -511,20 +511,23 @@ void Rewriter::_loadConst(RewriterVar* result, int64_t val, Location dest) {
} }
RewriterVar* Rewriter::call(bool can_call_into_python, void* func_addr) { RewriterVar* Rewriter::call(bool can_call_into_python, void* func_addr) {
std::vector<RewriterVar*> args = {}; RewriterVar::SmallVector args;
std::vector<RewriterVar*> args_xmm = {}; RewriterVar::SmallVector args_xmm;
return call(can_call_into_python, func_addr, args, args_xmm); return call(can_call_into_python, func_addr, args, args_xmm);
} }
RewriterVar* Rewriter::call(bool can_call_into_python, void* func_addr, RewriterVar* arg0) { RewriterVar* Rewriter::call(bool can_call_into_python, void* func_addr, RewriterVar* arg0) {
std::vector<RewriterVar*> args = { arg0 }; RewriterVar::SmallVector args;
std::vector<RewriterVar*> args_xmm = {}; RewriterVar::SmallVector args_xmm;
args.push_back(arg0);
return call(can_call_into_python, func_addr, args, args_xmm); return call(can_call_into_python, func_addr, args, args_xmm);
} }
RewriterVar* Rewriter::call(bool can_call_into_python, void* func_addr, RewriterVar* arg0, RewriterVar* arg1) { RewriterVar* Rewriter::call(bool can_call_into_python, void* func_addr, RewriterVar* arg0, RewriterVar* arg1) {
std::vector<RewriterVar*> args = { arg0, arg1 }; RewriterVar::SmallVector args;
std::vector<RewriterVar*> args_xmm = {}; RewriterVar::SmallVector args_xmm;
args.push_back(arg0);
args.push_back(arg1);
return call(can_call_into_python, func_addr, args, args_xmm); return call(can_call_into_python, func_addr, args, args_xmm);
} }
...@@ -536,8 +539,8 @@ static const Location caller_save_registers[]{ ...@@ -536,8 +539,8 @@ static const Location caller_save_registers[]{
assembler::XMM11, assembler::XMM12, assembler::XMM13, assembler::XMM14, assembler::XMM15, assembler::XMM11, assembler::XMM12, assembler::XMM13, assembler::XMM14, assembler::XMM15,
}; };
RewriterVar* Rewriter::call(bool can_call_into_python, void* func_addr, const std::vector<RewriterVar*>& args, RewriterVar* Rewriter::call(bool can_call_into_python, void* func_addr, const RewriterVar::SmallVector& args,
const std::vector<RewriterVar*>& args_xmm) { const RewriterVar::SmallVector& args_xmm) {
RewriterVar* result = createNewVar(); RewriterVar* result = createNewVar();
std::vector<RewriterVar*> uses; std::vector<RewriterVar*> uses;
for (RewriterVar* v : args) { for (RewriterVar* v : args) {
...@@ -554,7 +557,7 @@ RewriterVar* Rewriter::call(bool can_call_into_python, void* func_addr, const st ...@@ -554,7 +557,7 @@ RewriterVar* Rewriter::call(bool can_call_into_python, void* func_addr, const st
} }
void Rewriter::_call(RewriterVar* result, bool can_call_into_python, void* func_addr, void Rewriter::_call(RewriterVar* result, bool can_call_into_python, void* func_addr,
const std::vector<RewriterVar*>& args, const std::vector<RewriterVar*>& args_xmm) { const RewriterVar::SmallVector& args, const RewriterVar::SmallVector& args_xmm) {
// TODO figure out why this is here -- what needs to be done differently // TODO figure out why this is here -- what needs to be done differently
// if can_call_into_python is true? // if can_call_into_python is true?
// assert(!can_call_into_python); // assert(!can_call_into_python);
......
...@@ -214,6 +214,8 @@ class RewriterAction; ...@@ -214,6 +214,8 @@ class RewriterAction;
// you can't forward-declare that :/ // you can't forward-declare that :/
class RewriterVar { class RewriterVar {
public: public:
typedef llvm::SmallVector<RewriterVar*, 8> SmallVector;
void addGuard(uint64_t val); void addGuard(uint64_t val);
void addGuardNotEq(uint64_t val); void addGuardNotEq(uint64_t val);
void addAttrGuard(int offset, uint64_t val, bool negate = false); void addAttrGuard(int offset, uint64_t val, bool negate = false);
...@@ -386,8 +388,8 @@ private: ...@@ -386,8 +388,8 @@ private:
void _trap(); void _trap();
void _loadConst(RewriterVar* result, int64_t val, Location loc); void _loadConst(RewriterVar* result, int64_t val, Location loc);
void _call(RewriterVar* result, bool can_call_into_python, void* func_addr, const std::vector<RewriterVar*>& args, void _call(RewriterVar* result, bool can_call_into_python, void* func_addr, const RewriterVar::SmallVector& args,
const std::vector<RewriterVar*>& args_xmm); const RewriterVar::SmallVector& args_xmm);
void _add(RewriterVar* result, RewriterVar* a, int64_t b, Location dest); void _add(RewriterVar* result, RewriterVar* a, int64_t b, Location dest);
int _allocate(RewriterVar* result, int n); int _allocate(RewriterVar* result, int n);
void _allocateAndCopy(RewriterVar* result, RewriterVar* array, int n); void _allocateAndCopy(RewriterVar* result, RewriterVar* array, int n);
...@@ -452,8 +454,8 @@ public: ...@@ -452,8 +454,8 @@ public:
// This causes some extra bookkeeping to prevent, ex this patchpoint to be rewritten when // This causes some extra bookkeeping to prevent, ex this patchpoint to be rewritten when
// entered recursively. Setting to false disables this for slightly better performance, but // entered recursively. Setting to false disables this for slightly better performance, but
// it's not huge so if in doubt just pass "true". // it's not huge so if in doubt just pass "true".
RewriterVar* call(bool can_call_into_python, void* func_addr, const std::vector<RewriterVar*>& args, RewriterVar* call(bool can_call_into_python, void* func_addr, const RewriterVar::SmallVector& args,
const std::vector<RewriterVar*>& args_xmm = std::vector<RewriterVar*>()); const RewriterVar::SmallVector& args_xmm = RewriterVar::SmallVector());
RewriterVar* call(bool can_call_into_python, void* func_addr); RewriterVar* call(bool can_call_into_python, void* func_addr);
RewriterVar* call(bool can_call_into_python, void* func_addr, RewriterVar* arg0); RewriterVar* call(bool can_call_into_python, void* func_addr, RewriterVar* arg0);
RewriterVar* call(bool can_call_into_python, void* func_addr, RewriterVar* arg0, RewriterVar* arg1); RewriterVar* call(bool can_call_into_python, void* func_addr, RewriterVar* arg0, RewriterVar* arg1);
......
...@@ -889,8 +889,8 @@ Box* dataDescriptorInstanceSpecialCases(GetattrRewriteArgs* rewrite_args, const ...@@ -889,8 +889,8 @@ Box* dataDescriptorInstanceSpecialCases(GetattrRewriteArgs* rewrite_args, const
case BoxedMemberDescriptor::DOUBLE: { case BoxedMemberDescriptor::DOUBLE: {
if (rewrite_args) { if (rewrite_args) {
RewriterVar* r_unboxed_val = rewrite_args->obj->getAttrDouble(member_desc->offset, assembler::XMM0); RewriterVar* r_unboxed_val = rewrite_args->obj->getAttrDouble(member_desc->offset, assembler::XMM0);
std::vector<RewriterVar*> normal_args; RewriterVar::SmallVector normal_args;
std::vector<RewriterVar*> float_args; RewriterVar::SmallVector float_args;
float_args.push_back(r_unboxed_val); float_args.push_back(r_unboxed_val);
rewrite_args->out_rtn rewrite_args->out_rtn
= rewrite_args->rewriter->call(true, (void*)boxFloat, normal_args, float_args); = rewrite_args->rewriter->call(true, (void*)boxFloat, normal_args, float_args);
...@@ -903,8 +903,8 @@ Box* dataDescriptorInstanceSpecialCases(GetattrRewriteArgs* rewrite_args, const ...@@ -903,8 +903,8 @@ Box* dataDescriptorInstanceSpecialCases(GetattrRewriteArgs* rewrite_args, const
case BoxedMemberDescriptor::FLOAT: { case BoxedMemberDescriptor::FLOAT: {
if (rewrite_args) { if (rewrite_args) {
RewriterVar* r_unboxed_val = rewrite_args->obj->getAttrFloat(member_desc->offset, assembler::XMM0); RewriterVar* r_unboxed_val = rewrite_args->obj->getAttrFloat(member_desc->offset, assembler::XMM0);
std::vector<RewriterVar*> normal_args; RewriterVar::SmallVector normal_args;
std::vector<RewriterVar*> float_args; RewriterVar::SmallVector float_args;
float_args.push_back(r_unboxed_val); float_args.push_back(r_unboxed_val);
rewrite_args->out_rtn rewrite_args->out_rtn
= rewrite_args->rewriter->call(true, (void*)boxFloat, normal_args, float_args); = rewrite_args->rewriter->call(true, (void*)boxFloat, normal_args, float_args);
...@@ -1548,8 +1548,12 @@ bool dataDescriptorSetSpecialCases(Box* obj, Box* val, Box* descr, SetattrRewrit ...@@ -1548,8 +1548,12 @@ bool dataDescriptorSetSpecialCases(Box* obj, Box* val, Box* descr, SetattrRewrit
r_descr->addAttrGuard(offsetof(BoxedGetsetDescriptor, set), (intptr_t)getset_descr->set); r_descr->addAttrGuard(offsetof(BoxedGetsetDescriptor, set), (intptr_t)getset_descr->set);
RewriterVar* r_closure = r_descr->getAttr(offsetof(BoxedGetsetDescriptor, closure)); RewriterVar* r_closure = r_descr->getAttr(offsetof(BoxedGetsetDescriptor, closure));
RewriterVar::SmallVector args;
args.push_back(r_obj);
args.push_back(r_val);
args.push_back(r_closure);
rewrite_args->rewriter->call( rewrite_args->rewriter->call(
/* can_call_into_python */ true, (void*)getset_descr->set, { r_obj, r_val, r_closure }); /* can_call_into_python */ true, (void*)getset_descr->set, args);
if (descr->cls == capi_getset_cls) if (descr->cls == capi_getset_cls)
// TODO I think we are supposed to check the return value? // TODO I think we are supposed to check the return value?
...@@ -2379,7 +2383,7 @@ enum class KeywordDest { ...@@ -2379,7 +2383,7 @@ enum class KeywordDest {
POSITIONAL, POSITIONAL,
KWARGS, KWARGS,
}; };
static KeywordDest placeKeyword(const ParamNames& param_names, std::vector<bool>& params_filled, static KeywordDest placeKeyword(const ParamNames& param_names, llvm::SmallVector<bool, 8>& params_filled,
const std::string& kw_name, Box* kw_val, Box*& oarg1, Box*& oarg2, Box*& oarg3, const std::string& kw_name, Box* kw_val, Box*& oarg1, Box*& oarg2, Box*& oarg3,
Box** oargs, BoxedDict* okwargs, CLFunction* cl) { Box** oargs, BoxedDict* okwargs, CLFunction* cl) {
assert(kw_val); assert(kw_val);
...@@ -2484,7 +2488,7 @@ Box* callFunc(BoxedFunctionBase* func, CallRewriteArgs* rewrite_args, ArgPassSpe ...@@ -2484,7 +2488,7 @@ Box* callFunc(BoxedFunctionBase* func, CallRewriteArgs* rewrite_args, ArgPassSpe
} }
} }
std::vector<Box*> varargs; std::vector<Box*, StlCompatAllocator<Box*>> varargs;
if (argspec.has_starargs) { if (argspec.has_starargs) {
Box* given_varargs = getArg(argspec.num_args + argspec.num_keywords, arg1, arg2, arg3, args); Box* given_varargs = getArg(argspec.num_args + argspec.num_keywords, arg1, arg2, arg3, args);
for (Box* e : given_varargs->pyElements()) { for (Box* e : given_varargs->pyElements()) {
...@@ -2520,7 +2524,7 @@ Box* callFunc(BoxedFunctionBase* func, CallRewriteArgs* rewrite_args, ArgPassSpe ...@@ -2520,7 +2524,7 @@ Box* callFunc(BoxedFunctionBase* func, CallRewriteArgs* rewrite_args, ArgPassSpe
getArg(i + positional_to_positional, oarg1, oarg2, oarg3, oargs) = varargs[i]; getArg(i + positional_to_positional, oarg1, oarg2, oarg3, oargs) = varargs[i];
} }
std::vector<bool> params_filled(num_output_args, false); llvm::SmallVector<bool, 8> params_filled(num_output_args);
for (int i = 0; i < positional_to_positional + varargs_to_positional; i++) { for (int i = 0; i < positional_to_positional + varargs_to_positional; i++) {
params_filled[i] = true; params_filled[i] = true;
} }
...@@ -2554,7 +2558,7 @@ Box* callFunc(BoxedFunctionBase* func, CallRewriteArgs* rewrite_args, ArgPassSpe ...@@ -2554,7 +2558,7 @@ Box* callFunc(BoxedFunctionBase* func, CallRewriteArgs* rewrite_args, ArgPassSpe
rewrite_args->args->setAttr((varargs_idx - 3) * sizeof(Box*), emptyTupleConst); rewrite_args->args->setAttr((varargs_idx - 3) * sizeof(Box*), emptyTupleConst);
} }
Box* ovarargs = new BoxedTuple(unused_positional); Box* ovarargs = new BoxedTuple(BoxedTuple::GCVector(unused_positional.begin(), unused_positional.end()));
getArg(varargs_idx, oarg1, oarg2, oarg3, oargs) = ovarargs; getArg(varargs_idx, oarg1, oarg2, oarg3, oargs) = ovarargs;
} else if (unused_positional.size()) { } else if (unused_positional.size()) {
raiseExcHelper(TypeError, "%s() takes at most %d argument%s (%d given)", getFunctionName(f).c_str(), raiseExcHelper(TypeError, "%s() takes at most %d argument%s (%d given)", getFunctionName(f).c_str(),
...@@ -2728,7 +2732,7 @@ Box* callCLFunc(CLFunction* f, CallRewriteArgs* rewrite_args, int num_output_arg ...@@ -2728,7 +2732,7 @@ Box* callCLFunc(CLFunction* f, CallRewriteArgs* rewrite_args, int num_output_arg
if (rewrite_args) { if (rewrite_args) {
rewrite_args->rewriter->addDependenceOn(chosen_cf->dependent_callsites); rewrite_args->rewriter->addDependenceOn(chosen_cf->dependent_callsites);
std::vector<RewriterVar*> arg_vec; RewriterVar::SmallVector arg_vec;
// TODO this kind of embedded reference needs to be tracked by the GC somehow? // TODO this kind of embedded reference needs to be tracked by the GC somehow?
// Or maybe it's ok, since we've guarded on the function object? // Or maybe it's ok, since we've guarded on the function object?
if (closure) if (closure)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment