Commit 584e85e4 authored by Chris Toshok

reduce mallocs by using llvm::SmallVector in a lot of hot paths

There were a lot of std::vectors in the rewriter and in the invoke
machinery (callFunc and friends), and every std::vector usage involves
a call to malloc (and to free when it is destroyed).  We should be using
llvm::SmallVector wherever we can in performance-sensitive code, since it
allows a configurable stack-allocated buffer.  It reverts to malloc/free
if you blow the buffer's capacity, but as long as things are tuned well,
we can get a pretty significant speedup.

There is more work to be done, but this change gets us ~3% on geomean.
parent f58caf8c
......@@ -511,20 +511,23 @@ void Rewriter::_loadConst(RewriterVar* result, int64_t val, Location dest) {
}
// Convenience overload for a call that takes no arguments: builds empty
// `args` and `args_xmm` lists and forwards to the list-taking call() overload,
// returning whatever that overload returns.
RewriterVar* Rewriter::call(bool can_call_into_python, void* func_addr) {
std::vector<RewriterVar*> args = {};
std::vector<RewriterVar*> args_xmm = {};
RewriterVar::SmallVector args;
RewriterVar::SmallVector args_xmm;
return call(can_call_into_python, func_addr, args, args_xmm);
}
// Convenience overload for a call with a single argument: places arg0 in
// `args`, leaves `args_xmm` empty, and forwards to the list-taking call()
// overload, returning whatever that overload returns.
RewriterVar* Rewriter::call(bool can_call_into_python, void* func_addr, RewriterVar* arg0) {
std::vector<RewriterVar*> args = { arg0 };
std::vector<RewriterVar*> args_xmm = {};
RewriterVar::SmallVector args;
RewriterVar::SmallVector args_xmm;
args.push_back(arg0);
return call(can_call_into_python, func_addr, args, args_xmm);
}
// Convenience overload for a call with two arguments: places arg0 then arg1
// in `args` (in that order), leaves `args_xmm` empty, and forwards to the
// list-taking call() overload, returning whatever that overload returns.
RewriterVar* Rewriter::call(bool can_call_into_python, void* func_addr, RewriterVar* arg0, RewriterVar* arg1) {
std::vector<RewriterVar*> args = { arg0, arg1 };
std::vector<RewriterVar*> args_xmm = {};
RewriterVar::SmallVector args;
RewriterVar::SmallVector args_xmm;
args.push_back(arg0);
args.push_back(arg1);
return call(can_call_into_python, func_addr, args, args_xmm);
}
......@@ -536,8 +539,8 @@ static const Location caller_save_registers[]{
assembler::XMM11, assembler::XMM12, assembler::XMM13, assembler::XMM14, assembler::XMM15,
};
RewriterVar* Rewriter::call(bool can_call_into_python, void* func_addr, const std::vector<RewriterVar*>& args,
const std::vector<RewriterVar*>& args_xmm) {
RewriterVar* Rewriter::call(bool can_call_into_python, void* func_addr, const RewriterVar::SmallVector& args,
const RewriterVar::SmallVector& args_xmm) {
RewriterVar* result = createNewVar();
std::vector<RewriterVar*> uses;
for (RewriterVar* v : args) {
......@@ -554,7 +557,7 @@ RewriterVar* Rewriter::call(bool can_call_into_python, void* func_addr, const st
}
void Rewriter::_call(RewriterVar* result, bool can_call_into_python, void* func_addr,
const std::vector<RewriterVar*>& args, const std::vector<RewriterVar*>& args_xmm) {
const RewriterVar::SmallVector& args, const RewriterVar::SmallVector& args_xmm) {
// TODO figure out why this is here -- what needs to be done differently
// if can_call_into_python is true?
// assert(!can_call_into_python);
......
......@@ -214,6 +214,8 @@ class RewriterAction;
// you can't forward-declare that :/
class RewriterVar {
public:
typedef llvm::SmallVector<RewriterVar*, 8> SmallVector;
void addGuard(uint64_t val);
void addGuardNotEq(uint64_t val);
void addAttrGuard(int offset, uint64_t val, bool negate = false);
......@@ -386,8 +388,8 @@ private:
void _trap();
void _loadConst(RewriterVar* result, int64_t val, Location loc);
void _call(RewriterVar* result, bool can_call_into_python, void* func_addr, const std::vector<RewriterVar*>& args,
const std::vector<RewriterVar*>& args_xmm);
void _call(RewriterVar* result, bool can_call_into_python, void* func_addr, const RewriterVar::SmallVector& args,
const RewriterVar::SmallVector& args_xmm);
void _add(RewriterVar* result, RewriterVar* a, int64_t b, Location dest);
int _allocate(RewriterVar* result, int n);
void _allocateAndCopy(RewriterVar* result, RewriterVar* array, int n);
......@@ -452,8 +454,8 @@ public:
// This causes some extra bookkeeping to prevent, e.g., this patchpoint from being rewritten when
// entered recursively. Setting to false disables this for slightly better performance, but
// it's not huge so if in doubt just pass "true".
RewriterVar* call(bool can_call_into_python, void* func_addr, const std::vector<RewriterVar*>& args,
const std::vector<RewriterVar*>& args_xmm = std::vector<RewriterVar*>());
RewriterVar* call(bool can_call_into_python, void* func_addr, const RewriterVar::SmallVector& args,
const RewriterVar::SmallVector& args_xmm = RewriterVar::SmallVector());
RewriterVar* call(bool can_call_into_python, void* func_addr);
RewriterVar* call(bool can_call_into_python, void* func_addr, RewriterVar* arg0);
RewriterVar* call(bool can_call_into_python, void* func_addr, RewriterVar* arg0, RewriterVar* arg1);
......
......@@ -889,8 +889,8 @@ Box* dataDescriptorInstanceSpecialCases(GetattrRewriteArgs* rewrite_args, const
case BoxedMemberDescriptor::DOUBLE: {
if (rewrite_args) {
RewriterVar* r_unboxed_val = rewrite_args->obj->getAttrDouble(member_desc->offset, assembler::XMM0);
std::vector<RewriterVar*> normal_args;
std::vector<RewriterVar*> float_args;
RewriterVar::SmallVector normal_args;
RewriterVar::SmallVector float_args;
float_args.push_back(r_unboxed_val);
rewrite_args->out_rtn
= rewrite_args->rewriter->call(true, (void*)boxFloat, normal_args, float_args);
......@@ -903,8 +903,8 @@ Box* dataDescriptorInstanceSpecialCases(GetattrRewriteArgs* rewrite_args, const
case BoxedMemberDescriptor::FLOAT: {
if (rewrite_args) {
RewriterVar* r_unboxed_val = rewrite_args->obj->getAttrFloat(member_desc->offset, assembler::XMM0);
std::vector<RewriterVar*> normal_args;
std::vector<RewriterVar*> float_args;
RewriterVar::SmallVector normal_args;
RewriterVar::SmallVector float_args;
float_args.push_back(r_unboxed_val);
rewrite_args->out_rtn
= rewrite_args->rewriter->call(true, (void*)boxFloat, normal_args, float_args);
......@@ -1548,8 +1548,12 @@ bool dataDescriptorSetSpecialCases(Box* obj, Box* val, Box* descr, SetattrRewrit
r_descr->addAttrGuard(offsetof(BoxedGetsetDescriptor, set), (intptr_t)getset_descr->set);
RewriterVar* r_closure = r_descr->getAttr(offsetof(BoxedGetsetDescriptor, closure));
RewriterVar::SmallVector args;
args.push_back(r_obj);
args.push_back(r_val);
args.push_back(r_closure);
rewrite_args->rewriter->call(
/* can_call_into_python */ true, (void*)getset_descr->set, { r_obj, r_val, r_closure });
/* can_call_into_python */ true, (void*)getset_descr->set, args);
if (descr->cls == capi_getset_cls)
// TODO I think we are supposed to check the return value?
......@@ -2379,7 +2383,7 @@ enum class KeywordDest {
POSITIONAL,
KWARGS,
};
static KeywordDest placeKeyword(const ParamNames& param_names, std::vector<bool>& params_filled,
static KeywordDest placeKeyword(const ParamNames& param_names, llvm::SmallVector<bool, 8>& params_filled,
const std::string& kw_name, Box* kw_val, Box*& oarg1, Box*& oarg2, Box*& oarg3,
Box** oargs, BoxedDict* okwargs, CLFunction* cl) {
assert(kw_val);
......@@ -2484,7 +2488,7 @@ Box* callFunc(BoxedFunctionBase* func, CallRewriteArgs* rewrite_args, ArgPassSpe
}
}
std::vector<Box*> varargs;
std::vector<Box*, StlCompatAllocator<Box*>> varargs;
if (argspec.has_starargs) {
Box* given_varargs = getArg(argspec.num_args + argspec.num_keywords, arg1, arg2, arg3, args);
for (Box* e : given_varargs->pyElements()) {
......@@ -2520,7 +2524,7 @@ Box* callFunc(BoxedFunctionBase* func, CallRewriteArgs* rewrite_args, ArgPassSpe
getArg(i + positional_to_positional, oarg1, oarg2, oarg3, oargs) = varargs[i];
}
std::vector<bool> params_filled(num_output_args, false);
llvm::SmallVector<bool, 8> params_filled(num_output_args);
for (int i = 0; i < positional_to_positional + varargs_to_positional; i++) {
params_filled[i] = true;
}
......@@ -2554,7 +2558,7 @@ Box* callFunc(BoxedFunctionBase* func, CallRewriteArgs* rewrite_args, ArgPassSpe
rewrite_args->args->setAttr((varargs_idx - 3) * sizeof(Box*), emptyTupleConst);
}
Box* ovarargs = new BoxedTuple(unused_positional);
Box* ovarargs = new BoxedTuple(BoxedTuple::GCVector(unused_positional.begin(), unused_positional.end()));
getArg(varargs_idx, oarg1, oarg2, oarg3, oargs) = ovarargs;
} else if (unused_positional.size()) {
raiseExcHelper(TypeError, "%s() takes at most %d argument%s (%d given)", getFunctionName(f).c_str(),
......@@ -2728,7 +2732,7 @@ Box* callCLFunc(CLFunction* f, CallRewriteArgs* rewrite_args, int num_output_arg
if (rewrite_args) {
rewrite_args->rewriter->addDependenceOn(chosen_cf->dependent_callsites);
std::vector<RewriterVar*> arg_vec;
RewriterVar::SmallVector arg_vec;
// TODO this kind of embedded reference needs to be tracked by the GC somehow?
// Or maybe it's ok, since we've guarded on the function object?
if (closure)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment