Commit 7b24661f authored by Marius Wachtler's avatar Marius Wachtler

new frame introspection using vregs for non compiler generated names

This splits up the handling of deopts and normal frame introspection (e.g. for a traceback).
We have to add frame introspection to nearly all call sites, which makes it very important that it does not introduce much overhead over a normal call instruction.
By always storing the user visible variables into a vregs array (layout the same as in the interpreter/bjit) we can make introspection cheaper.
Frame introspection only needs to access user-facing variables; therefore we don't have to generate extra bytes for spilling variables which get clobbered in the callee, because all values we need to access are inside the vregs array.
This lets us remove the 95-byte overhead and reduces the stackmap size.
It adds a slight cost of maintaining the vregs array but we were already doing some of this work before with our manual spilling with the additional benefit of faster frame introspection.

The deopts case stays pretty much the same with the exception that we don't add the user visible vars to the stackmap because they are already in the vreg.
We could reduce the overhead by implementing a special "deopt()" function in asm which stores and restores all variables; then we would not have to manually spill the registers when filling the deopt IC.
Alternatively we could handle it inside LLVM by either switching to a stackmap intrinsic which already supports this case or adding one if it does not exist...
But I think it's not worth it because deopts should be uncommon...
parent 1a07ff88
......@@ -145,7 +145,7 @@ private:
Box** vregs;
ExcInfo last_exception;
BoxedClosure* passed_closure, *created_closure;
BoxedClosure* created_closure;
BoxedGenerator* generator;
unsigned edgecount;
FrameInfo frame_info;
......@@ -174,7 +174,7 @@ public:
FunctionMetadata* getMD() { return md; }
FrameInfo* getFrameInfo() { return &frame_info; }
BoxedClosure* getPassedClosure() { return passed_closure; }
BoxedClosure* getPassedClosure() { return frame_info.passed_closure; }
Box** getVRegs() { return vregs; }
const ScopeInfo* getScopeInfo() { return scope_info; }
......@@ -203,9 +203,9 @@ void ASTInterpreter::setGenerator(Box* gen) {
}
void ASTInterpreter::setPassedClosure(Box* closure) {
assert(!this->passed_closure); // This should only used for initialization
assert(closure->cls == closure_cls);
this->passed_closure = static_cast<BoxedClosure*>(closure);
assert(!frame_info.passed_closure); // This should only used for initialization
assert(!closure || closure->cls == closure_cls);
frame_info.passed_closure = static_cast<BoxedClosure*>(closure);
}
void ASTInterpreter::setCreatedClosure(Box* closure) {
......@@ -236,7 +236,6 @@ ASTInterpreter::ASTInterpreter(FunctionMetadata* md, Box** vregs)
phis(NULL),
vregs(vregs),
last_exception(NULL, NULL, NULL),
passed_closure(0),
created_closure(0),
generator(0),
edgecount(0),
......@@ -246,17 +245,18 @@ ASTInterpreter::ASTInterpreter(FunctionMetadata* md, Box** vregs)
should_jit(false) {
scope_info = source_info->getScopeInfo();
frame_info.vregs = vregs;
assert(scope_info);
}
void ASTInterpreter::initArguments(BoxedClosure* _closure, BoxedGenerator* _generator, Box* arg1, Box* arg2, Box* arg3,
Box** args) {
passed_closure = _closure;
setPassedClosure(_closure);
generator = _generator;
if (scope_info->createsClosure())
created_closure = createClosure(passed_closure, scope_info->getClosureSize());
created_closure = createClosure(_closure, scope_info->getClosureSize());
const ParamNames& param_names = md->param_names;
......@@ -724,8 +724,8 @@ Box* ASTInterpreter::doOSR(AST_Jump* node) {
if (generator)
sorted_symbol_table[source_info->getInternedStrings().get(PASSED_GENERATOR_NAME)] = generator;
if (passed_closure)
sorted_symbol_table[source_info->getInternedStrings().get(PASSED_CLOSURE_NAME)] = passed_closure;
if (frame_info.passed_closure)
sorted_symbol_table[source_info->getInternedStrings().get(PASSED_CLOSURE_NAME)] = frame_info.passed_closure;
if (created_closure)
sorted_symbol_table[source_info->getInternedStrings().get(CREATED_CLOSURE_NAME)] = created_closure;
......@@ -1038,9 +1038,9 @@ Value ASTInterpreter::createFunction(AST* node, AST_arguments* args, const std::
closure_var = jit->getInterp()->getAttr(offsetof(ASTInterpreter, created_closure));
} else {
assert(scope_info->passesThroughClosure());
closure = passed_closure;
closure = frame_info.passed_closure;
if (jit)
closure_var = jit->getInterp()->getAttr(offsetof(ASTInterpreter, passed_closure));
closure_var = jit->getInterp()->getAttr(offsetof(ASTInterpreter, frame_info.passed_closure));
}
assert(closure);
}
......@@ -1105,7 +1105,7 @@ Value ASTInterpreter::visit_makeClass(AST_MakeClass* mkclass) {
BoxedClosure* closure = NULL;
if (scope_info->takesClosure()) {
if (this->scope_info->passesThroughClosure())
closure = passed_closure;
closure = getPassedClosure();
else
closure = created_closure;
assert(closure);
......@@ -1633,8 +1633,8 @@ void ASTInterpreterJitInterface::delNameHelper(void* _interpreter, InternedStrin
Box* ASTInterpreterJitInterface::derefHelper(void* _interpreter, InternedString s) {
ASTInterpreter* interpreter = (ASTInterpreter*)_interpreter;
DerefInfo deref_info = interpreter->scope_info->getDerefInfo(s);
assert(interpreter->passed_closure);
BoxedClosure* closure = interpreter->passed_closure;
assert(interpreter->getPassedClosure());
BoxedClosure* closure = interpreter->getPassedClosure();
for (int i = 0; i < deref_info.num_parents_from_passed_closure; i++) {
closure = closure->parent;
}
......@@ -1965,12 +1965,15 @@ FrameInfo* getFrameInfoForInterpretedFrame(void* frame_ptr) {
return interpreter->getFrameInfo();
}
BoxedDict* localsForInterpretedFrame(Box** vregs, CFG* cfg, bool only_user_visible) {
BoxedDict* rtn = new BoxedDict();
for (auto& l : cfg->sym_vreg_map) {
if (only_user_visible && (l.first.s()[0] == '!' || l.first.s()[0] == '#'))
continue;
// Returns the vregs array of the AST interpreter that owns the frame at `frame_ptr`.
// An interpreter must exist for that frame pointer (asserted).
Box** getVRegsForInterpretedFrame(void* frame_ptr) {
    ASTInterpreter* const owner = getInterpreterFromFramePtr(frame_ptr);
    assert(owner);
    return owner->getVRegs();
}
BoxedDict* localsForInterpretedFrame(Box** vregs, CFG* cfg) {
BoxedDict* rtn = new BoxedDict();
for (auto& l : cfg->sym_vreg_map_user_visible) {
Box* val = vregs[l.second];
if (val) {
assert(gc::isValidGCObject(val));
......@@ -1981,15 +1984,9 @@ BoxedDict* localsForInterpretedFrame(Box** vregs, CFG* cfg, bool only_user_visib
return rtn;
}
BoxedDict* localsForInterpretedFrame(void* frame_ptr, bool only_user_visible) {
ASTInterpreter* interpreter = getInterpreterFromFramePtr(frame_ptr);
assert(interpreter);
return localsForInterpretedFrame(interpreter->getVRegs(), interpreter->getMD()->source->cfg, only_user_visible);
}
BoxedClosure* passedClosureForInterpretedFrame(void* frame_ptr) {
BoxedDict* localsForInterpretedFrame(void* frame_ptr) {
ASTInterpreter* interpreter = getInterpreterFromFramePtr(frame_ptr);
assert(interpreter);
return interpreter->getPassedClosure();
return localsForInterpretedFrame(interpreter->getVRegs(), interpreter->getMD()->source->cfg);
}
}
......@@ -81,10 +81,10 @@ Box* getGlobalsForInterpretedFrame(void* frame_ptr);
FunctionMetadata* getMDForInterpretedFrame(void* frame_ptr);
struct FrameInfo;
FrameInfo* getFrameInfoForInterpretedFrame(void* frame_ptr);
BoxedClosure* passedClosureForInterpretedFrame(void* frame_ptr);
BoxedDict* localsForInterpretedFrame(Box** vregs, CFG* cfg, bool only_user_visible);
BoxedDict* localsForInterpretedFrame(void* frame_ptr, bool only_user_visible);
Box** getVRegsForInterpretedFrame(void* frame_ptr);
BoxedDict* localsForInterpretedFrame(Box** vregs, CFG* cfg);
BoxedDict* localsForInterpretedFrame(void* frame_ptr);
// Executes the equivalent of CPython's PRINT_EXPR opcode (call sys.displayhook)
extern "C" void printExprHelper(Box* b);
......
......@@ -857,7 +857,7 @@ void JitFragmentWriter::_emitPPCall(RewriterVar* result, void* func_addr, llvm::
// make space for patchpoint
uint8_t* pp_start = rewrite->getSlotStart() + assembler->bytesWritten();
constexpr int call_size = 16;
constexpr int call_size = 13;
assembler->skipBytes(pp_size + call_size);
uint8_t* pp_end = rewrite->getSlotStart() + assembler->bytesWritten();
assert(assembler->hasFailed() || (pp_start + pp_size + call_size == pp_end));
......
......@@ -343,6 +343,7 @@ void PystonObjectCache::calculateModuleHash(const llvm::Module* M, EffortLevel e
HashOStream hash_stream;
llvm::WriteBitcodeToFile(M, hash_stream);
hash_stream << (int)effort;
hash_stream << USE_REGALLOC_BASIC;
hash_before_codegen = hash_stream.getHash();
}
......
......@@ -102,6 +102,8 @@ public:
virtual void checkAndPropagateCapiException(const UnwindInfo& unw_info, llvm::Value* returned_val,
llvm::Value* exc_val, bool double_check = false) = 0;
virtual llvm::Value* createDeopt(AST_stmt* current_stmt, AST_expr* node, llvm::Value* node_value) = 0;
virtual Box* getIntConstant(int64_t n) = 0;
virtual Box* getFloatConstant(double d) = 0;
};
......
......@@ -58,6 +58,7 @@ IRGenState::IRGenState(FunctionMetadata* md, CompiledFunction* cf, SourceInfo* s
frame_info(NULL),
frame_info_arg(NULL),
globals(NULL),
vregs(NULL),
scratch_size(0) {
assert(cf->func);
assert(!cf->md); // in this case don't need to pass in sourceinfo
......@@ -143,7 +144,7 @@ static llvm::Value* getExcinfoGep(llvm::IRBuilder<true>& builder, llvm::Value* v
return builder.CreateConstInBoundsGEP2_32(v, 0, 0);
}
static llvm::Value* getFrameObjGep(llvm::IRBuilder<true>& builder, llvm::Value* v) {
template <typename Builder> static llvm::Value* getFrameObjGep(Builder& builder, llvm::Value* v) {
static_assert(offsetof(FrameInfo, exc) == 0, "");
static_assert(sizeof(ExcInfo) == 24, "");
static_assert(sizeof(Box*) == 8, "");
......@@ -153,6 +154,16 @@ static llvm::Value* getFrameObjGep(llvm::IRBuilder<true>& builder, llvm::Value*
// gep->accumulateConstantOffset(g.tm->getDataLayout(), ap_offset)
}
// Builds a GEP to the `passed_closure` field of the FrameInfo that `v` points to.
// The static_assert pins the C++ layout (byte offset 40) that the hard-coded
// LLVM struct field index below relies on.
template <typename Builder> static llvm::Value* getPassedClosureGep(Builder& builder, llvm::Value* v) {
    constexpr int passed_closure_field_idx = 3;
    static_assert(offsetof(FrameInfo, passed_closure) == 40, "");
    llvm::Value* field_ptr = builder.CreateConstInBoundsGEP2_32(v, 0, passed_closure_field_idx);
    return field_ptr;
}
// Builds a GEP to the `vregs` field of the FrameInfo that `v` points to.
// The static_assert pins the C++ layout (byte offset 48) that the hard-coded
// LLVM struct field index below relies on.
template <typename Builder> static llvm::Value* getVRegsGep(Builder& builder, llvm::Value* v) {
    constexpr int vregs_field_idx = 4;
    static_assert(offsetof(FrameInfo, vregs) == 48, "");
    llvm::Value* field_ptr = builder.CreateConstInBoundsGEP2_32(v, 0, vregs_field_idx);
    return field_ptr;
}
llvm::Value* IRGenState::getFrameInfoVar() {
/*
There is a matrix of possibilities here.
......@@ -180,10 +191,6 @@ llvm::Value* IRGenState::getFrameInfoVar() {
if (entry_block.begin() != entry_block.end())
builder.SetInsertPoint(&entry_block, entry_block.getFirstInsertionPt());
llvm::AllocaInst* al = builder.CreateAlloca(g.llvm_frame_info_type, NULL, "frame_info");
assert(al->isStaticAlloca());
if (entry_block.getTerminator())
builder.SetInsertPoint(entry_block.getTerminator());
else
......@@ -194,13 +201,34 @@ llvm::Value* IRGenState::getFrameInfoVar() {
this->frame_info = frame_info_arg;
// use the vregs array from the interpreter
vregs = builder.CreateLoad(getVRegsGep(builder, frame_info_arg));
if (getScopeInfo()->usesNameLookup()) {
// load frame_info.boxedLocals
this->boxed_locals = builder.CreateLoad(getBoxedLocalsGep(builder, this->frame_info));
}
} else {
// The "normal" case
assert(!vregs);
getMD()->calculateNumVRegs();
int num_user_visible_vregs = getMD()->source->cfg->sym_vreg_map_user_visible.size();
if (num_user_visible_vregs > 0) {
auto* vregs_alloca
= builder.CreateAlloca(g.llvm_value_type_ptr, getConstantInt(num_user_visible_vregs), "vregs");
// Clear the vregs array because 0 means undefined value.
builder.CreateMemSet(vregs_alloca, getConstantInt(0, g.i8),
getConstantInt(num_user_visible_vregs * sizeof(Box*)),
vregs_alloca->getAlignment());
vregs = vregs_alloca;
} else
vregs = getNullPtr(g.llvm_value_type_ptr_ptr);
llvm::AllocaInst* al = builder.CreateAlloca(g.llvm_frame_info_type, NULL, "frame_info");
assert(al->isStaticAlloca());
// frame_info.exc.type = NULL
llvm::Constant* null_value = getNullPtr(g.llvm_value_type_ptr);
llvm::Value* exc_info = getExcinfoGep(builder, al);
......@@ -223,6 +251,11 @@ llvm::Value* IRGenState::getFrameInfoVar() {
= llvm::cast<llvm::StructType>(g.llvm_frame_info_type)->getElementType(2);
builder.CreateStore(getNullPtr(llvm_frame_obj_type_ptr), getFrameObjGep(builder, al));
// frame_info.passed_closure = NULL
builder.CreateStore(getNullPtr(g.llvm_closure_type_ptr), getPassedClosureGep(builder, al));
// set frame_info.vregs
builder.CreateStore(vregs, getVRegsGep(builder, al));
this->frame_info = al;
}
}
......@@ -237,6 +270,15 @@ llvm::Value* IRGenState::getBoxedLocalsVar() {
return this->boxed_locals;
}
// Returns the LLVM value holding the vregs array pointer, setting it up on first use.
llvm::Value* IRGenState::getVRegsVar() {
    if (vregs)
        return vregs;

    // getFrameInfoVar() is called for its side effect: it also sets the `vregs` member.
    getFrameInfoVar();
    assert(vregs);
    return vregs;
}
// Convenience accessor: forwards to getScopeInfo() on this function's SourceInfo.
ScopeInfo* IRGenState::getScopeInfo() {
return getSourceInfo()->getScopeInfo();
}
......@@ -477,6 +519,14 @@ public:
return rtn.getInstruction();
}
// Emits a call to pyston::deopt through a dedicated deopt IC, passing the embedded AST node
// and its current value, and returns the result cast (inttoptr) to g.llvm_value_type_ptr.
llvm::Value* createDeopt(AST_stmt* current_stmt, AST_expr* node, llvm::Value* node_value) override {
    ICSetupInfo* deopt_ic = createDeoptIC();
    llvm::Value* raw_result = createIC(deopt_ic, (void*)pyston::deopt,
                                       { embedRelocatablePtr(node, g.llvm_astexpr_type_ptr), node_value },
                                       UnwindInfo(current_stmt, NULL));
    llvm::Value* boxed_result = getBuilder()->CreateIntToPtr(raw_result, g.llvm_value_type_ptr);
    return boxed_result;
}
void checkAndPropagateCapiException(const UnwindInfo& unw_info, llvm::Value* returned_val, llvm::Value* exc_val,
bool double_check = false) override {
assert(!double_check); // need to call PyErr_Occurred
......@@ -625,8 +675,7 @@ private:
curblock = deopt_bb;
emitter.getBuilder()->SetInsertPoint(curblock);
llvm::Value* v = emitter.createCall2(UnwindInfo(current_statement, NULL), g.funcs.deopt,
embedRelocatablePtr(node, g.llvm_astexpr_type_ptr), node_value);
llvm::Value* v = emitter.createDeopt(current_statement, (AST_expr*)node, node_value);
emitter.getBuilder()->CreateRet(v);
curblock = success_bb;
......@@ -1701,6 +1750,22 @@ private:
return rtn;
}
// Mirrors a write to `name` into the frame's vregs array when `name` has a user visible vreg.
// `get_llvm_val_cb` is only invoked when a store is actually emitted, so callers can defer
// producing the value until it is known to be needed.
template <typename GetLLVMValCB> void _setVRegIfUserVisible(InternedString name, GetLLVMValCB get_llvm_val_cb) {
    auto cfg = irstate->getSourceInfo()->cfg;
    if (!cfg->hasVregsAssigned())
        irstate->getMD()->calculateNumVRegs();

    assert(cfg->sym_vreg_map.count(name));
    int vreg = cfg->sym_vreg_map[name];
    assert(vreg >= 0);

    // Only vreg indices below the number of user visible names get a slot in the vregs array;
    // anything at or above that is skipped.
    if (!(vreg < cfg->sym_vreg_map_user_visible.size()))
        return;

    // This store does not appear to need to be volatile: LLVM knows the vregs are reachable
    // through the FrameInfo, which escapes.
    auto* slot = emitter.getBuilder()->CreateConstInBoundsGEP1_64(irstate->getVRegsVar(), vreg);
    emitter.getBuilder()->CreateStore(get_llvm_val_cb(), slot);
}
// only updates symbol_table if we're *not* setting a global
void _doSet(InternedString name, CompilerVariable* val, const UnwindInfo& unw_info) {
assert(name.s() != "None");
......@@ -1755,6 +1820,9 @@ private:
llvm::Value* gep = getClosureElementGep(emitter, closureValue, offset);
emitter.getBuilder()->CreateStore(val->makeConverted(emitter, UNKNOWN)->getValue(), gep);
}
auto&& get_llvm_val = [&]() { return val->makeConverted(emitter, UNKNOWN)->getValue(); };
_setVRegIfUserVisible(name, get_llvm_val);
}
}
......@@ -1947,6 +2015,8 @@ private:
// SyntaxError: can not delete variable 'x' referenced in nested scope
assert(vst == ScopeInfo::VarScopeType::FAST);
_setVRegIfUserVisible(target->id, []() { return getNullPtr(g.llvm_value_type_ptr); });
if (symbol_table.count(target->id) == 0) {
llvm::CallSite call
= emitter.createCall(unw_info, g.funcs.assertNameDefined,
......@@ -2546,7 +2616,8 @@ public:
pp->addFrameVar("!current_stmt", UNBOXED_INT);
if (ENABLE_FRAME_INTROSPECTION) {
// For deopts we need to add the compiler created names to the stackmap
if (ENABLE_FRAME_INTROSPECTION && pp->isDeopt()) {
// TODO: don't need to use a sorted symbol table if we're explicitly recording the names!
// nice for debugging though.
typedef std::pair<InternedString, CompilerVariable*> Entry;
......@@ -2554,6 +2625,11 @@ public:
std::sort(sorted_symbol_table.begin(), sorted_symbol_table.end(),
[](const Entry& lhs, const Entry& rhs) { return lhs.first < rhs.first; });
for (const auto& p : sorted_symbol_table) {
// We never have to include non compiler generated vars because the user visible variables are stored
// inside the vregs array.
if (!p.first.isCompilerCreatedName())
continue;
CompilerVariable* v = p.second;
v->serializeToFrame(stackmap_args);
pp->addFrameVar(p.first.s(), v->getType());
......@@ -2687,6 +2763,11 @@ public:
passed_closure = AI;
symbol_table[internString(PASSED_CLOSURE_NAME)]
= new ConcreteCompilerVariable(getPassedClosureType(), AI, true);
// store the passed_closure inside the frame info so that frame introspection can access it without needing
// a stackmap entry
emitter.getBuilder()->CreateStore(passed_closure,
getPassedClosureGep(*emitter.getBuilder(), irstate->getFrameInfoVar()));
++AI;
}
......
......@@ -72,6 +72,7 @@ private:
llvm::Value* boxed_locals;
llvm::Value* frame_info_arg;
llvm::Value* globals;
llvm::Value* vregs;
int scratch_size;
public:
......@@ -93,6 +94,7 @@ public:
llvm::Value* getScratchSpace(int min_bytes);
llvm::Value* getFrameInfoVar();
llvm::Value* getBoxedLocalsVar();
llvm::Value* getVRegsVar();
ConcreteCompilerType* getReturnType() { return cf->getReturnType(); }
......
......@@ -34,6 +34,9 @@ void PatchpointInfo::addFrameVar(llvm::StringRef name, CompilerType* type) {
}
int ICSetupInfo::totalSize() const {
if (isDeopt())
return DEOPT_CALL_ONLY_SIZE;
int call_size = CALL_ONLY_SIZE;
if (getCallingConvention() != llvm::CallingConv::C) {
// 14 bytes per reg that needs to be spilled
......@@ -198,7 +201,8 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
if (spilled)
nspills++;
}
ASSERT(nspills <= MAX_FRAME_SPILLS, "did %d spills but expected only %d!", nspills, MAX_FRAME_SPILLS);
RELEASE_ASSERT(nspills <= pp->numFrameSpillsSupported(), "did %d spills but expected only %d!", nspills,
pp->numFrameSpillsSupported());
assert(scratch_size % sizeof(void*) == 0);
assert(scratch_rbp_offset % sizeof(void*) == 0);
......@@ -216,7 +220,6 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
frame_remapped);
continue;
}
LiveOutSet live_outs(extractLiveOuts(r, ic->getCallingConvention()));
if (ic->hasReturnValue()) {
......@@ -351,4 +354,8 @@ ICSetupInfo* createHasnextIC(TypeRecorder* type_recorder) {
return ICSetupInfo::initialize(true, 2, 64, ICSetupInfo::Hasnext, type_recorder);
}
// Creates the ICSetupInfo for a deopt call site (type ICSetupInfo::Deopt).
// Unlike the other create*IC helpers visible here, no TypeRecorder is attached (NULL).
// NOTE(review): the leading `true, 1, 0` arguments are presumably has-return-value,
// slot count and slot size -- confirm against ICSetupInfo::initialize.
ICSetupInfo* createDeoptIC() {
return ICSetupInfo::initialize(true, 1, 0, ICSetupInfo::Deopt, NULL);
}
} // namespace pyston
......@@ -32,7 +32,9 @@ class TypeRecorder;
static const int MAX_FRAME_SPILLS = 9; // TODO this shouldn't have to be larger than the set of non-callee-save args (9)
// except that will we currently spill the same reg multiple times
static const int CALL_ONLY_SIZE
static const int CALL_ONLY_SIZE = 13 + 1; // 13 for the call, + 1 if we want to nop/trap
static const int DEOPT_CALL_ONLY_SIZE
= 13 + (MAX_FRAME_SPILLS * 9)
+ 1; // 13 for the call, 9 bytes per spill (7 for GP, 9 for XMM), + 1 if we want to nop/trap
......@@ -53,6 +55,7 @@ public:
Binexp,
Nonzero,
Hasnext,
Deopt,
};
private:
......@@ -72,6 +75,7 @@ public:
int totalSize() const;
bool hasReturnValue() const { return has_return_value; }
// True when this IC was set up with type Deopt.
bool isDeopt() const { return type == Deopt; }
llvm::CallingConv::ID getCallingConvention() const {
// FIXME: we currently have some issues with using PreserveAll (the rewriter currently
......@@ -124,6 +128,8 @@ public:
int scratchStackmapArg() { return 0; }
int scratchSize() { return 80 + MAX_FRAME_SPILLS * sizeof(void*); }
// True when this patchpoint has IC info attached and that IC is a deopt IC.
bool isDeopt() const {
    if (!icinfo)
        return false;
    return icinfo->isDeopt();
}
// Number of frame spills this patchpoint supports: MAX_FRAME_SPILLS for deopt
// patchpoints, none for every other patchpoint.
int numFrameSpillsSupported() const {
    if (isDeopt())
        return MAX_FRAME_SPILLS;
    return 0;
}
void addFrameVar(llvm::StringRef name, CompilerType* type);
void setNumFrameArgs(int num_frame_args) {
......@@ -164,6 +170,7 @@ ICSetupInfo* createDelitemIC(TypeRecorder* type_recorder);
ICSetupInfo* createBinexpIC(TypeRecorder* type_recorder, ICInfo* bjit_ic_info);
ICSetupInfo* createNonzeroIC(TypeRecorder* type_recorder);
ICSetupInfo* createHasnextIC(TypeRecorder* type_recorder);
ICSetupInfo* createDeoptIC();
} // namespace pyston
......
......@@ -36,6 +36,7 @@
#include "codegen/irgen/hooks.h"
#include "codegen/irgen/irgenerator.h"
#include "codegen/stackmaps.h"
#include "core/cfg.h"
#include "core/util.h"
#include "runtime/ctxswitching.h"
#include "runtime/objmodel.h"
......@@ -558,9 +559,6 @@ public:
}
void handleCFrame(unw_cursor_t* cursor) {
unw_word_t ip = get_cursor_ip(cursor);
unw_word_t bp = get_cursor_bp(cursor);
PythonFrameIteratorImpl frame_iter;
bool found_frame = pystack_extractor.handleCFrame(cursor, &frame_iter);
if (found_frame) {
......@@ -850,9 +848,6 @@ PythonFrameIterator::PythonFrameIterator(std::unique_ptr<PythonFrameIteratorImpl
std::swap(this->impl, impl);
}
// TODO factor getDeoptState and fastLocalsToBoxedLocals
// because they are pretty ugly but have a pretty repetitive pattern.
DeoptState getDeoptState() {
DeoptState rtn;
bool found = false;
......@@ -891,6 +886,24 @@ DeoptState getDeoptState() {
}
}
// We could do much better here by memcpying the user visible vregs into the new location which the
// interpreter allocated, instead of storing them one by one in a dict and then retrieving them
// and assigning them to the new vregs array...
// But deopts are so rare it's not really worth it.
Box** vregs = frame_iter->getFrameInfo()->vregs;
for (const auto& p : cf->md->source->cfg->sym_vreg_map_user_visible) {
if (is_undefined.count(p.first.s()))
continue;
assert(p.second >= 0 && p.second < cf->md->source->cfg->sym_vreg_map_user_visible.size());
Box* v = vregs[p.second];
if (!v)
continue;
ASSERT(gc::isValidGCObject(v), "%p", v);
d->d[p.first.getBox()] = v;
}
for (const auto& p : cf->location_map->names) {
if (p.first()[0] == '!')
continue;
......@@ -937,10 +950,6 @@ Box* fastLocalsToBoxedLocals() {
Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
assert(impl.get());
BoxedDict* d;
BoxedClosure* closure;
FrameInfo* frame_info;
FunctionMetadata* md = impl->getMD();
ScopeInfo* scope_info = md->source->getScopeInfo();
......@@ -951,92 +960,15 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
return md->source->parent_module->getAttrWrapper();
}
BoxedDict* d;
FrameInfo* frame_info = impl->getFrameInfo();
BoxedClosure* closure = frame_info->passed_closure;
if (impl->getId().type == PythonFrameId::COMPILED) {
CompiledFunction* cf = impl->getCF();
d = new BoxedDict();
uint64_t ip = impl->getId().ip;
assert(ip > cf->code_start);
unsigned offset = ip - cf->code_start;
assert(cf->location_map);
// We have to detect + ignore any entries for variables that
// could have been defined (so they have entries) but aren't (so the
// entries point to uninitialized memory).
std::unordered_set<std::string> is_undefined;
for (const auto& p : cf->location_map->names) {
if (!startswith(p.first(), "!is_defined_"))
continue;
auto e = p.second.findEntry(offset);
if (e) {
const auto& locs = e->locations;
assert(locs.size() == 1);
uint64_t v = impl->readLocation(locs[0]);
if ((v & 1) == 0)
is_undefined.insert(p.first().substr(12));
}
}
for (const auto& p : cf->location_map->names) {
if (p.first()[0] == '!')
continue;
if (p.first()[0] == '#')
continue;
if (is_undefined.count(p.first()))
continue;
auto e = p.second.findEntry(offset);
if (e) {
const auto& locs = e->locations;
llvm::SmallVector<uint64_t, 1> vals;
// printf("%s: %s\n", p.first().c_str(), e.type->debugName().c_str());
// printf("%ld locs\n", locs.size());
for (auto& loc : locs) {
auto v = impl->readLocation(loc);
vals.push_back(v);
// printf("%d %d %d: 0x%lx\n", loc.type, loc.regnum, loc.offset, v);
// dump((void*)v);
}
Box* v = e->type->deserializeFromFrame(vals);
// printf("%s: (pp id %ld) %p\n", p.first().c_str(), e._debug_pp_id, v);
assert(gc::isValidGCObject(v));
d->d[boxString(p.first())] = v;
}
}
closure = NULL;
if (cf->location_map->names.count(PASSED_CLOSURE_NAME) > 0) {
auto e = cf->location_map->names[PASSED_CLOSURE_NAME].findEntry(offset);
if (e) {
const auto& locs = e->locations;
llvm::SmallVector<uint64_t, 1> vals;
for (auto& loc : locs) {
vals.push_back(impl->readLocation(loc));
}
Box* v = e->type->deserializeFromFrame(vals);
assert(gc::isValidGCObject(v));
closure = static_cast<BoxedClosure*>(v);
}
}
frame_info = impl->getFrameInfo();
assert(impl->getId().ip > cf->code_start);
d = localsForInterpretedFrame(frame_info->vregs, cf->md->source->cfg);
} else if (impl->getId().type == PythonFrameId::INTERPRETED) {
d = localsForInterpretedFrame((void*)impl->getId().bp, true);
closure = passedClosureForInterpretedFrame((void*)impl->getId().bp);
frame_info = getFrameInfoForInterpretedFrame((void*)impl->getId().bp);
d = localsForInterpretedFrame((void*)impl->getId().bp);
} else {
abort();
}
......@@ -1071,10 +1003,14 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
// TODO Right now d just has all the python variables that are *initialized*
// But we also need to loop through all the uninitialized variables that we have
// access to and delete them from the locals dict
for (const auto& p : *d) {
Box* varname = p.first;
Box* value = p.second;
setitem(frame_info->boxedLocals, varname, value);
if (frame_info->boxedLocals == dict_cls) {
((BoxedDict*)frame_info->boxedLocals)->d.insert(d->d.begin(), d->d.end());
} else {
for (const auto& p : *d) {
Box* varname = p.first;
Box* value = p.second;
setitem(frame_info->boxedLocals, varname, value);
}
}
return frame_info->boxedLocals;
......
......@@ -2528,10 +2528,11 @@ void CFG::print(llvm::raw_ostream& stream) {
class AssignVRegsVisitor : public NoopASTVisitor {
public:
int index = 0;
bool only_user_visible;
llvm::DenseMap<InternedString, int> sym_vreg_map;
ScopeInfo* scope_info;
AssignVRegsVisitor(ScopeInfo* scope_info) : scope_info(scope_info) {}
AssignVRegsVisitor(ScopeInfo* scope_info, bool only_user_visible) : only_user_visible(only_user_visible), scope_info(scope_info) {}
bool visit_arguments(AST_arguments* node) override {
for (AST_expr* d : node->defaults)
......@@ -2563,6 +2564,9 @@ public:
if (node->vreg != -1)
return true;
if (only_user_visible && node->id.isCompilerCreatedName())
return true;
if (node->lookup_type == ScopeInfo::VarScopeType::UNKNOWN)
node->lookup_type = scope_info->getScopeTypeOfName(node->id);
......@@ -2585,23 +2589,31 @@ void CFG::assignVRegs(const ParamNames& param_names, ScopeInfo* scope_info) {
if (has_vregs_assigned)
return;
AssignVRegsVisitor visitor(scope_info);
for (CFGBlock* b : blocks) {
for (AST_stmt* stmt : b->body) {
stmt->accept(&visitor);
AssignVRegsVisitor visitor(scope_info, true);
// we need to do two passes: first we assign vregs to the user visible vars and then the compiler created names get theirs.
for (int i=0; i<2; ++i) {
for (CFGBlock* b : blocks) {
for (AST_stmt* stmt : b->body) {
stmt->accept(&visitor);
}
}
}
for (auto* name : param_names.arg_names) {
name->accept(&visitor);
}
for (auto* name : param_names.arg_names) {
name->accept(&visitor);
}
if (param_names.vararg_name)
param_names.vararg_name->accept(&visitor);
if (param_names.vararg_name)
param_names.vararg_name->accept(&visitor);
if (param_names.kwarg_name)
param_names.kwarg_name->accept(&visitor);
if (param_names.kwarg_name)
param_names.kwarg_name->accept(&visitor);
if (visitor.only_user_visible) {
visitor.only_user_visible = false;
sym_vreg_map_user_visible = visitor.sym_vreg_map;
}
}
sym_vreg_map = std::move(visitor.sym_vreg_map);
has_vregs_assigned = true;
}
......
......@@ -81,7 +81,10 @@ private:
public:
std::vector<CFGBlock*> blocks;
// Contains the vreg assignment for every name including the user visible ones
// (which will have lower ids than the compiler generated ones).
llvm::DenseMap<InternedString, int> sym_vreg_map;
llvm::DenseMap<InternedString, int> sym_vreg_map_user_visible;
CFG() : next_idx(0), has_vregs_assigned(false) {}
......
......@@ -883,8 +883,11 @@ struct FrameInfo {
Box* boxedLocals;
BoxedFrame* frame_obj;
BoxedClosure* passed_closure;
FrameInfo(ExcInfo exc) : exc(exc), boxedLocals(NULL), frame_obj(0) {}
Box** vregs;
FrameInfo(ExcInfo exc) : exc(exc), boxedLocals(NULL), frame_obj(0), passed_closure(0), vregs(0) {}
void gcVisit(GCVisitor* visitor);
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment