Commit 25ac9de4 authored by Kevin Modzelewski

Basic (new) deopt support

Old deopt worked by compiling two copies of every BB, one with
speculations and one without, and stitching the two together.
This has a number of issues:
- doubles the amount of code LLVM has to jit
- can't ever get back on the optimized path
- doesn't support 'deopt if branch taken'
- horrifically complex
- doesn't support deopt from within try blocks

We actually ran into that last issue (see test from previous commit).  So
rather than wade in and try to fix old-deopt, just start switching to new-deopt.

(new) deopt works by using the frame introspection features, gathering up all
the locals, and passing them to the interpreter.
parent 6dad4d22
......@@ -65,7 +65,7 @@ public:
void initArguments(int nargs, BoxedClosure* closure, BoxedGenerator* generator, Box* arg1, Box* arg2, Box* arg3,
Box** args);
static Value execute(ASTInterpreter& interpreter, AST_stmt* start_at = NULL);
static Value execute(ASTInterpreter& interpreter, CFGBlock* start_block = NULL, AST_stmt* start_at = NULL);
private:
Box* createFunction(AST* node, AST_arguments* args, const std::vector<AST_stmt*>& body);
......@@ -138,6 +138,7 @@ public:
CompiledFunction* getCF() { return compiled_func; }
FrameInfo* getFrameInfo() { return &frame_info; }
const SymMap& getSymbolTable() { return sym_table; }
void addSymbol(const std::string& name, Box* value, bool allow_duplicates);
void gcVisit(GCVisitor* visitor);
};
......@@ -169,12 +170,76 @@ Box* astInterpretFunction(CompiledFunction* cf, int nargs, Box* closure, Box* ge
return v.o ? v.o : None;
}
Box* astInterpretFrom(CompiledFunction* cf, AST_stmt* start_at, BoxedDict* locals) {
assert(locals->d.size() == 0);
// Binds `name` to `value` in this interpreter's symbol table.
//
// When `allow_duplicates` is false, the name must not already be bound; this is
// checked with an assert, so it is only enforced in builds where assert is active.
// When `allow_duplicates` is true, any existing binding is overwritten.
void ASTInterpreter::addSymbol(const std::string& name, Box* value, bool allow_duplicates) {
    assert(allow_duplicates || sym_table.count(name) == 0);
    sym_table[name] = value;
}
// Continues running `cf` in the AST interpreter, picking up right after the
// expression `after_expr` has been evaluated. Called from the `deopt` runtime
// entry point.
//
//   cf             - the function to interpret; must be non-null.
//   after_expr     - the expression whose evaluation has already completed.
//   enclosing_stmt - the statement containing `after_expr`. Must be an Assign or
//                    Expr statement whose value is `after_expr`, optionally
//                    wrapped in an Invoke.
//   expr_val       - the value `after_expr` produced; must be non-null.
//   locals         - dict whose keys are all strings; every entry is installed
//                    into the interpreter's symbol table before execution starts.
//
// Returns the object produced by the interpreter, or None if it produced none.
Box* astInterpretFrom(CompiledFunction* cf, AST_expr* after_expr, AST_stmt* enclosing_stmt, Box* expr_val,
                      BoxedDict* locals) {
    assert(cf);
    assert(enclosing_stmt);
    assert(locals);
    assert(after_expr);
    assert(expr_val);

    ASTInterpreter interpreter(cf);

    // Install the captured locals first; the interpreter must not start running
    // until its symbol table has been populated.
    for (const auto& p : locals->d) {
        assert(p.first->cls == str_cls);
        interpreter.addSymbol(static_cast<BoxedString*>(p.first)->s, p.second, false);
    }

    CFGBlock* start_block = NULL;
    AST_stmt* starting_statement = NULL;

    // Peel the enclosing statement until we reach the Assign/Expr that directly
    // holds `after_expr`.
    while (true) {
        if (enclosing_stmt->type == AST_TYPE::Assign) {
            auto asgn = ast_cast<AST_Assign>(enclosing_stmt);
            assert(asgn->value == after_expr);
            assert(asgn->targets.size() == 1);
            assert(asgn->targets[0]->type == AST_TYPE::Name);
            auto name = ast_cast<AST_Name>(asgn->targets[0]);
            // NOTE(review): the target is required to start with '#' -- presumably
            // these are compiler-generated temporaries; confirm.
            assert(name->id[0] == '#');
            // Complete the assignment ourselves, since the expression has already
            // been evaluated. Duplicates are allowed here, unlike for the locals above.
            interpreter.addSymbol(name->id, expr_val, true);
            break;
        } else if (enclosing_stmt->type == AST_TYPE::Expr) {
            // A bare expression statement: the value is simply discarded.
            auto expr = ast_cast<AST_Expr>(enclosing_stmt);
            assert(expr->value == after_expr);
            break;
        } else if (enclosing_stmt->type == AST_TYPE::Invoke) {
            // The wrapped statement finished without raising, so execution resumes
            // at the start of the invoke's normal destination. Loop again to
            // process the wrapped statement itself.
            auto invoke = ast_cast<AST_Invoke>(enclosing_stmt);
            start_block = invoke->normal_dest;
            starting_statement = start_block->body[0];
            enclosing_stmt = invoke->stmt;
        } else {
            RELEASE_ASSERT(0, "should not be able to reach here with anything other than an Assign (got %d)",
                           enclosing_stmt->type);
        }
    }

    if (start_block == NULL) {
        // No Invoke told us where to resume, so locate `enclosing_stmt` in the CFG
        // and resume at the statement that follows it in the same block.
        // TODO inefficient
        for (auto block : cf->clfunc->source->cfg->blocks) {
            int n = block->body.size();
            for (int i = 0; i < n; i++) {
                if (block->body[i] == enclosing_stmt) {
                    ASSERT(i + 1 < n, "how could we deopt from a non-invoke terminator?");
                    start_block = block;
                    starting_statement = block->body[i + 1];
                    break;
                }
            }

            if (start_block)
                break;
        }

        ASSERT(start_block, "was unable to find the starting block??");
        assert(starting_statement);
    }

    Value v = ASTInterpreter::execute(interpreter, start_block, starting_statement);

    return v.o ? v.o : None;
}
......@@ -288,16 +353,33 @@ public:
};
}
Value ASTInterpreter::execute(ASTInterpreter& interpreter, AST_stmt* start_at) {
Value ASTInterpreter::execute(ASTInterpreter& interpreter, CFGBlock* start_block, AST_stmt* start_at) {
threading::allowGLReadPreemption();
assert(start_at == NULL);
void* frame_addr = __builtin_frame_address(0);
RegisterHelper frame_registerer(&interpreter, frame_addr);
Value v;
interpreter.next_block = interpreter.source_info->cfg->getStartingBlock();
assert((start_block == NULL) == (start_at == NULL));
if (start_block == NULL) {
start_block = interpreter.source_info->cfg->getStartingBlock();
start_at = start_block->body[0];
}
interpreter.current_block = start_block;
bool started = false;
for (auto s : start_block->body) {
if (!started) {
if (s != start_at)
continue;
started = true;
}
interpreter.current_inst = s;
v = interpreter.visit_stmt(s);
}
while (interpreter.next_block) {
interpreter.current_block = interpreter.next_block;
interpreter.next_block = 0;
......
......@@ -21,6 +21,7 @@ namespace gc {
class GCVisitor;
}
class AST_expr;
class AST_stmt;
class Box;
class BoxedDict;
......@@ -31,7 +32,8 @@ extern const void* interpreter_instr_addr;
Box* astInterpretFunction(CompiledFunction* f, int nargs, Box* closure, Box* generator, Box* arg1, Box* arg2, Box* arg3,
Box** args);
Box* astInterpretFrom(CompiledFunction* cf, AST_stmt* start_at, BoxedDict* locals);
Box* astInterpretFrom(CompiledFunction* cf, AST_expr* after_expr, AST_stmt* enclosing_stmt, Box* expr_val,
BoxedDict* locals);
AST_stmt* getCurrentStatementForInterpretedFrame(void* frame_ptr);
CompiledFunction* getCFForInterpretedFrame(void* frame_ptr);
......
......@@ -346,7 +346,8 @@ private:
OpInfo getEmptyOpInfo(UnwindInfo unw_info) { return OpInfo(irstate->getEffortLevel(), NULL, unw_info); }
void createExprTypeGuard(llvm::Value* check_val, AST_expr* node, CompilerVariable* node_value) {
void createExprTypeGuard(llvm::Value* check_val, AST_expr* node, llvm::Value* node_value,
AST_stmt* current_statement) {
assert(check_val->getType() == g.i1);
llvm::Metadata* md_vals[]
......@@ -361,17 +362,18 @@ private:
= llvm::BasicBlock::Create(g.context, "check_succeeded", irstate->getLLVMFunction());
success_bb->moveAfter(curblock);
// Create the guard with both branches leading to the success_bb,
// and let the deopt path change the failure case to point to the
// as-yet-unknown deopt block.
// TODO Not the best approach since if we fail to do that patching,
// the guard will just silently be ignored.
llvm::BranchInst* guard = emitter.getBuilder()->CreateCondBr(check_val, success_bb, success_bb, branch_weights);
llvm::BasicBlock* deopt_bb = llvm::BasicBlock::Create(g.context, "check_failed", irstate->getLLVMFunction());
curblock = success_bb;
llvm::BranchInst* guard = emitter.getBuilder()->CreateCondBr(check_val, success_bb, deopt_bb, branch_weights);
curblock = deopt_bb;
emitter.getBuilder()->SetInsertPoint(curblock);
llvm::Value* v = emitter.createCall2(UnwindInfo(current_statement, NULL), g.funcs.deopt,
embedConstantPtr(node, g.i8->getPointerTo()), node_value);
emitter.getBuilder()->CreateRet(v);
out_guards.addExprTypeGuard(myblock, guard, node, node_value, symbol_table);
curblock = success_bb;
emitter.getBuilder()->SetInsertPoint(curblock);
}
CompilerVariable* evalAttribute(AST_Attribute* node, UnwindInfo unw_info) {
......@@ -1244,7 +1246,7 @@ private:
llvm::Value* guard_check = old_rtn->makeClassCheck(emitter, speculated_class);
assert(guard_check->getType() == g.i1);
createExprTypeGuard(guard_check, node, old_rtn);
createExprTypeGuard(guard_check, node, old_rtn->getValue(), unw_info.current_stmt);
rtn = unboxVar(speculated_type, old_rtn->getValue(), true);
}
......
......@@ -158,6 +158,7 @@ public:
void addExprTypeGuard(CFGBlock* cfg_block, llvm::BranchInst* branch, AST_expr* ast_node, CompilerVariable* val,
const SymbolTable& st) {
abort();
ExprTypeGuard*& g = expr_type_guards[ast_node];
assert(g == NULL);
g = new ExprTypeGuard(cfg_block, branch, ast_node, val, st);
......
......@@ -248,6 +248,7 @@ void initGlobalFuncs(GlobalState& g) {
g.funcs.__cxa_end_catch = addFunc((void*)__cxa_end_catch, g.void_);
GET(raise0);
GET(raise3);
GET(deopt);
GET(div_float_float);
GET(floordiv_float_float);
......
......@@ -48,6 +48,7 @@ struct GlobalFuncs {
llvm::Value* __cxa_begin_catch, *__cxa_end_catch;
llvm::Value* raise0, *raise3;
llvm::Value* deopt;
llvm::Value* div_float_float, *floordiv_float_float, *mod_float_float, *pow_float_float;
......
......@@ -586,6 +586,12 @@ BoxedDict* getLocals(bool only_user_visible) {
RELEASE_ASSERT(0, "Internal error: unable to find any python frames");
}
// Reports where the top Python frame currently is: the compiled function it
// belongs to and the statement it is executing.
ExecutionPoint getExecutionPoint() {
    auto top_frame = getTopPythonFrame();

    ExecutionPoint point;
    point.cf = top_frame->getCF();
    point.current_stmt = top_frame->getCurrentStatement();
    return point;
}
llvm::JITEventListener* makeTracebacksListener() {
return new TracebacksEventListener();
......
......@@ -32,6 +32,12 @@ BoxedDict* getLocals(bool only_user_visible);
// Fetches a writeable pointer to the frame-local excinfo object,
// calculating it if necessary (from previous frames).
ExcInfo* getFrameExcInfo();
// Identifies a position inside a Python function: which compiled function is
// running and which statement it is at. Produced by getExecutionPoint().
struct ExecutionPoint {
// The compiled function of the frame, as reported by the frame's getCF().
CompiledFunction* cf;
// The statement the frame is at, as reported by the frame's getCurrentStatement().
AST_stmt* current_stmt;
};
ExecutionPoint getExecutionPoint();
}
#endif
......@@ -106,6 +106,7 @@ void force() {
FORCE(raise0);
FORCE(raise3);
FORCE(deopt);
FORCE(div_i64_i64);
FORCE(mod_i64_i64);
......
......@@ -31,6 +31,7 @@
#include "codegen/irgen/hooks.h"
#include "codegen/parser.h"
#include "codegen/type_recording.h"
#include "codegen/unwinding.h"
#include "core/ast.h"
#include "core/options.h"
#include "core/stats.h"
......@@ -226,6 +227,15 @@ bool PyLt::operator()(Box* lhs, Box* rhs) const {
return cmp == True;
}
// Runtime entry point for deoptimization.
//
// `expr` is the expression that was just evaluated and `value` is its result.
// Bumps the "num_deopt" stat counter, collects the current frame's locals
// (including non-user-visible ones) and its execution point, then hands
// everything to astInterpretFrom. Returns whatever astInterpretFrom returns.
extern "C" Box* deopt(AST_expr* expr, Box* value) {
    static StatCounter deopt_counter("num_deopt");
    deopt_counter.log();

    BoxedDict* frame_locals = getLocals(/* only_user_visible */ false);
    ExecutionPoint resume_point = getExecutionPoint();

    return astInterpretFrom(resume_point.cf, expr, resume_point.current_stmt, value, frame_locals);
}
extern "C" bool softspace(Box* b, bool newval) {
assert(b);
......
......@@ -36,6 +36,8 @@ extern "C" void raise3(Box*, Box*, Box*) __attribute__((__noreturn__));
void raiseExc(Box* exc_obj) __attribute__((__noreturn__));
void raiseRaw(const ExcInfo& e) __attribute__((__noreturn__));
extern "C" Box* deopt(AST_expr* expr, Box* value);
// helper function for raising from the runtime:
void raiseExcHelper(BoxedClass*, const char* fmt, ...) __attribute__((__noreturn__));
......
# statcheck: 0 <= noninit_count('num_deopt') < 500
def f(o):
print "starting"
try:
print o.a
if o.b:
raise Exception()
except Exception, e:
print o.c
print e
print o.d
print "Done"
class C(object):
pass
c = C()
c.a = 1
c.b = 0
c.c = 3
c.d = 4
# These limits are high to try to trigger OSR.
# TODO we should have some way to lower the OSR thresholds
for i in xrange(20000):
if i == 5000:
c.a = []
if i == 6000:
c.b = 1
if i == 7000:
c.c = []
if i == 8000:
c.b = 0
c.d = 1.0
f(c)
......@@ -16,3 +16,6 @@ c = C()
c.pid = 1
for i in xrange(20000):
f(c, None)
if i == 15000:
c.pid = 1.0
......@@ -280,7 +280,7 @@ def run_test(fn, check_stats, run_memcheck):
statname = m.group(1)
raise Exception((l, statname, stats[statname]))
m = re.match("""noninit_count\(['"]([\w_]+)['"]\)""", l)
m = re.search("""noninit_count\(['"]([\w_]+)['"]\)""", l)
if m:
statname = m.group(1)
raise Exception((l, statname, noninit_count(statname)))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment