Commit fc366564 authored by Marius Wachtler's avatar Marius Wachtler

Add new JIT tier between the interpreter and the LLVM JIT tiers.

This JIT is tightly coupled to the ASTInterpreter: at every CFGBlock* entry/exit
one can switch between interpreting and directly executing the generated code without having
to do any translations.
Generating the code is pretty fast compared to the LLVM tier but the generated code is not as fast
as code generated by the higher LLVM tiers.
But because the JITed code can use runtime ICs, avoids a lot of interpretation overhead, and
stores CFGBlock-local symbols inside register/stack slots, it's much faster than the interpreter.
parent 1ee49a67
......@@ -34,6 +34,7 @@ add_library(PYSTON_OBJECTS OBJECT ${OPTIONAL_SRCS}
capi/object.cpp
capi/typeobject.cpp
codegen/ast_interpreter.cpp
codegen/baseline_jit.cpp
codegen/codegen.cpp
codegen/compvars.cpp
codegen/entry.cpp
......
......@@ -109,7 +109,6 @@ void ICSlotRewrite::commit(CommitHook* hook) {
if (!do_commit)
return;
assert(assembler->isExactlyFull());
assert(!assembler->hasFailed());
for (int i = 0; i < dependencies.size(); i++) {
......
......@@ -62,9 +62,8 @@ private:
ICSlotInfo* ic_entry;
ICSlotRewrite(ICInfo* ic, const char* debug_name);
public:
ICSlotRewrite(ICInfo* ic, const char* debug_name);
~ICSlotRewrite();
assembler::Assembler* getAssembler() { return assembler; }
......
......@@ -958,7 +958,7 @@ void Rewriter::_call(RewriterVar* result, bool has_side_effects, void* func_addr
assembler->callq(r);
} else {
assembler->call(assembler::Immediate(offset));
assert(asm_address == (uint64_t)assembler->curInstPointer());
assert(assembler->hasFailed() || asm_address == (uint64_t)assembler->curInstPointer());
}
assert(vars_by_location.count(assembler::RAX) == 0);
......
This diff is collapsed.
......@@ -25,6 +25,7 @@ class GCVisitor;
class AST_expr;
class AST_stmt;
class AST_Jump;
class Box;
class BoxedClosure;
class BoxedDict;
......@@ -33,6 +34,43 @@ struct LineInfo;
extern const void* interpreter_instr_addr;
// Groups static helper entry points related to the AST interpreter under one
// name. Only the declarations are visible here; the notes below are inferred
// from the names/signatures and must be checked against the definitions.
// NOTE(review): per the struct name these are presumably the functions the
// baseline JIT calls back into -- confirm.
struct ASTInterpreterJitInterface {
// NOTE(review): presumably the offsets of the interpreter's "current block" /
// "current instruction" fields, so generated code can address them -- confirm.
static int getCurrentBlockOffset();
static int getCurrentInstOffset();
// Every helper below receives the interpreter as an opaque void* (`interp`).
// Helpers returning Box*:
static Box* derefHelper(void* interp, InternedString s);
static Box* doOSRHelper(void* interp, AST_Jump* node);
static Box* getBoxedLocalHelper(void* interp, BoxedString* s);
static Box* getBoxedLocalsHelper(void* interp);
static Box* getLocalHelper(void* interp, InternedString id);
static Box* landingpadHelper(void* interp);
static Box* setExcInfoHelper(void* interp, Box* type, Box* value, Box* traceback);
static Box* uncacheExcInfoHelper(void* interp);
static Box* yieldHelper(void* interp, Box* val);
// Helpers with no return value:
static void setItemNameHelper(void* interp, Box* str, Box* val);
static void setLocalClosureHelper(void* interp, InternedString id, Box* v);
static void setLocalHelper(void* interp, InternedString id, Box* v);
};
class RewriterVar;
// Couples a concrete value with a RewriterVar*.
//
// The concrete value is stored in an untagged union (bool / int64_t / double /
// Box*); no discriminator is kept, so the user has to know which member was
// set by the constructor that built the object.
struct Value {
    union {
        bool b;
        int64_t n;
        double d;
        Box* o;
    };
    RewriterVar* var;

    // Allows a Value to be passed wherever a RewriterVar* is expected.
    operator RewriterVar*() { return var; }

    // Default state: null object pointer and null var.
    Value() : o(nullptr), var(nullptr) {}

    // One constructor per union member; each stores the payload plus the var.
    Value(bool flag, RewriterVar* rewriter_var) : b(flag), var(rewriter_var) {}
    Value(int64_t number, RewriterVar* rewriter_var) : n(number), var(rewriter_var) {}
    Value(double real, RewriterVar* rewriter_var) : d(real), var(rewriter_var) {}
    Value(Box* object, RewriterVar* rewriter_var) : o(object), var(rewriter_var) {}
};
void setupInterpreter();
Box* astInterpretFunction(CompiledFunction* f, int nargs, Box* closure, Box* generator, Box* globals, Box* arg1,
Box* arg2, Box* arg3, Box** args);
......
This diff is collapsed.
This diff is collapsed.
......@@ -25,6 +25,7 @@
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/FileSystem.h"
#include "analysis/function_analysis.h"
#include "analysis/scoping_analysis.h"
#include "codegen/compvars.h"
#include "core/ast.h"
......@@ -62,6 +63,10 @@ SourceInfo::SourceInfo(BoxedModule* m, ScopingAnalysis* scoping, FutureFlags fut
}
}
// Destructor. Performs no explicit cleanup yet (see the TODO below).
// NOTE(review): presumably defined out of line so that members holding
// forward-declared types can be destroyed where those types are complete -- confirm.
SourceInfo::~SourceInfo() {
// TODO: release memory..
}
void FunctionAddressRegistry::registerFunction(const std::string& name, void* addr, int length,
llvm::Function* llvm_func) {
assert(addr);
......
......@@ -544,9 +544,7 @@ static void emitBBs(IRGenState* irstate, TypeAnalysis* types, const OSREntryDesc
emitter->getBuilder()->CreateStore(new_call_count, call_count_ptr);
int reopt_threshold;
if (effort == EffortLevel::MINIMAL)
reopt_threshold = REOPT_THRESHOLD_BASELINE;
else if (effort == EffortLevel::MODERATE)
if (effort == EffortLevel::MODERATE)
reopt_threshold = REOPT_THRESHOLD_T2;
else
RELEASE_ASSERT(0, "Unknown effort: %d", (int)effort);
......@@ -1059,15 +1057,15 @@ CompiledFunction* doCompile(SourceInfo* source, ParamNames* param_names, const O
computeBlockSetClosure(blocks);
}
std::unique_ptr<LivenessAnalysis> liveness = computeLivenessInfo(source->cfg);
LivenessAnalysis* liveness = source->getLiveness();
std::unique_ptr<PhiAnalysis> phis;
if (entry_descriptor)
phis = computeRequiredPhis(entry_descriptor, liveness.get(), source->getScopeInfo());
phis = computeRequiredPhis(entry_descriptor, liveness, source->getScopeInfo());
else
phis = computeRequiredPhis(*param_names, source->cfg, liveness.get(), source->getScopeInfo());
phis = computeRequiredPhis(*param_names, source->cfg, liveness, source->getScopeInfo());
IRGenState irstate(cf, source, std::move(liveness), std::move(phis), param_names, getGCBuilder(), dbg_funcinfo);
IRGenState irstate(cf, source, std::move(phis), param_names, getGCBuilder(), dbg_funcinfo);
emitBBs(&irstate, types, entry_descriptor, blocks);
......
......@@ -21,6 +21,7 @@
#include "analysis/scoping_analysis.h"
#include "asm_writing/icinfo.h"
#include "codegen/ast_interpreter.h"
#include "codegen/baseline_jit.h"
#include "codegen/codegen.h"
#include "codegen/compvars.h"
#include "codegen/irgen.h"
......@@ -120,6 +121,12 @@ ScopeInfo* SourceInfo::getScopeInfo() {
return scoping->getScopeInfoForNode(ast);
}
// Returns the liveness analysis for this source's CFG.
// The analysis is computed on the first call via computeLivenessInfo(cfg) and
// kept in liveness_info; later calls hand back the stored result.
LivenessAnalysis* SourceInfo::getLiveness() {
    if (LivenessAnalysis* cached = liveness_info.get())
        return cached;

    liveness_info = computeLivenessInfo(cfg);
    return liveness_info.get();
}
EffortLevel initialEffort() {
if (FORCE_INTERPRETER)
return EffortLevel::INTERPRETED;
......@@ -127,7 +134,7 @@ EffortLevel initialEffort() {
return EffortLevel::MAXIMAL;
if (ENABLE_INTERPRETER)
return EffortLevel::INTERPRETED;
return EffortLevel::MINIMAL;
return EffortLevel::MODERATE;
}
static void compileIR(CompiledFunction* cf, EffortLevel effort) {
......@@ -270,13 +277,6 @@ CompiledFunction* compileFunction(CLFunction* f, FunctionSpecialization* spec, E
num_compiles.log();
break;
}
case EffortLevel::MINIMAL: {
static StatCounter us_compiling("us_compiling_1_minimal");
us_compiling.log(us);
static StatCounter num_compiles("num_compiles_1_minimal");
num_compiles.log();
break;
}
case EffortLevel::MODERATE: {
static StatCounter us_compiling("us_compiling_2_moderate");
us_compiling.log(us);
......@@ -742,6 +742,21 @@ void CompiledFunction::speculationFailed() {
}
}
// Constructs a CompiledFunction record from its parts.
//
// clfunc starts out null, both counters start at zero and location_map starts
// out null; every other field is copied from the corresponding argument.
// Exactly one of `spec` and `entry_descriptor` must be non-null (asserted).
CompiledFunction::CompiledFunction(llvm::Function* func, FunctionSpecialization* spec, bool is_interpreted, void* code,
                                   EffortLevel effort, const OSREntryDescriptor* entry_descriptor)
    : clfunc(nullptr),
      func(func),
      spec(spec),
      entry_descriptor(entry_descriptor),
      is_interpreted(is_interpreted),
      code(code),
      effort(effort),
      times_called(0),
      times_speculation_failed(0),
      location_map(nullptr) {
    // One side is null and the other is not <=> exactly one of them is set.
    assert((spec == nullptr) != (entry_descriptor == nullptr));
}
ConcreteCompilerType* CompiledFunction::getReturnType() {
if (spec)
return spec->rtn_type;
......@@ -803,7 +818,7 @@ CompiledFunction* compilePartialFuncInternal(OSRExit* exit) {
assert(exit->parent_cf->clfunc);
CompiledFunction*& new_cf = exit->parent_cf->clfunc->osr_versions[exit->entry];
if (new_cf == NULL) {
EffortLevel new_effort = exit->parent_cf->effort == EffortLevel::INTERPRETED ? EffortLevel::MINIMAL
EffortLevel new_effort = exit->parent_cf->effort == EffortLevel::INTERPRETED ? EffortLevel::MODERATE
: EffortLevel::MAXIMAL;
CompiledFunction* compiled = compileFunction(exit->parent_cf->clfunc, NULL, new_effort, exit->entry);
assert(compiled == new_cf);
......@@ -830,8 +845,6 @@ extern "C" CompiledFunction* reoptCompiledFuncInternal(CompiledFunction* cf) {
EffortLevel new_effort;
if (cf->effort == EffortLevel::INTERPRETED)
new_effort = EffortLevel::MINIMAL;
else if (cf->effort == EffortLevel::MINIMAL)
new_effort = EffortLevel::MODERATE;
else if (cf->effort == EffortLevel::MODERATE)
new_effort = EffortLevel::MAXIMAL;
......
......@@ -42,12 +42,10 @@ extern "C" void dumpLLVM(llvm::Value* v) {
v->dump();
}
IRGenState::IRGenState(CompiledFunction* cf, SourceInfo* source_info, std::unique_ptr<LivenessAnalysis> liveness,
std::unique_ptr<PhiAnalysis> phis, ParamNames* param_names, GCBuilder* gc,
llvm::MDNode* func_dbg_info)
IRGenState::IRGenState(CompiledFunction* cf, SourceInfo* source_info, std::unique_ptr<PhiAnalysis> phis,
ParamNames* param_names, GCBuilder* gc, llvm::MDNode* func_dbg_info)
: cf(cf),
source_info(source_info),
liveness(std::move(liveness)),
phis(std::move(phis)),
param_names(param_names),
gc(gc),
......@@ -426,13 +424,22 @@ IREmitter* createIREmitter(IRGenState* irstate, llvm::BasicBlock*& curblock, IRG
}
static std::unordered_map<AST_expr*, std::vector<BoxedString*>*> made_keyword_storage;
static std::vector<BoxedString*>* getKeywordNameStorage(AST_Call* node) {
// Returns the keyword-name vector associated with a call node.
//
// The vectors live in made_keyword_storage, keyed by the node pointer. The
// first request for a node allocates a vector, registers it and fills it with
// the boxed name of every keyword argument; later requests return the same
// vector unchanged.
std::vector<BoxedString*>* getKeywordNameStorage(AST_Call* node) {
    auto cached = made_keyword_storage.find(node);
    if (cached != made_keyword_storage.end())
        return cached->second;

    std::vector<BoxedString*>* names = new std::vector<BoxedString*>();
    made_keyword_storage.emplace_hint(cached, node, names);

    // The vector was created just above, so it is empty at this point; the
    // names are therefore added exactly once per node.
    if (names->empty()) {
        for (auto keyword : node->keywords) {
            names->push_back(keyword->arg.getBox());
        }
    }

    return names;
}
......@@ -833,22 +840,10 @@ private:
}
std::vector<CompilerVariable*> args;
std::vector<BoxedString*>* keyword_names;
if (node->keywords.size()) {
std::vector<BoxedString*>* keyword_names = NULL;
if (node->keywords.size())
keyword_names = getKeywordNameStorage(node);
// Only add the keywords to the array the first time, since
// the later times we will hit the cache which will have the
// keyword names already populated:
if (!keyword_names->size()) {
for (auto kw : node->keywords) {
keyword_names->push_back(kw->arg.getBox());
}
}
} else {
keyword_names = NULL;
}
for (int i = 0; i < node->args.size(); i++) {
CompilerVariable* a = evalExpr(node->args[i], unw_info);
args.push_back(a);
......@@ -1887,6 +1882,7 @@ private:
static BoxedString* space_str = static_cast<BoxedString*>(PyString_InternFromString(" "));
// TODO: why are we inline-generating all this code instead of just emitting a call to some runtime function?
// (=printHelper())
int nvals = node->values.size();
for (int i = 0; i < nvals; i++) {
CompilerVariable* var = evalExpr(node->values[i], unw_info);
......@@ -2027,9 +2023,7 @@ private:
auto effort = irstate->getEffortLevel();
int osr_threshold;
if (effort == EffortLevel::MINIMAL)
osr_threshold = OSR_THRESHOLD_BASELINE;
else if (effort == EffortLevel::MODERATE)
if (effort == EffortLevel::MODERATE)
osr_threshold = OSR_THRESHOLD_T2;
else
RELEASE_ASSERT(0, "Unknown effort: %d", (int)effort);
......
......@@ -56,7 +56,6 @@ class IRGenState {
private:
CompiledFunction* cf;
SourceInfo* source_info;
std::unique_ptr<LivenessAnalysis> liveness;
std::unique_ptr<PhiAnalysis> phis;
ParamNames* param_names;
GCBuilder* gc;
......@@ -70,8 +69,8 @@ private:
public:
IRGenState(CompiledFunction* cf, SourceInfo* source_info, std::unique_ptr<LivenessAnalysis> liveness,
std::unique_ptr<PhiAnalysis> phis, ParamNames* param_names, GCBuilder* gc, llvm::MDNode* func_dbg_info);
IRGenState(CompiledFunction* cf, SourceInfo* source_info, std::unique_ptr<PhiAnalysis> phis,
ParamNames* param_names, GCBuilder* gc, llvm::MDNode* func_dbg_info);
~IRGenState();
CompiledFunction* getCurFunction() { return cf; }
......@@ -90,7 +89,7 @@ public:
SourceInfo* getSourceInfo() { return source_info; }
LivenessAnalysis* getLiveness() { return liveness.get(); }
LivenessAnalysis* getLiveness() { return source_info->getLiveness(); }
PhiAnalysis* getPhis() { return phis.get(); }
ScopeInfo* getScopeInfo();
......@@ -133,11 +132,13 @@ public:
};
class IREmitter;
class AST_Call;
IREmitter* createIREmitter(IRGenState* irstate, llvm::BasicBlock*& curblock, IRGenerator* irgenerator = NULL);
IRGenerator* createIRGenerator(IRGenState* irstate, std::unordered_map<CFGBlock*, llvm::BasicBlock*>& entry_blocks,
CFGBlock* myblock, TypeAnalysis* types);
CLFunction* wrapFunction(AST* node, AST_arguments* args, const std::vector<AST_stmt*>& body, SourceInfo* source);
std::vector<BoxedString*>* getKeywordNameStorage(AST_Call* node);
}
#endif
......@@ -36,6 +36,7 @@
namespace pyston {
class AST_stmt;
class Box;
class CFG;
class CFGBlock {
......@@ -43,6 +44,12 @@ private:
CFG* cfg;
public:
// Baseline JIT helper fields:
// contains address to the start of the code of this basic block
void* code;
// contains the address of the entry function
std::pair<CFGBlock*, Box*>(*entry_code)(void* interpeter, CFGBlock* block);
std::vector<AST_stmt*> body;
std::vector<CFGBlock*> predecessors, successors;
int idx; // index in the CFG
......@@ -50,7 +57,7 @@ public:
typedef std::vector<AST_stmt*>::iterator iterator;
CFGBlock(CFG* cfg, int idx) : cfg(cfg), idx(idx), info(NULL) {}
CFGBlock(CFG* cfg, int idx) : cfg(cfg), code(NULL), entry_code(NULL), idx(idx), info(NULL) {}
void connectTo(CFGBlock* successor, bool allow_backedge = false);
void unconnectFrom(CFGBlock* successor);
......
......@@ -39,15 +39,16 @@ bool DUMPJIT = false;
bool TRAP = false;
bool USE_STRIPPED_STDLIB = true; // always true
bool ENABLE_INTERPRETER = true;
bool ENABLE_BASELINEJIT = true;
bool ENABLE_PYPA_PARSER = true;
bool USE_REGALLOC_BASIC = true;
bool PAUSE_AT_ABORT = false;
bool ENABLE_TRACEBACKS = true;
int OSR_THRESHOLD_INTERPRETER = 500;
int REOPT_THRESHOLD_INTERPRETER = 200;
int OSR_THRESHOLD_BASELINE = 10000;
int REOPT_THRESHOLD_BASELINE = 250;
int OSR_THRESHOLD_INTERPRETER = 25;
int REOPT_THRESHOLD_INTERPRETER = 25;
int OSR_THRESHOLD_BASELINE = 2500;
int REOPT_THRESHOLD_BASELINE = 1000;
int OSR_THRESHOLD_T2 = 10000;
int REOPT_THRESHOLD_T2 = 10000;
int SPECULATION_THRESHOLD = 100;
......
......@@ -38,8 +38,8 @@ extern int SPECULATION_THRESHOLD;
extern int MAX_OBJECT_CACHE_ENTRIES;
extern bool SHOW_DISASM, FORCE_INTERPRETER, FORCE_OPTIMIZE, PROFILE, DUMPJIT, TRAP, USE_STRIPPED_STDLIB,
CONTINUE_AFTER_FATAL, ENABLE_INTERPRETER, ENABLE_PYPA_PARSER, USE_REGALLOC_BASIC, PAUSE_AT_ABORT, ENABLE_TRACEBACKS,
ASSEMBLY_LOGGING;
CONTINUE_AFTER_FATAL, ENABLE_INTERPRETER, ENABLE_BASELINEJIT, ENABLE_PYPA_PARSER, USE_REGALLOC_BASIC,
PAUSE_AT_ABORT, ENABLE_TRACEBACKS, ASSEMBLY_LOGGING;
extern bool ENABLE_ICS, ENABLE_ICGENERICS, ENABLE_ICGETITEMS, ENABLE_ICSETITEMS, ENABLE_ICDELITEMS, ENABLE_ICBINEXPS,
ENABLE_ICNONZEROS, ENABLE_ICCALLSITES, ENABLE_ICSETATTRS, ENABLE_ICGETATTRS, ENALBE_ICDELATTRS, ENABLE_ICGETGLOBALS,
......
......@@ -65,7 +65,6 @@ using gc::GCVisitor;
enum class EffortLevel {
INTERPRETED = 0,
MINIMAL = 1,
MODERATE = 2,
MAXIMAL = 3,
};
......@@ -221,6 +220,7 @@ class BoxedClosure;
class BoxedGenerator;
class ICInfo;
class LocationMap;
class JitCodeBlock;
struct CompiledFunction {
private:
......@@ -249,21 +249,10 @@ public:
LocationMap* location_map; // only meaningful if this is a compiled frame
std::vector<ICInfo*> ics;
std::vector<std::unique_ptr<JitCodeBlock>> code_blocks;
CompiledFunction(llvm::Function* func, FunctionSpecialization* spec, bool is_interpreted, void* code,
EffortLevel effort, const OSREntryDescriptor* entry_descriptor)
: clfunc(NULL),
func(func),
spec(spec),
entry_descriptor(entry_descriptor),
is_interpreted(is_interpreted),
code(code),
effort(effort),
times_called(0),
times_speculation_failed(0),
location_map(nullptr) {
assert((spec != NULL) + (entry_descriptor != NULL) == 1);
}
EffortLevel effort, const OSREntryDescriptor* entry_descriptor);
ConcreteCompilerType* getReturnType();
......@@ -282,6 +271,7 @@ typedef int FutureFlags;
class BoxedModule;
class ScopeInfo;
class InternedStringPool;
class LivenessAnalysis;
class SourceInfo {
public:
BoxedModule* parent_module;
......@@ -295,6 +285,7 @@ public:
InternedStringPool& getInternedStrings();
ScopeInfo* getScopeInfo();
LivenessAnalysis* getLiveness();
// TODO we're currently copying the body of the AST into here, since lambdas don't really have a statement-based
// body and we have to create one. Ideally, we'd be able to avoid the space duplication for non-lambdas.
......@@ -307,6 +298,10 @@ public:
SourceInfo(BoxedModule* m, ScopingAnalysis* scoping, FutureFlags future_flags, AST* ast,
std::vector<AST_stmt*> body, std::string fn);
~SourceInfo();
private:
std::unique_ptr<LivenessAnalysis> liveness_info;
};
typedef std::vector<CompiledFunction*> FunctionList;
......
......@@ -226,6 +226,28 @@ extern "C" bool softspace(Box* b, bool newval) {
return r;
}
// Runtime helper that writes one print item and/or the trailing newline to
// `dest` by calling its "write" attribute.
//
//   dest - object whose "write" attribute receives the output
//   var  - value to print, or null when there is nothing to print
//   nl   - whether a newline is written afterwards
//
// When `var` is given, the softspace flag on `dest` is set to !nl; if
// softspace() returns true a single space is written before the value.
// Values whose class is unicode_cls are written as-is, everything else is
// passed through str() first.
extern "C" void printHelper(Box* dest, Box* var, bool nl) {
    static BoxedString* write_str = static_cast<BoxedString*>(PyString_InternFromString("write"));
    static BoxedString* newline_str = static_cast<BoxedString*>(PyString_InternFromString("\n"));
    static BoxedString* space_str = static_cast<BoxedString*>(PyString_InternFromString(" "));

    if (var) {
        // begin code for handling of softspace
        const bool needs_separator = softspace(dest, !nl);
        if (needs_separator)
            callattrInternal(dest, write_str, CLASS_OR_INST, 0, ArgPassSpec(1), space_str, 0, 0, 0, 0);

        Box* text;
        if (var->cls == unicode_cls)
            text = var;
        else
            text = str(var);
        callattrInternal(dest, write_str, CLASS_OR_INST, 0, ArgPassSpec(1), text, 0, 0, 0, 0);
    }

    if (!nl)
        return;

    callattrInternal(dest, write_str, CLASS_OR_INST, 0, ArgPassSpec(1), newline_str, 0, 0, 0, 0);
    // With a value present the flag was already set to false above (!nl);
    // only a bare newline needs to clear it here.
    if (!var)
        softspace(dest, false);
}
extern "C" void my_assert(bool b) {
assert(b);
}
......
......@@ -48,6 +48,7 @@ BoxedModule* getCurrentModule();
// TODO sort this
extern "C" bool softspace(Box* b, bool newval);
extern "C" void printHelper(Box* dest, Box* var, bool nl);
extern "C" void my_assert(bool b);
extern "C" Box* getattr(Box* obj, BoxedString* attr);
extern "C" Box* getattrMaybeNonstring(Box* obj, Box* attr);
......
......@@ -578,6 +578,23 @@ public:
rtn->elts[2] = elt2;
return rtn;
}
// Allocates a 4-element tuple and stores the given elements in order.
static BoxedTuple* create4(Box* elt0, Box* elt1, Box* elt2, Box* elt3) {
    Box* const items[4] = { elt0, elt1, elt2, elt3 };
    BoxedTuple* tuple = new (4) BoxedTuple(4);
    for (int i = 0; i < 4; i++)
        tuple->elts[i] = items[i];
    return tuple;
}
// Allocates a 5-element tuple and stores the given elements in order.
static BoxedTuple* create5(Box* elt0, Box* elt1, Box* elt2, Box* elt3, Box* elt4) {
    Box* const items[5] = { elt0, elt1, elt2, elt3, elt4 };
    BoxedTuple* tuple = new (5) BoxedTuple(5);
    for (int i = 0; i < 5; i++)
        tuple->elts[i] = items[i];
    return tuple;
}
static BoxedTuple* create(std::initializer_list<Box*> members) { return new (members.size()) BoxedTuple(members); }
static BoxedTuple* create(int64_t size, BoxedClass* cls) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment