Commit 89c8630a authored by Kevin Modzelewski

Merge pull request #668 from undingen/patch_bjit

baseline jit: patch block transitions to a direct jump.
parents dbe78661 f9021356
...@@ -901,6 +901,7 @@ void Assembler::jmp(JumpDestination dest) { ...@@ -901,6 +901,7 @@ void Assembler::jmp(JumpDestination dest) {
emitByte(0xeb); emitByte(0xeb);
emitByte(offset); emitByte(offset);
} else { } else {
assert((-1L << 31) <= dest.offset && dest.offset < (1L << 31) - 1);
offset -= 3; offset -= 3;
emitByte(0xe9); emitByte(0xe9);
emitInt(offset, 4); emitInt(offset, 4);
......
...@@ -144,7 +144,8 @@ private: ...@@ -144,7 +144,8 @@ private:
Value visit_jump(AST_Jump* node); Value visit_jump(AST_Jump* node);
Value visit_langPrimitive(AST_LangPrimitive* node); Value visit_langPrimitive(AST_LangPrimitive* node);
void startJITing(CFGBlock* block, int jump_offset = 0); // for doc on 'exit_offset' have a look at JitFragmentWriter::num_bytes_exit and num_bytes_overlapping
void startJITing(CFGBlock* block, int exit_offset = 0);
void abortJITing(); void abortJITing();
void finishJITing(CFGBlock* continue_block = NULL); void finishJITing(CFGBlock* continue_block = NULL);
...@@ -339,7 +340,7 @@ void RegisterHelper::deregister(void* frame_addr) { ...@@ -339,7 +340,7 @@ void RegisterHelper::deregister(void* frame_addr) {
s_interpreterMap.erase(frame_addr); s_interpreterMap.erase(frame_addr);
} }
void ASTInterpreter::startJITing(CFGBlock* block, int jump_offset) { void ASTInterpreter::startJITing(CFGBlock* block, int exit_offset) {
assert(ENABLE_BASELINEJIT); assert(ENABLE_BASELINEJIT);
assert(!jit); assert(!jit);
...@@ -351,10 +352,10 @@ void ASTInterpreter::startJITing(CFGBlock* block, int jump_offset) { ...@@ -351,10 +352,10 @@ void ASTInterpreter::startJITing(CFGBlock* block, int jump_offset) {
if (!code_block || code_block->shouldCreateNewBlock()) { if (!code_block || code_block->shouldCreateNewBlock()) {
code_blocks.push_back(std::unique_ptr<JitCodeBlock>(new JitCodeBlock(source_info->getName()))); code_blocks.push_back(std::unique_ptr<JitCodeBlock>(new JitCodeBlock(source_info->getName())));
code_block = code_blocks[code_blocks.size() - 1].get(); code_block = code_blocks[code_blocks.size() - 1].get();
jump_offset = 0; exit_offset = 0;
} }
jit = code_block->newFragment(block, jump_offset); jit = code_block->newFragment(block, exit_offset);
} }
void ASTInterpreter::abortJITing() { void ASTInterpreter::abortJITing() {
...@@ -367,10 +368,10 @@ void ASTInterpreter::abortJITing() { ...@@ -367,10 +368,10 @@ void ASTInterpreter::abortJITing() {
void ASTInterpreter::finishJITing(CFGBlock* continue_block) { void ASTInterpreter::finishJITing(CFGBlock* continue_block) {
if (!jit) if (!jit)
return; return;
int jump_offset = jit->finishCompilation(); int exit_offset = jit->finishCompilation();
jit.reset(); jit.reset();
if (continue_block && !continue_block->code) if (continue_block && !continue_block->code)
startJITing(continue_block, jump_offset); startJITing(continue_block, exit_offset);
} }
Value ASTInterpreter::executeInner(ASTInterpreter& interpreter, CFGBlock* start_block, AST_stmt* start_at, Value ASTInterpreter::executeInner(ASTInterpreter& interpreter, CFGBlock* start_block, AST_stmt* start_at,
......
...@@ -27,13 +27,13 @@ ...@@ -27,13 +27,13 @@
namespace pyston { namespace pyston {
static llvm::DenseSet<CFGBlock*> blocks_aborted; static llvm::DenseSet<CFGBlock*> blocks_aborted;
static llvm::DenseMap<CFGBlock*, std::vector<void*>> block_patch_locations;
JitCodeBlock::JitCodeBlock(llvm::StringRef name) JitCodeBlock::JitCodeBlock(llvm::StringRef name)
: frame_manager(false /* don't omit frame pointers */), : frame_manager(false /* don't omit frame pointers */),
code(new uint8_t[code_size]), code(new uint8_t[code_size]),
entry_offset(0), entry_offset(0),
epilog_offset(0), a(code.get(), code_size),
a(code.get(), code_size - epilog_size),
is_currently_writing(false), is_currently_writing(false),
asm_failed(false) { asm_failed(false) {
static StatCounter num_jit_code_blocks("num_baselinejit_code_blocks"); static StatCounter num_jit_code_blocks("num_baselinejit_code_blocks");
...@@ -53,13 +53,6 @@ JitCodeBlock::JitCodeBlock(llvm::StringRef name) ...@@ -53,13 +53,6 @@ JitCodeBlock::JitCodeBlock(llvm::StringRef name)
entry_offset = a.bytesWritten(); entry_offset = a.bytesWritten();
// emit epilog
epilog_offset = code_size - epilog_size;
assembler::Assembler endAsm(code.get() + epilog_offset, epilog_size);
endAsm.leave();
endAsm.retq();
RELEASE_ASSERT(!endAsm.hasFailed(), "");
// generate eh frame... // generate eh frame...
frame_manager.writeAndRegister(code.get(), code_size); frame_manager.writeAndRegister(code.get(), code_size);
...@@ -82,9 +75,8 @@ std::unique_ptr<JitFragmentWriter> JitCodeBlock::newFragment(CFGBlock* block, in ...@@ -82,9 +75,8 @@ std::unique_ptr<JitFragmentWriter> JitCodeBlock::newFragment(CFGBlock* block, in
llvm::CallingConv::C, live_outs, assembler::RAX, 0)); llvm::CallingConv::C, live_outs, assembler::RAX, 0));
std::unique_ptr<ICSlotRewrite> rewrite(new ICSlotRewrite(ic_info.get(), "")); std::unique_ptr<ICSlotRewrite> rewrite(new ICSlotRewrite(ic_info.get(), ""));
return std::unique_ptr<JitFragmentWriter>(new JitFragmentWriter(block, std::move(ic_info), std::move(rewrite), return std::unique_ptr<JitFragmentWriter>(new JitFragmentWriter(
fragment_offset, epilog_offset - fragment_offset, block, std::move(ic_info), std::move(rewrite), fragment_offset, patch_jump_offset, a.getStartAddr(), *this));
patch_jump_offset, a.getStartAddr(), *this));
} }
void JitCodeBlock::fragmentAbort(bool not_enough_space) { void JitCodeBlock::fragmentAbort(bool not_enough_space) {
...@@ -102,14 +94,13 @@ void JitCodeBlock::fragmentFinished(int bytes_written, int num_bytes_overlapping ...@@ -102,14 +94,13 @@ void JitCodeBlock::fragmentFinished(int bytes_written, int num_bytes_overlapping
JitFragmentWriter::JitFragmentWriter(CFGBlock* block, std::unique_ptr<ICInfo> ic_info, JitFragmentWriter::JitFragmentWriter(CFGBlock* block, std::unique_ptr<ICInfo> ic_info,
std::unique_ptr<ICSlotRewrite> rewrite, int code_offset, int epilog_offset, std::unique_ptr<ICSlotRewrite> rewrite, int code_offset, int num_bytes_overlapping,
int num_bytes_overlapping, void* entry_code, JitCodeBlock& code_block) void* entry_code, JitCodeBlock& code_block)
: Rewriter(std::move(rewrite), 0, {}), : Rewriter(std::move(rewrite), 0, {}),
block(block), block(block),
code_offset(code_offset), code_offset(code_offset),
epilog_offset(epilog_offset), num_bytes_exit(0),
num_bytes_overlapping(num_bytes_overlapping), num_bytes_overlapping(num_bytes_overlapping),
num_bytes_forward_jump(0),
entry_code(entry_code), entry_code(entry_code),
code_block(code_block), code_block(code_block),
interp(0), interp(0),
...@@ -387,7 +378,7 @@ void JitFragmentWriter::emitExec(RewriterVar* code, RewriterVar* globals, Rewrit ...@@ -387,7 +378,7 @@ void JitFragmentWriter::emitExec(RewriterVar* code, RewriterVar* globals, Rewrit
void JitFragmentWriter::emitJump(CFGBlock* b) { void JitFragmentWriter::emitJump(CFGBlock* b) {
RewriterVar* next = imm(b); RewriterVar* next = imm(b);
addAction([=]() { _emitJump(b, next, num_bytes_forward_jump); }, { next }, ActionType::NORMAL); addAction([=]() { _emitJump(b, next, num_bytes_exit); }, { next }, ActionType::NORMAL);
} }
void JitFragmentWriter::emitOSRPoint(AST_Jump* node) { void JitFragmentWriter::emitOSRPoint(AST_Jump* node) {
...@@ -505,9 +496,31 @@ int JitFragmentWriter::finishCompilation() { ...@@ -505,9 +496,31 @@ int JitFragmentWriter::finishCompilation() {
block->code = (void*)((uint64_t)entry_code + code_offset); block->code = (void*)((uint64_t)entry_code + code_offset);
block->entry_code = (decltype(block->entry_code))entry_code; block->entry_code = (decltype(block->entry_code))entry_code;
// if any side exits point to this block, patch them to a direct jump to this block
auto it = block_patch_locations.find(block);
if (it != block_patch_locations.end()) {
for (void* patch_location : it->second) {
assembler::Assembler patch_asm((uint8_t*)patch_location, min_patch_size);
int64_t offset = (uint64_t)block->code - (uint64_t)patch_location;
if (isLargeConstant(offset)) {
patch_asm.mov(assembler::Immediate(block->code), assembler::R11);
patch_asm.jmpq(assembler::R11);
} else
patch_asm.jmp(assembler::JumpDestination::fromStart(offset));
RELEASE_ASSERT(!patch_asm.hasFailed(), "you may have to increase 'min_patch_size'");
}
block_patch_locations.erase(it);
}
// if we have a side exit, remember its location for patching
if (side_exit_patch_location.first) {
void* patch_location = (uint8_t*)block->code + side_exit_patch_location.second;
block_patch_locations[side_exit_patch_location.first].push_back(patch_location);
}
void* next_fragment_start = (uint8_t*)block->code + assembler->bytesWritten(); void* next_fragment_start = (uint8_t*)block->code + assembler->bytesWritten();
code_block.fragmentFinished(assembler->bytesWritten(), num_bytes_overlapping, next_fragment_start); code_block.fragmentFinished(assembler->bytesWritten(), num_bytes_overlapping, next_fragment_start);
return num_bytes_forward_jump; return num_bytes_exit;
} }
bool JitFragmentWriter::finishAssembly(int continue_offset) { bool JitFragmentWriter::finishAssembly(int continue_offset) {
...@@ -669,8 +682,8 @@ Box* JitFragmentWriter::unaryopICHelper(UnaryopIC* ic, Box* obj, int op) { ...@@ -669,8 +682,8 @@ Box* JitFragmentWriter::unaryopICHelper(UnaryopIC* ic, Box* obj, int op) {
} }
void JitFragmentWriter::_emitJump(CFGBlock* b, RewriterVar* block_next, int& size_of_indirect_jump) { void JitFragmentWriter::_emitJump(CFGBlock* b, RewriterVar* block_next, int& size_of_exit_to_interp) {
size_of_indirect_jump = 0; size_of_exit_to_interp = 0;
if (b->code) { if (b->code) {
int64_t offset = (uint64_t)b->code - ((uint64_t)entry_code + code_offset); int64_t offset = (uint64_t)b->code - ((uint64_t)entry_code + code_offset);
if (isLargeConstant(offset)) { if (isLargeConstant(offset)) {
...@@ -681,11 +694,15 @@ void JitFragmentWriter::_emitJump(CFGBlock* b, RewriterVar* block_next, int& siz ...@@ -681,11 +694,15 @@ void JitFragmentWriter::_emitJump(CFGBlock* b, RewriterVar* block_next, int& siz
} else { } else {
int num_bytes = assembler->bytesWritten(); int num_bytes = assembler->bytesWritten();
block_next->getInReg(assembler::RAX, true); block_next->getInReg(assembler::RAX, true);
assembler->mov(assembler::Indirect(assembler::RAX, 8), assembler::RSI); assembler->leave();
assembler->test(assembler::RSI, assembler::RSI); assembler->retq();
assembler->je(assembler::JumpDestination::fromStart(epilog_offset));
assembler->jmp(assembler::Indirect(assembler::RAX, offsetof(CFGBlock, code))); // make sure we have at least 'min_patch_size' of bytes available.
size_of_indirect_jump = assembler->bytesWritten() - num_bytes; for (int i = assembler->bytesWritten() - num_bytes; i < min_patch_size; ++i)
assembler->trap(); // we could use nops but traps may help if something goes wrong
size_of_exit_to_interp = assembler->bytesWritten() - num_bytes;
assert(assembler->hasFailed() || size_of_exit_to_interp >= min_patch_size);
} }
block_next->bumpUse(); block_next->bumpUse();
} }
...@@ -702,7 +719,8 @@ void JitFragmentWriter::_emitOSRPoint(RewriterVar* result, RewriterVar* node_var ...@@ -702,7 +719,8 @@ void JitFragmentWriter::_emitOSRPoint(RewriterVar* result, RewriterVar* node_var
{ {
assembler::ForwardJump je(*assembler, assembler::COND_EQUAL); assembler::ForwardJump je(*assembler, assembler::COND_EQUAL);
assembler->mov(assembler::Immediate(0ul), assembler::RAX); // TODO: use xor assembler->mov(assembler::Immediate(0ul), assembler::RAX); // TODO: use xor
assembler->jmp(assembler::JumpDestination::fromStart(epilog_offset)); assembler->leave();
assembler->retq();
} }
assertConsistent(); assertConsistent();
...@@ -711,7 +729,8 @@ void JitFragmentWriter::_emitOSRPoint(RewriterVar* result, RewriterVar* node_var ...@@ -711,7 +729,8 @@ void JitFragmentWriter::_emitOSRPoint(RewriterVar* result, RewriterVar* node_var
void JitFragmentWriter::_emitReturn(RewriterVar* return_val) { void JitFragmentWriter::_emitReturn(RewriterVar* return_val) {
return_val->getInReg(assembler::RDX, true); return_val->getInReg(assembler::RDX, true);
assembler->mov(assembler::Immediate(0ul), assembler::RAX); // TODO: use xor assembler->mov(assembler::Immediate(0ul), assembler::RAX); // TODO: use xor
assembler->jmp(assembler::JumpDestination::fromStart(epilog_offset)); assembler->leave();
assembler->retq();
return_val->bumpUse(); return_val->bumpUse();
} }
...@@ -731,11 +750,12 @@ void JitFragmentWriter::_emitSideExit(RewriterVar* var, RewriterVar* val_constan ...@@ -731,11 +750,12 @@ void JitFragmentWriter::_emitSideExit(RewriterVar* var, RewriterVar* val_constan
{ {
assembler::ForwardJump jne(*assembler, assembler::COND_EQUAL); assembler::ForwardJump jne(*assembler, assembler::COND_EQUAL);
int bytes = 0; int exit_size = 0;
_emitJump(next_block, next_block_var, bytes); _emitJump(next_block, next_block_var, exit_size);
if (bytes) { if (exit_size) {
// TODO: We generated an indirect jump. RELEASE_ASSERT(!side_exit_patch_location.first,
// If we later on JIT the dest block we could patch this code to a direct jump to the dest. "if we start to emit more than one side exit we should make this a vector");
side_exit_patch_location = std::make_pair(next_block, assembler->bytesWritten() - exit_size);
} }
} }
......
...@@ -74,7 +74,7 @@ class JitFragmentWriter; ...@@ -74,7 +74,7 @@ class JitFragmentWriter;
// Currently a JitFragment always contains the code of a single CFGBlock*. // Currently a JitFragment always contains the code of a single CFGBlock*.
// A JitFragment can get called from the Interpreter by calling 'entry_code' which will jump to the fragment start or // A JitFragment can get called from the Interpreter by calling 'entry_code' which will jump to the fragment start or
// it can get executed by a jump from another fragment. // it can get executed by a jump from another fragment.
// At every fragment end we can jump to another fragment, fallback to the Interpreter or exit. // At every fragment end we can jump to another fragment or exit to the interpreter.
// This means we are not allowed to assume that a register contains a specific value between JitFragments. // This means we are not allowed to assume that a register contains a specific value between JitFragments.
// This also means that we are allowed to store a Python variable which only lives in the current CFGBlock* inside a // This also means that we are allowed to store a Python variable which only lives in the current CFGBlock* inside a
// register or stack slot but we aren't if it outlives the block - we have to store it in the interpreter instance. // register or stack slot but we aren't if it outlives the block - we have to store it in the interpreter instance.
...@@ -109,38 +109,34 @@ class JitFragmentWriter; ...@@ -109,38 +109,34 @@ class JitFragmentWriter;
// movabs $0x1270014108,%rcx ; rcx = True // movabs $0x1270014108,%rcx ; rcx = True
// cmp %rax,%rcx ; rax == True // cmp %rax,%rcx ; rax == True
// jne end_side_exit // jne end_side_exit
// mov %rax,0x10(%rsp) ; // movabs $0x215bb60,%rax ; rax = CFGBlock* to interpret next (rax is the first return register)
// movabs $0x215bb60,%rax ; rax = CFGBlock* to call next (rax is also the first return register) // leave
// mov 0x8(%rax),%rsi ; load CFGBlock->code // ret ; exit to the interpreter which will interpret the specified CFGBlock*
// test %rsi,%rsi ; CFGBlock->code == 0
// je epilog ; exit to interpreter if code == 0
// jmpq *0x8(%rax) ; jump to new jit fragment (e.g second_JitFragment)
// end_side_exit: // end_side_exit:
// .... // ....
// second_JitFragment: // second_JitFragment:
// ... // ...
// ; this shows what an AST_Return looks like
// mov $0,%rax ; rax contains the next block to interpret;
// ; in this case it is 0, which means we are finished
// movabs $0x1270014108,%rdx ; rdx must contain the Box* value to return
// leave
// ret
// //
// nth_JitFragment: // nth_JitFragment:
// ... ; direct jump previous JITed block // ... ; direct jump previous JITed block
// jmp first_JitFragment // jmp first_JitFragment
// //
// epilog: ; code which jumps to epilog has to make sure that
// ; rax contains the next block to execute
// ; or 0 if we are finished but then rdx must contain the Box* value to return
// leave
// ret
// //
class JitCodeBlock { class JitCodeBlock {
private: private:
static constexpr int scratch_size = 256; static constexpr int scratch_size = 256;
static constexpr int code_size = 4096 * 2; static constexpr int code_size = 4096 * 2;
static constexpr int epilog_size = 2; // size of [leave, ret] in bytes
EHFrameManager frame_manager; EHFrameManager frame_manager;
std::unique_ptr<uint8_t[]> code; std::unique_ptr<uint8_t[]> code;
int entry_offset; int entry_offset;
int epilog_offset;
assembler::Assembler a; assembler::Assembler a;
bool is_currently_writing; bool is_currently_writing;
bool asm_failed; bool asm_failed;
...@@ -156,23 +152,35 @@ public: ...@@ -156,23 +152,35 @@ public:
class JitFragmentWriter : public Rewriter { class JitFragmentWriter : public Rewriter {
private: private:
static constexpr int min_patch_size = 13;
CFGBlock* block; CFGBlock* block;
int code_offset; // offset inside the JitCodeBlock to the start of this block int code_offset; // offset inside the JitCodeBlock to the start of this block
int epilog_offset; // offset inside the JitCodeBlock to the epilog
int num_bytes_overlapping; // num of bytes this block overlaps with the prev. used to patch unessary forward jumps // If the next block is not yet JITed we will set this field to the number of bytes we emitted for the exit to the
int num_bytes_forward_jump; // number of bytes emited for the last forward jump to the next block. This is used to // interpreter which continues interpreting the next block.
// patch unnecessary forward jumps when the next fragment is emitted (it becomes // If we immediately start JITing the next block we will set 'num_bytes_overlapping' on the new fragment to this
// num_bytes_overlapping) // value which will make the fragment start at the instruction where the last block is exiting to the interpreter to
void* entry_code; // JitCodeBlock start address. Mmust have an offset of 0 into the code block // interpret the new block -> we overwrite the exit with the code of the new block.
// If there is nothing to overwrite this field will be 0.
int num_bytes_exit;
int num_bytes_overlapping; // num of bytes this block overlaps with the prev. used to patch unessary jumps
void* entry_code; // JitCodeBlock start address. Must have an offset of 0 into the code block
JitCodeBlock& code_block; JitCodeBlock& code_block;
RewriterVar* interp; RewriterVar* interp;
llvm::DenseMap<InternedString, RewriterVar*> local_syms; llvm::DenseMap<InternedString, RewriterVar*> local_syms;
std::unique_ptr<ICInfo> ic_info; std::unique_ptr<ICInfo> ic_info;
// Optionally points to a CFGBlock and a patch location which should get patched to a direct jump if
// the specified block gets JITed. The patch location is guaranteed to be at least 'min_patch_size' bytes long.
// We can't directly mark the offset for patching because JITing the current fragment may fail. That's why we store
// it in this field and process it only when we know we successfully generated the code.
std::pair<CFGBlock*, int /* offset from fragment start*/> side_exit_patch_location;
public: public:
JitFragmentWriter(CFGBlock* block, std::unique_ptr<ICInfo> ic_info, std::unique_ptr<ICSlotRewrite> rewrite, JitFragmentWriter(CFGBlock* block, std::unique_ptr<ICInfo> ic_info, std::unique_ptr<ICSlotRewrite> rewrite,
int code_offset, int epilog_offset, int num_bytes_overlapping, void* entry_code, int code_offset, int num_bytes_overlapping, void* entry_code, JitCodeBlock& code_block);
JitCodeBlock& code_block);
RewriterVar* imm(uint64_t val); RewriterVar* imm(uint64_t val);
RewriterVar* imm(void* val); RewriterVar* imm(void* val);
...@@ -269,7 +277,7 @@ private: ...@@ -269,7 +277,7 @@ private:
static Box* runtimeCallHelperIC(Box* obj, ArgPassSpec argspec, RuntimeCallIC* ic, Box** args); static Box* runtimeCallHelperIC(Box* obj, ArgPassSpec argspec, RuntimeCallIC* ic, Box** args);
#endif #endif
void _emitJump(CFGBlock* b, RewriterVar* block_next, int& size_of_indirect_jump); void _emitJump(CFGBlock* b, RewriterVar* block_next, int& size_of_exit_to_interp);
void _emitOSRPoint(RewriterVar* result, RewriterVar* node_var); void _emitOSRPoint(RewriterVar* result, RewriterVar* node_var);
void _emitReturn(RewriterVar* v); void _emitReturn(RewriterVar* v);
void _emitSideExit(RewriterVar* var, RewriterVar* val_constant, CFGBlock* next_block, RewriterVar* false_path); void _emitSideExit(RewriterVar* var, RewriterVar* val_constant, CFGBlock* next_block, RewriterVar* false_path);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment