Commit a9a9c6c2 authored by Marius Wachtler

bjit: use r12 and r15

parent dc85d001
......@@ -755,6 +755,7 @@ void Assembler::incq(Indirect mem) {
}
assert(src_idx >= 0 && src_idx < 8);
bool needssib = (src_idx == 0b100);
if (rex)
emitRex(rex);
......@@ -763,8 +764,12 @@ void Assembler::incq(Indirect mem) {
assert(-0x80 <= mem.offset && mem.offset < 0x80);
if (mem.offset == 0) {
emitModRM(0b00, 0, src_idx);
if (needssib)
emitSIB(0b00, 0b100, src_idx);
} else {
emitModRM(0b01, 0, src_idx);
if (needssib)
emitSIB(0b00, 0b100, src_idx);
emitByte(mem.offset);
}
}
......
......@@ -24,7 +24,7 @@
namespace pyston {
static const assembler::Register allocatable_regs[] = {
static const assembler::Register std_allocatable_regs[] = {
assembler::RAX, assembler::RCX, assembler::RDX,
// no RSP
// no RBP
......@@ -1234,17 +1234,29 @@ std::vector<Location> Rewriter::getDecrefLocations() {
std::vector<Location> decref_infos;
for (RewriterVar& var : vars) {
if (var.locations.size() && var.needsDecref()) {
// TODO: add code to handle other location types and choose best location if there are several
Location l = *var.locations.begin();
if (l.type == Location::Scratch) {
// convert to stack based location because later on we may not know the offset of the scratch area from
// the SP.
decref_infos.emplace_back(Location::Stack, indirectFor(l).offset);
} else if (l.type == Location::Register) {
// CSRs shouldn't be getting allocated, and we should only be calling this at a callsite:
RELEASE_ASSERT(0, "we shouldn't be trying to decref anything in a register");
} else
RELEASE_ASSERT(0, "not implemented");
bool found_location = false;
for (Location l : var.locations) {
if (l.type == Location::Scratch) {
// convert to stack based location because later on we may not know the offset of the scratch area
// from the SP.
decref_infos.emplace_back(Location::Stack, indirectFor(l).offset);
found_location = true;
break;
} else if (l.type == Location::Register) {
// we only allow registers which are not clobbered by a call
if (l.isClobberedByCall())
continue;
decref_infos.emplace_back(l);
found_location = true;
break;
} else
RELEASE_ASSERT(0, "not implemented");
}
if (!found_location) {
// this is very rare. just fail the rewrite for now
failed = true;
}
}
}
......@@ -2208,7 +2220,8 @@ Rewriter::Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const L
done_guarding(false),
last_guard_action(-1),
offset_eq_jmp_slowpath(-1),
offset_ne_jmp_slowpath(-1) {
offset_ne_jmp_slowpath(-1),
allocatable_regs(std_allocatable_regs) {
initPhaseCollecting();
finished = false;
......
......@@ -567,6 +567,8 @@ protected:
#endif
}
llvm::ArrayRef<assembler::Register> allocatable_regs;
public:
// This should be called exactly once for each argument
RewriterVar* getArg(int argnum);
......
......@@ -38,19 +38,22 @@ static llvm::DenseMap<CFGBlock*, std::vector<void*>> block_patch_locations;
//
// long foo(char* c);
// void bjit() {
// asm volatile ("" ::: "r15");
// asm volatile ("" ::: "r14");
// asm volatile ("" ::: "r13");
// asm volatile ("" ::: "r12");
// char scratch[256+16];
// foo(scratch);
// }
//
// It omits the frame pointer but saves r13 and r14
// It omits the frame pointer but saves r12, r13, r14 and r15
// use 'objdump -s -j .eh_frame <obj.file>' to dump it
const unsigned char eh_info[]
= { 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x7a, 0x52, 0x00, 0x01, 0x78, 0x10,
0x01, 0x1b, 0x0c, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1c, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x42, 0x0e, 0x10, 0x42,
0x0e, 0x18, 0x47, 0x0e, 0xb0, 0x02, 0x8d, 0x03, 0x8e, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00 };
= { 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x7a, 0x52, 0x00, 0x01, 0x78, 0x10, 0x01,
0x1b, 0x0c, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x42, 0x0e, 0x10, 0x42, 0x0e, 0x18, 0x42,
0x0e, 0x20, 0x42, 0x0e, 0x28, 0x47, 0x0e, 0xc0, 0x02, 0x8c, 0x05, 0x8d, 0x04, 0x8e, 0x03, 0x8f,
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
static_assert(JitCodeBlock::num_stack_args == 2, "have to update EH table!");
static_assert(JitCodeBlock::scratch_size == 256, "have to update EH table!");
......@@ -70,8 +73,10 @@ JitCodeBlock::JitCodeBlock(llvm::StringRef name)
uint8_t* code = a.curInstPointer();
// emit prolog
a.push(assembler::R15);
a.push(assembler::R14);
a.push(assembler::R13);
a.push(assembler::R12);
static_assert(sp_adjustment % 16 == 8, "stack isn't aligned");
a.sub(assembler::Immediate(sp_adjustment), assembler::RSP);
a.mov(assembler::RDI, assembler::R13); // interpreter pointer
......@@ -136,6 +141,12 @@ void JitCodeBlock::fragmentFinished(int bytes_written, int num_bytes_overlapping
ic_info.appendDecrefInfosTo(decref_infos);
}
// Registers the baseline JIT's register allocator may hand out; JitFragmentWriter's
// constructor assigns this table to its allocatable_regs.
// In addition to RAX/RCX/RDX/RDI/RSI/R8-R11 it lists R12 and R15: the emitted prolog
// pushes both and every exit sequence pops them again before returning.
// R13 and R14 are deliberately absent - the generated code keeps the ASTInterpreter
// pointer in r13 and the vregs array pointer in r14.
static const assembler::Register bjit_allocatable_regs[]
= { assembler::RAX, assembler::RCX, assembler::RDX,
// no RSP
// no RBP
assembler::RDI, assembler::RSI, assembler::R8, assembler::R9,
assembler::R10, assembler::R11, assembler::R12, assembler::R15 };
JitFragmentWriter::JitFragmentWriter(CFGBlock* block, std::unique_ptr<ICInfo> ic_info,
std::unique_ptr<ICSlotRewrite> rewrite, int code_offset, int num_bytes_overlapping,
......@@ -149,6 +160,8 @@ JitFragmentWriter::JitFragmentWriter(CFGBlock* block, std::unique_ptr<ICInfo> ic
code_block(code_block),
interp(0),
ic_info(std::move(ic_info)) {
allocatable_regs = bjit_allocatable_regs;
if (LOG_BJIT_ASSEMBLY)
comment("BJIT: JitFragmentWriter() start");
interp = createNewVar();
......@@ -1009,8 +1022,10 @@ void JitFragmentWriter::_emitJump(CFGBlock* b, RewriterVar* block_next, ExitInfo
exit_info.exit_start = assembler->curInstPointer();
block_next->getInReg(assembler::RAX, true);
assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP);
assembler->pop(assembler::R12);
assembler->pop(assembler::R13);
assembler->pop(assembler::R14);
assembler->pop(assembler::R15);
assembler->retq();
// make sure we have at least 'min_patch_size' of bytes available.
......@@ -1042,8 +1057,10 @@ void JitFragmentWriter::_emitOSRPoint() {
assembler->clear_reg(assembler::RAX); // = next block to execute
assembler->mov(assembler::Immediate(ASTInterpreterJitInterface::osr_dummy_value), assembler::RDX);
assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP);
assembler->pop(assembler::R12);
assembler->pop(assembler::R13);
assembler->pop(assembler::R14);
assembler->pop(assembler::R15);
assembler->retq();
}
interp->bumpUse();
......@@ -1145,8 +1162,10 @@ void JitFragmentWriter::_emitReturn(RewriterVar* return_val) {
return_val->getInReg(assembler::RDX, true);
assembler->clear_reg(assembler::RAX);
assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP);
assembler->pop(assembler::R12);
assembler->pop(assembler::R13);
assembler->pop(assembler::R14);
assembler->pop(assembler::R15);
assembler->retq();
return_val->bumpUse();
}
......
......@@ -70,8 +70,9 @@ class JitFragmentWriter;
// register or stack slot but we aren't if it outlives the block - we have to store it in the interpreter instance.
//
// We use the following callee-save regs to speed up the generated code:
// r13: pointer to ASTInterpreter instance
// r14: pointer to the vregs array
// r12, r15: temporary values
// r13: pointer to ASTInterpreter instance
// r14: pointer to the vregs array
//
// To execute a specific CFGBlock one has to call:
// CFGBlock* block;
......@@ -90,8 +91,10 @@ class JitFragmentWriter;
//
// Basic layout of generated code block is:
// entry_code:
// push %r15 ; save r15
// push %r14 ; save r14
// push %r13 ; save r13
// push %r12 ; save r12
// sub $0x118,%rsp ; setup scratch, 0x118 = scratch_size + 16 = space for two func args passed on the
// stack + 8 byte for stack alignment
// mov %rdi,%r13 ; copy the pointer to ASTInterpreter instance into r13
......@@ -107,8 +110,10 @@ class JitFragmentWriter;
// jne end_side_exit
// movabs $0x215bb60,%rax ; rax = CFGBlock* to interpret next (rax is the 1. return reg)
// add $0x118,%rsp ; restore stack pointer
// pop %r12 ; restore r12
// pop %r13 ; restore r13
// pop %r14 ; restore r14
// pop %r15 ; restore r15
// ret ; exit to the interpreter which will interpret the specified CFGBlock*
// end_side_exit:
// ....
......@@ -120,8 +125,10 @@ class JitFragmentWriter;
// in this case 0 which means we are finished
// movabs $0x1270014108,%rdx ; rdx must contain the Box* value to return
// add $0x118,%rsp ; restore stack pointer
// pop %r12 ; restore r12
// pop %r13 ; restore r13
// pop %r14 ; restore r14
// pop %r15 ; restore r15
// ret
//
// nth_JitFragment:
......
......@@ -593,11 +593,7 @@ public:
assert(l.stack_second_offset % 8 == 0);
b = b_ptr[l.stack_second_offset / 8];
} else if (l.type == Location::Register) {
RELEASE_ASSERT(0, "untested");
// This branch should never get hit since we shouldn't generate Register locations,
// since we don't allow allocating callee-save registers.
// If we did, this code might be right:
// b = (Box*)get_cursor_reg(cursor, l.regnum);
b = (Box*)get_cursor_reg(cursor, l.regnum);
} else {
RELEASE_ASSERT(0, "not implemented");
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment