Commit 9686c587 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Merge pull request #664 from undingen/prep_bjit

Preparations for the new JIT tier
parents e9316820 53862fa5
......@@ -790,6 +790,8 @@ void Assembler::cmp(Indirect mem, Register reg) {
}
void Assembler::lea(Indirect mem, Register reg) {
RELEASE_ASSERT(mem.base != RSP && mem.base != R12, "We have to generate the SIB byte...");
int mem_idx = mem.base.regnum;
int reg_idx = reg.regnum;
......@@ -886,6 +888,23 @@ void Assembler::jmp(JumpDestination dest) {
}
}
// Emits an indirect jump through memory: opcode 0xFF with ModRM.reg == 0b100 (`jmp *offset(%base)`).
// No prefix byte is emitted here, so only base registers with regnum 0-7 are accepted, and RSP is
// rejected because addressing through it needs a SIB byte that this function does not generate.
// The displacement is encoded in the shortest form that can represent dest.offset.
void Assembler::jmp(Indirect dest) {
    int reg_idx = dest.base.regnum;

    assert(reg_idx >= 0 && reg_idx < 8 && "not yet implemented");
    // r/m == 0b100 does not mean [RSP]; it announces a SIB byte.
    assert(reg_idx != 0b100 && "We have to generate the SIB byte...");

    emitByte(0xFF);
    // With mod == 0b00, r/m == 0b101 means RIP-relative disp32 rather than [RBP], so a zero
    // offset from RBP has to fall through to the explicit 8-bit displacement form.
    if (dest.offset == 0 && reg_idx != 0b101) {
        emitModRM(0b00, 0b100, reg_idx);
    } else if (-0x80 <= dest.offset && dest.offset < 0x80) {
        emitModRM(0b01, 0b100, reg_idx);
        emitByte(dest.offset);
    } else {
        // The displacement has to fit in a signed 32-bit field.
        assert(-(1L << 31) <= dest.offset && dest.offset < (1L << 31));
        emitModRM(0b10, 0b100, reg_idx);
        emitInt(dest.offset, 4);
    }
}
// Emits a conditional jump to `dest` using the COND_NOT_EQUAL condition code.
void Assembler::jne(JumpDestination dest) {
    const ConditionCode cond = COND_NOT_EQUAL;
    jmp_cond(dest, cond);
}
......@@ -934,6 +953,10 @@ void Assembler::setne(Register reg) {
set_cond(reg, COND_NOT_EQUAL);
}
// Emits the one-byte `leave` instruction (opcode 0xC9).
void Assembler::leave() {
    const uint8_t leave_opcode = 0xC9;
    emitByte(leave_opcode);
}
uint8_t* Assembler::emitCall(void* ptr, Register scratch) {
mov(Immediate(ptr), scratch);
callq(scratch);
......@@ -1002,5 +1025,19 @@ void Assembler::emitAnnotation(int num) {
cmp(RAX, Immediate(num));
nop();
}
// Remembers where the jump instruction starts and emits it with a placeholder target that lies
// max_jump_size bytes past the current position; the destructor re-emits it with the real target.
ForwardJump::ForwardJump(Assembler& assembler, ConditionCode condition)
    : assembler(assembler), condition(condition), jmp_inst(assembler.curInstPointer()) {
    const int placeholder_offset = assembler.bytesWritten() + max_jump_size;
    JumpDestination placeholder = JumpDestination::fromStart(placeholder_offset);
    assembler.jmp_cond(placeholder, condition);
}
// Re-emits the conditional jump recorded at construction so that it targets the assembler's
// current position, then puts the assembler's write pointer back where it was.
ForwardJump::~ForwardJump() {
    uint8_t* const resume_at = assembler.curInstPointer();

    // Distance from the start of the jump instruction to the code emitted after this scope.
    const int offset = resume_at - jmp_inst;
    RELEASE_ASSERT(offset < max_jump_size, "");

    // Rewind to the jump, overwrite it with the real target, and restore the write position.
    assembler.setCurInstPointer(jmp_inst);
    JumpDestination target = JumpDestination::fromStart(assembler.bytesWritten() + offset);
    assembler.jmp_cond(target, condition);
    assembler.setCurInstPointer(resume_at);
}
}
}
......@@ -154,6 +154,7 @@ public:
void callq(Register reg);
void retq();
void leave();
void cmp(Register reg1, Register reg2);
void cmp(Register reg, Immediate imm);
......@@ -166,6 +167,7 @@ public:
void jmp_cond(JumpDestination dest, ConditionCode condition);
void jmp(JumpDestination dest);
void jmp(Indirect dest);
void jmpq(Register dest);
void je(JumpDestination dest);
void jne(JumpDestination dest);
......@@ -185,9 +187,27 @@ public:
void fillWithNopsExcept(int bytes);
void emitAnnotation(int num);
int bytesWritten() { return addr - start_addr; }
int bytesLeft() const { return end_addr - addr; }
int bytesWritten() const { return addr - start_addr; }
uint8_t* curInstPointer() { return addr; }
bool isExactlyFull() { return addr == end_addr; }
void setCurInstPointer(uint8_t* ptr) { addr = ptr; }
bool isExactlyFull() const { return addr == end_addr; }
uint8_t* getStartAddr() { return start_addr; }
};
// Helper for generating a forward conditional jump with a relative offset.
// The constructor records the assembler's current position and emits the jump with a placeholder
// target. The destructor re-emits the jump at the recorded position so that it lands on whatever
// position the assembler has reached by then. The jump plus the code emitted while the object is
// alive must total fewer than max_jump_size bytes.
class ForwardJump {
private:
    const int max_jump_size = 128;
    Assembler& assembler;
    ConditionCode condition;
    uint8_t* jmp_inst; // start of the jump instruction that the destructor re-emits

public:
    ForwardJump(Assembler& assembler, ConditionCode condition);
    ~ForwardJump();

    // The destructor rewrites code at jmp_inst; a copy would rewrite the same jump a second time
    // from a different end position, so copying is disallowed.
    ForwardJump(const ForwardJump&) = delete;
    ForwardJump& operator=(const ForwardJump&) = delete;
};
uint8_t* initializePatchpoint2(uint8_t* start_addr, uint8_t* slowpath_start, uint8_t* end_addr, StackInfo stack_info,
......
......@@ -151,8 +151,8 @@ assembler::GenericRegister ICSlotRewrite::returnRegister() {
ICSlotRewrite* ICInfo::startRewrite(const char* debug_name) {
return new ICSlotRewrite(this, debug_name);
std::unique_ptr<ICSlotRewrite> ICInfo::startRewrite(const char* debug_name) {
return std::unique_ptr<ICSlotRewrite>(new ICSlotRewrite(this, debug_name));
}
ICSlotInfo* ICInfo::pickEntryForRewrite(const char* debug_name) {
......@@ -236,11 +236,11 @@ std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t*
// writer->emitNop();
// writer->emitGuardFalse();
std::unique_ptr<Assembler> writer(new Assembler(start, ic->slot_size));
writer->nop();
// writer->trap();
// writer->jmp(JumpDestination::fromStart(ic->slot_size * (ic->num_slots - i)));
writer->jmp(JumpDestination::fromStart(slowpath_start_addr - start));
Assembler writer(start, ic->slot_size);
writer.nop();
// writer.trap();
// writer.jmp(JumpDestination::fromStart(ic->slot_size * (ic->num_slots - i)));
writer.jmp(JumpDestination::fromStart(slowpath_start_addr - start));
}
ICInfo* icinfo = new ICInfo(start_addr, slowpath_rtn_addr, continue_addr, stack_info, ic->num_slots, ic->slot_size,
......@@ -272,10 +272,10 @@ void ICInfo::clear(ICSlotInfo* icentry) {
if (VERBOSITY() >= 4)
printf("clearing patchpoint %p, slot at %p\n", start_addr, start);
std::unique_ptr<Assembler> writer(new Assembler(start, getSlotSize()));
writer->nop();
writer->jmp(JumpDestination::fromStart(getSlotSize()));
assert(writer->bytesWritten() <= IC_INVALDITION_HEADER_SIZE);
Assembler writer(start, getSlotSize());
writer.nop();
writer.jmp(JumpDestination::fromStart(getSlotSize()));
assert(writer.bytesWritten() <= IC_INVALDITION_HEADER_SIZE);
// std::unique_ptr<MCWriter> writer(createMCWriter(start, getSlotSize(), 0));
// writer->emitNop();
......
......@@ -119,7 +119,7 @@ public:
llvm::CallingConv::ID getCallingConvention() { return calling_conv; }
const std::vector<int>& getLiveOuts() { return live_outs; }
ICSlotRewrite* startRewrite(const char* debug_name);
std::unique_ptr<ICSlotRewrite> startRewrite(const char* debug_name);
void clear(ICSlotInfo* entry);
bool shouldAttempt();
......
......@@ -135,10 +135,6 @@ void Location::dump() const {
RELEASE_ASSERT(0, "%d", type);
}
// Returns true if `val` lies outside the half-open range [-2^31, 2^31 - 1).
// Note that 2^31 - 1 itself is reported as large; this conservative upper bound is kept as-is.
// The bounds are built from int64_t so that no negative value is shifted and the result does not
// depend on the width of `long`.
static bool isLargeConstant(int64_t val) {
    const int64_t lower_bound = -(static_cast<int64_t>(1) << 31);
    const int64_t upper_bound = (static_cast<int64_t>(1) << 31) - 1;
    return val < lower_bound || val >= upper_bound;
}
Rewriter::ConstLoader::ConstLoader(Rewriter* rewriter) : rewriter(rewriter) {
}
......@@ -554,7 +550,7 @@ void RewriterVar::dump() {
}
assembler::Immediate RewriterVar::tryGetAsImmediate(bool* is_immediate) {
if (this->is_constant && !isLargeConstant(this->constant_value)) {
if (this->is_constant && !Rewriter::isLargeConstant(this->constant_value)) {
*is_immediate = true;
return assembler::Immediate(this->constant_value);
} else {
......@@ -568,7 +564,7 @@ assembler::Register RewriterVar::getInReg(Location dest, bool allow_constant_in_
#ifndef NDEBUG
if (!allow_constant_in_reg) {
assert(!is_constant || isLargeConstant(constant_value));
assert(!is_constant || Rewriter::isLargeConstant(constant_value));
}
#endif
......@@ -753,6 +749,18 @@ RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, RewriterVar*
return call(has_side_effects, func_addr, args, args_xmm);
}
// Five-argument convenience overload: packs arg0..arg4, in order, into the regular argument list
// and forwards to the vector-based call() with an empty XMM argument list.
RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1,
                            RewriterVar* arg2, RewriterVar* arg3, RewriterVar* arg4) {
    RewriterVar* const positional[] = { arg0, arg1, arg2, arg3, arg4 };

    RewriterVar::SmallVector args;
    for (RewriterVar* arg : positional)
        args.push_back(arg);

    RewriterVar::SmallVector args_xmm;
    return call(has_side_effects, func_addr, args, args_xmm);
}
static const Location caller_save_registers[]{
assembler::RAX, assembler::RCX, assembler::RDX, assembler::RSI, assembler::RDI,
assembler::R8, assembler::R9, assembler::R10, assembler::R11, assembler::XMM0,
......@@ -918,10 +926,14 @@ void Rewriter::_call(RewriterVar* result, bool has_side_effects, void* func_addr
if (need_to_spill) {
if (check_reg.type == Location::Register) {
spillRegister(check_reg.asRegister());
if (failed)
return;
} else {
assert(check_reg.type == Location::XMMRegister);
assert(var->locations.size() == 1);
spillRegister(check_reg.asXMMRegister());
if (failed)
return;
}
} else {
removeLocationFromVar(var, check_reg);
......@@ -1358,7 +1370,8 @@ int Rewriter::_allocate(RewriterVar* result, int n) {
consec = 0;
}
}
RELEASE_ASSERT(0, "Using all %d bytes of scratch!", scratch_size);
failed = true;
return 0;
}
RewriterVar* Rewriter::allocateAndCopy(RewriterVar* array_ptr, int n) {
......@@ -1663,11 +1676,11 @@ TypeRecorder* Rewriter::getTypeRecorder() {
return rewrite->getTypeRecorder();
}
Rewriter::Rewriter(ICSlotRewrite* rewrite, int num_args, const std::vector<int>& live_outs)
: rewrite(rewrite),
assembler(rewrite->getAssembler()),
Rewriter::Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const std::vector<int>& live_outs)
: rewrite(std::move(rewrite)),
assembler(this->rewrite->getAssembler()),
const_loader(this),
return_location(rewrite->returnRegister()),
return_location(this->rewrite->returnRegister()),
failed(false),
added_changing_action(false),
marked_inside_ic(false),
......@@ -1675,9 +1688,6 @@ Rewriter::Rewriter(ICSlotRewrite* rewrite, int num_args, const std::vector<int>&
done_guarding(false) {
initPhaseCollecting();
#ifndef NDEBUG
start_vars = RewriterVar::nvars;
#endif
finished = false;
for (int i = 0; i < num_args; i++) {
......@@ -1823,10 +1833,6 @@ Rewriter* Rewriter::createRewriter(void* rtn_addr, int num_args, const char* deb
return new Rewriter(ic->startRewrite(debug_name), num_args, ic->getLiveOuts());
}
#ifndef NDEBUG
int RewriterVar::nvars = 0;
#endif
static const int INITIAL_CALL_SIZE = 13;
static const int DWARF_RBP_REGNUM = 6;
bool spillFrameArgumentIfNecessary(StackMap::Record::Location& l, uint8_t*& inst_addr, uint8_t* inst_end,
......
......@@ -266,22 +266,12 @@ private:
RewriterVar& operator=(const RewriterVar&) = delete;
public:
#ifndef NDEBUG
static int nvars;
#endif
RewriterVar(Rewriter* rewriter) : rewriter(rewriter), next_use(0), is_arg(false), is_constant(false) {
#ifndef NDEBUG
nvars++;
#endif
assert(rewriter);
}
#ifndef NDEBUG
~RewriterVar() { nvars--; }
#endif
friend class Rewriter;
friend class JitFragmentWriter;
};
class RewriterAction {
......@@ -297,7 +287,7 @@ enum class ActionType { NORMAL, GUARD, MUTATION };
#define LOCATION_PLACEHOLDER ((RewriterVar*)1)
class Rewriter : public ICSlotRewrite::CommitHook {
private:
protected:
// Helps generating the best code for loading a const integer value.
// By keeping track of the last known value of every register and reusing it.
class ConstLoader {
......@@ -335,7 +325,6 @@ private:
bool failed; // if we tried to generate an invalid rewrite.
bool finished; // committed or aborted
#ifndef NDEBUG
int start_vars;
bool phase_emitting;
void initPhaseCollecting() { phase_emitting = false; }
......@@ -355,7 +344,7 @@ private:
std::vector<RewriterVar*> args;
std::vector<RewriterVar*> live_outs;
Rewriter(ICSlotRewrite* rewrite, int num_args, const std::vector<int>& live_outs);
Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const std::vector<int>& live_outs);
std::vector<RewriterAction> actions;
void addAction(const std::function<void()>& action, std::vector<RewriterVar*> const& vars, ActionType type) {
......@@ -477,11 +466,6 @@ public:
for (RewriterVar* var : vars) {
delete var;
}
// This check isn't thread safe and should be fine to remove if it causes
// issues (along with the nvars/start_vars accounting)
ASSERT(threading::threadWasStarted() || RewriterVar::nvars == start_vars, "%d %d", RewriterVar::nvars,
start_vars);
}
Location getReturnDestination();
......@@ -507,6 +491,8 @@ public:
RewriterVar* call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1, RewriterVar* arg2);
RewriterVar* call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1, RewriterVar* arg2,
RewriterVar* arg3);
RewriterVar* call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1, RewriterVar* arg2,
RewriterVar* arg3, RewriterVar* arg4);
RewriterVar* add(RewriterVar* a, int64_t b, Location dest);
// Allocates n pointer-sized stack slots:
RewriterVar* allocate(int n);
......@@ -521,6 +507,8 @@ public:
static Rewriter* createRewriter(void* rtn_addr, int num_args, const char* debug_name);
// Returns true if `val` lies outside the half-open range [-2^31, 2^31 - 1).
// Note that 2^31 - 1 itself is reported as large; this conservative upper bound is kept as-is.
// The bounds are built from int64_t so that no negative value is shifted and the result does not
// depend on the width of `long`.
static bool isLargeConstant(int64_t val) {
    const int64_t lower_bound = -(static_cast<int64_t>(1) << 31);
    const int64_t upper_bound = (static_cast<int64_t>(1) << 31) - 1;
    return val < lower_bound || val >= upper_bound;
}
friend class RewriterVar;
};
......
......@@ -64,7 +64,7 @@ namespace pyston {
// readelf -w test
//
#if RUNTIMEICS_OMIT_FRAME_PTR
// clang++ test.cpp -o test -O3 -fomit-frame-pointer -c -DN=40
// The generated assembly is:
//
......@@ -77,7 +77,7 @@ namespace pyston {
//
// (I believe the push/pop are for stack alignment)
//
static const char _eh_frame_template[] =
static const char _eh_frame_template_ofp[] =
// CIE
"\x14\x00\x00\x00" // size of the CIE
"\x00\x00\x00\x00" // specifies this is an CIE
......@@ -105,7 +105,7 @@ static const char _eh_frame_template[] =
"\x00\x00\x00\x00" // terminator
;
#else
// clang++ test.cpp -o test -O3 -fno-omit-frame-pointer -c -DN=40
// The generated assembly is:
// 0: 55 push %rbp
......@@ -118,7 +118,7 @@ static const char _eh_frame_template[] =
// 1a: 5d pop %rbp
// 1b: c3 retq
//
static const char _eh_frame_template[] =
static const char _eh_frame_template_fp[] =
// CIE
"\x14\x00\x00\x00" // size of the CIE
"\x00\x00\x00\x00" // specifies this is an CIE
......@@ -150,13 +150,19 @@ static const char _eh_frame_template[] =
"\x00\x00\x00\x00" // terminator
;
#endif
static constexpr int _eh_frame_template_ofp_size = sizeof(_eh_frame_template_ofp) - 1;
static constexpr int _eh_frame_template_fp_size = sizeof(_eh_frame_template_fp) - 1;
#define EH_FRAME_SIZE (sizeof(_eh_frame_template) - 1) // omit string-terminating null byte
static_assert(sizeof("") == 1, "strings are null-terminated");
static void writeTrivialEhFrame(void* eh_frame_addr, void* func_addr, uint64_t func_size) {
memcpy(eh_frame_addr, _eh_frame_template, EH_FRAME_SIZE);
static void writeTrivialEhFrame(void* eh_frame_addr, void* func_addr, uint64_t func_size, bool omit_frame_pointer) {
if (omit_frame_pointer)
memcpy(eh_frame_addr, _eh_frame_template_ofp, _eh_frame_template_ofp_size);
else
memcpy(eh_frame_addr, _eh_frame_template_fp, _eh_frame_template_fp_size);
int32_t* offset_ptr = (int32_t*)((uint8_t*)eh_frame_addr + 0x20);
int32_t* size_ptr = (int32_t*)((uint8_t*)eh_frame_addr + 0x24);
......@@ -171,27 +177,29 @@ static void writeTrivialEhFrame(void* eh_frame_addr, void* func_addr, uint64_t f
void EHFrameManager::writeAndRegister(void* func_addr, uint64_t func_size) {
assert(eh_frame_addr == NULL);
const int size = omit_frame_pointer ? _eh_frame_template_ofp_size : _eh_frame_template_fp_size;
#ifdef NVALGRIND
eh_frame_addr = malloc(EH_FRAME_SIZE);
eh_frame_addr = malloc(size);
#else
eh_frame_addr = mmap(NULL, (EH_FRAME_SIZE + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1), PROT_READ | PROT_WRITE,
eh_frame_addr = mmap(NULL, (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1), PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
RELEASE_ASSERT(eh_frame_addr != MAP_FAILED, "");
#endif
writeTrivialEhFrame(eh_frame_addr, func_addr, func_size);
writeTrivialEhFrame(eh_frame_addr, func_addr, func_size, omit_frame_pointer);
// Pass the frame size minus 4 to omit the 4-byte null terminator, otherwise we trip an assert in parseEhFrame.
// TODO: can we omit the terminator in general?
registerDynamicEhFrame((uint64_t)func_addr, func_size, (uint64_t)eh_frame_addr, EH_FRAME_SIZE - 4);
registerEHFrames((uint8_t*)eh_frame_addr, (uint64_t)eh_frame_addr, EH_FRAME_SIZE);
registerDynamicEhFrame((uint64_t)func_addr, func_size, (uint64_t)eh_frame_addr, size - 4);
registerEHFrames((uint8_t*)eh_frame_addr, (uint64_t)eh_frame_addr, size);
}
EHFrameManager::~EHFrameManager() {
if (eh_frame_addr) {
deregisterEHFrames((uint8_t*)eh_frame_addr, (uint64_t)eh_frame_addr, EH_FRAME_SIZE);
const int size = omit_frame_pointer ? _eh_frame_template_ofp_size : _eh_frame_template_fp_size;
deregisterEHFrames((uint8_t*)eh_frame_addr, (uint64_t)eh_frame_addr, size);
#ifdef NVALGRIND
free(eh_frame_addr);
#else
munmap(eh_frame_addr, (EH_FRAME_SIZE + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1));
munmap(eh_frame_addr, (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1));
#endif
}
}
......@@ -204,7 +212,7 @@ EHFrameManager::~EHFrameManager() {
#define SCRATCH_BYTES 0x30
#endif
RuntimeIC::RuntimeIC(void* func_addr, int num_slots, int slot_size) {
RuntimeIC::RuntimeIC(void* func_addr, int num_slots, int slot_size) : eh_frame(RUNTIMEICS_OMIT_FRAME_PTR) {
static StatCounter sc("runtime_ics_num");
sc.log();
......
......@@ -25,9 +25,10 @@ class ICInfo;
// Manages the eh_frame data for one piece of generated code: writeAndRegister() allocates,
// fills in and registers it, and the destructor deregisters and releases it again.
class EHFrameManager {
private:
// Address of the allocated eh_frame data; NULL until writeAndRegister() has run.
void* eh_frame_addr;
// Selects which template is copied: _eh_frame_template_ofp when true, _eh_frame_template_fp when false.
bool omit_frame_pointer;
public:
// NOTE(review): this constructor does not initialize omit_frame_pointer, yet writeAndRegister()
// and the destructor both read it -- confirm it is unused or give it a defined value.
EHFrameManager() : eh_frame_addr(NULL) {}
EHFrameManager(bool omit_frame_pointer) : eh_frame_addr(NULL), omit_frame_pointer(omit_frame_pointer) {}
~EHFrameManager();
// Writes the eh_frame for the function at func_addr spanning func_size bytes and registers it.
// Asserts that it has not already been called on this object.
void writeAndRegister(void* func_addr, uint64_t func_size);
};
......@@ -76,6 +77,29 @@ public:
}
};
// RuntimeIC subclass for `runtimeCall`: the base is built with func_addr = runtimeCall,
// num_slots = 2 and slot_size = 320.
class RuntimeCallIC : public RuntimeIC {
public:
    RuntimeCallIC() : RuntimeIC((void*)runtimeCall, 2, 320) {}

    // Forwards all arguments to call_ptr and returns its result cast to Box*.
    Box* call(Box* obj, ArgPassSpec argspec, Box* arg1, Box* arg2, Box* arg3, Box** args) {
        auto raw_result = call_ptr(obj, argspec, arg1, arg2, arg3, args);
        return (Box*)raw_result;
    }
};
// RuntimeIC subclass for `unaryop`: the base is built with func_addr = unaryop,
// num_slots = 2 and slot_size = 160.
class UnaryopIC : public RuntimeIC {
public:
    UnaryopIC() : RuntimeIC((void*)unaryop, 2, 160) {}

    // Forwards (obj, op_type) to call_ptr and returns its result cast to Box*.
    Box* call(Box* obj, int op_type) {
        auto raw_result = call_ptr(obj, op_type);
        return (Box*)raw_result;
    }
};
// RuntimeIC subclass for `augbinop`: the base is built with func_addr = augbinop,
// num_slots = 2 and slot_size = 240.
class AugBinopIC : public RuntimeIC {
public:
    AugBinopIC() : RuntimeIC((void*)augbinop, 2, 240) {}

    // Forwards (lhs, rhs, op_type) to call_ptr and returns its result cast to Box*.
    Box* call(Box* lhs, Box* rhs, int op_type) {
        auto raw_result = call_ptr(lhs, rhs, op_type);
        return (Box*)raw_result;
    }
};
class BinopIC : public RuntimeIC {
public:
BinopIC() : RuntimeIC((void*)binop, 2, 240) {}
......@@ -83,6 +107,55 @@ public:
Box* call(Box* lhs, Box* rhs, int op_type) { return (Box*)call_ptr(lhs, rhs, op_type); }
};
// RuntimeIC subclass for `compare`: the base is built with func_addr = compare,
// num_slots = 2 and slot_size = 240.
class CompareIC : public RuntimeIC {
public:
    CompareIC() : RuntimeIC((void*)compare, 2, 240) {}

    // Forwards (lhs, rhs, op_type) to call_ptr and returns its result cast to Box*.
    Box* call(Box* lhs, Box* rhs, int op_type) {
        auto raw_result = call_ptr(lhs, rhs, op_type);
        return (Box*)raw_result;
    }
};
// RuntimeIC subclass for `getitem`: the base is built with func_addr = getitem,
// num_slots = 2 and slot_size = 512.
class GetItemIC : public RuntimeIC {
public:
    GetItemIC() : RuntimeIC((void*)getitem, 2, 512) {}

    // Forwards (obj, attr) to call_ptr and returns its result cast to Box*.
    Box* call(Box* obj, Box* attr) {
        auto raw_result = call_ptr(obj, attr);
        return (Box*)raw_result;
    }
};
// RuntimeIC subclass for `setitem`: the base is built with func_addr = setitem,
// num_slots = 2 and slot_size = 512.
class SetItemIC : public RuntimeIC {
public:
    SetItemIC() : RuntimeIC((void*)setitem, 2, 512) {}

    // Forwards (obj, attr, v) to call_ptr and returns its result cast to Box*.
    Box* call(Box* obj, Box* attr, Box* v) {
        auto raw_result = call_ptr(obj, attr, v);
        return (Box*)raw_result;
    }
};
// RuntimeIC subclass for `getattr`: the base is built with func_addr = getattr,
// num_slots = 2 and slot_size = 512.
class GetAttrIC : public RuntimeIC {
public:
    GetAttrIC() : RuntimeIC((void*)getattr, 2, 512) {}

    // Forwards (obj, attr) to call_ptr and returns its result cast to Box*.
    Box* call(Box* obj, BoxedString* attr) {
        auto raw_result = call_ptr(obj, attr);
        return (Box*)raw_result;
    }
};
// RuntimeIC subclass for `setattr`: the base is built with func_addr = setattr,
// num_slots = 2 and slot_size = 512.
class SetAttrIC : public RuntimeIC {
public:
    SetAttrIC() : RuntimeIC((void*)setattr, 2, 512) {}

    // Forwards (obj, attr, v) to call_ptr and returns its result cast to Box*.
    Box* call(Box* obj, BoxedString* attr, Box* v) {
        auto raw_result = call_ptr(obj, attr, v);
        return (Box*)raw_result;
    }
};
// RuntimeIC subclass for `getGlobal`: the base is built with func_addr = getGlobal,
// num_slots = 2 and slot_size = 512.
class GetGlobalIC : public RuntimeIC {
public:
    GetGlobalIC() : RuntimeIC((void*)getGlobal, 2, 512) {}

    // Forwards (obj, s) to call_ptr and returns its result cast to Box*.
    Box* call(Box* obj, BoxedString* s) {
        auto raw_result = call_ptr(obj, s);
        return (Box*)raw_result;
    }
};
// RuntimeIC subclass for `setGlobal`: the base is built with func_addr = setGlobal,
// num_slots = 2 and slot_size = 512.
class SetGlobalIC : public RuntimeIC {
public:
    SetGlobalIC() : RuntimeIC((void*)setGlobal, 2, 512) {}

    // Forwards (obj, s, v) to call_ptr and returns its result cast to Box*.
    Box* call(Box* obj, BoxedString* s, Box* v) {
        auto raw_result = call_ptr(obj, s, v);
        return (Box*)raw_result;
    }
};
class NonzeroIC : public RuntimeIC {
public:
NonzeroIC() : RuntimeIC((void*)nonzero, 1, 40) {}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment