Commit 2e780773 authored by Marius Wachtler, committed by GitHub

Merge pull request #1290 from undingen/bjit_opt8

use r12, r15 and rbx in bjit and inside bjit ICs, remove some nonzero checks
parents 3489a0aa 5c0d56b3
...@@ -57,7 +57,41 @@ const int dwarf_to_gp[] = { ...@@ -57,7 +57,41 @@ const int dwarf_to_gp[] = {
Register Register::fromDwarf(int dwarf_regnum) { Register Register::fromDwarf(int dwarf_regnum) {
assert(dwarf_regnum >= 0 && dwarf_regnum <= 16); assert(dwarf_regnum >= 0 && dwarf_regnum <= 16);
return Register(dwarf_to_gp[dwarf_regnum]); Register reg(dwarf_to_gp[dwarf_regnum]);
assert(reg.getDwarfId() == dwarf_regnum);
return reg;
}
// Returns the DWARF register number that corresponds to this general-purpose
// register. This is the inverse of Register::fromDwarf(), which asserts that
// the register it builds maps back to the DWARF number it was given.
int Register::getDwarfId() const {
switch (regnum) {
// The first eight registers are ordered differently in DWARF than in the
// assembler's own regnum encoding, so each one is translated explicitly.
case RAX.regnum:
return 0;
case RDX.regnum:
return 1;
case RCX.regnum:
return 2;
case RBX.regnum:
return 3;
case RSI.regnum:
return 4;
case RDI.regnum:
return 5;
case RBP.regnum:
return 6;
case RSP.regnum:
return 7;
// r8-r15: the DWARF number is identical to regnum, so it is returned as is.
case R8.regnum:
case R9.regnum:
case R10.regnum:
case R11.regnum:
case R12.regnum:
case R13.regnum:
case R14.regnum:
case R15.regnum:
return regnum;
default:
// Any other regnum has no mapping here.
// NOTE(review): RELEASE_ASSERT is defined elsewhere; presumably it aborts,
// since no value is returned on this path - confirm.
RELEASE_ASSERT(0, "not implemented");
};
} }
GenericRegister GenericRegister::fromDwarf(int dwarf_regnum) { GenericRegister GenericRegister::fromDwarf(int dwarf_regnum) {
...@@ -441,7 +475,7 @@ void Assembler::mov_generic(Indirect src, Register dest, MovType type) { ...@@ -441,7 +475,7 @@ void Assembler::mov_generic(Indirect src, Register dest, MovType type) {
bool needssib = (src_idx == 0b100); bool needssib = (src_idx == 0b100);
int mode; int mode;
if (src.offset == 0) if (src.offset == 0 && src.base != RBP)
mode = 0b00; mode = 0b00;
else if (-0x80 <= src.offset && src.offset < 0x80) else if (-0x80 <= src.offset && src.offset < 0x80)
mode = 0b01; mode = 0b01;
...@@ -526,13 +560,12 @@ void Assembler::movsd(Indirect src, XMMRegister dest) { ...@@ -526,13 +560,12 @@ void Assembler::movsd(Indirect src, XMMRegister dest) {
int dest_idx = dest.regnum; int dest_idx = dest.regnum;
if (src_idx >= 8) { if (src_idx >= 8) {
trap(); rex |= REX_B;
rex |= REX_R;
src_idx -= 8; src_idx -= 8;
} }
if (dest_idx >= 8) { if (dest_idx >= 8) {
trap(); trap();
rex |= REX_B; rex |= REX_R;
dest_idx -= 8; dest_idx -= 8;
} }
...@@ -545,7 +578,7 @@ void Assembler::movsd(Indirect src, XMMRegister dest) { ...@@ -545,7 +578,7 @@ void Assembler::movsd(Indirect src, XMMRegister dest) {
bool needssib = (src_idx == 0b100); bool needssib = (src_idx == 0b100);
int mode; int mode;
if (src.offset == 0) if (src.offset == 0 && src.base != RBP)
mode = 0b00; mode = 0b00;
else if (-0x80 <= src.offset && src.offset < 0x80) else if (-0x80 <= src.offset && src.offset < 0x80)
mode = 0b01; mode = 0b01;
...@@ -589,7 +622,7 @@ void Assembler::movss(Indirect src, XMMRegister dest) { ...@@ -589,7 +622,7 @@ void Assembler::movss(Indirect src, XMMRegister dest) {
bool needssib = (src_idx == 0b100); bool needssib = (src_idx == 0b100);
int mode; int mode;
if (src.offset == 0) if (src.offset == 0 && src.base != RBP)
mode = 0b00; mode = 0b00;
else if (-0x80 <= src.offset && src.offset < 0x80) else if (-0x80 <= src.offset && src.offset < 0x80)
mode = 0b01; mode = 0b01;
...@@ -695,7 +728,7 @@ void Assembler::incl(Indirect mem) { ...@@ -695,7 +728,7 @@ void Assembler::incl(Indirect mem) {
emitByte(0xff); emitByte(0xff);
assert(-0x80 <= mem.offset && mem.offset < 0x80); assert(-0x80 <= mem.offset && mem.offset < 0x80);
if (mem.offset == 0) { if (mem.offset == 0 && mem.base != RBP) {
emitModRM(0b00, 0, src_idx); emitModRM(0b00, 0, src_idx);
if (needssib) if (needssib)
emitSIB(0b00, 0b100, src_idx); emitSIB(0b00, 0b100, src_idx);
...@@ -723,7 +756,7 @@ void Assembler::decl(Indirect mem) { ...@@ -723,7 +756,7 @@ void Assembler::decl(Indirect mem) {
emitByte(0xff); emitByte(0xff);
assert(-0x80 <= mem.offset && mem.offset < 0x80); assert(-0x80 <= mem.offset && mem.offset < 0x80);
if (mem.offset == 0) { if (mem.offset == 0 && mem.base != RBP) {
emitModRM(0b00, 1, src_idx); emitModRM(0b00, 1, src_idx);
} else { } else {
emitModRM(0b01, 1, src_idx); emitModRM(0b01, 1, src_idx);
...@@ -762,7 +795,7 @@ void Assembler::incq(Indirect mem) { ...@@ -762,7 +795,7 @@ void Assembler::incq(Indirect mem) {
emitByte(0xff); emitByte(0xff);
assert(-0x80 <= mem.offset && mem.offset < 0x80); assert(-0x80 <= mem.offset && mem.offset < 0x80);
if (mem.offset == 0) { if (mem.offset == 0 && mem.base != RBP) {
emitModRM(0b00, 0, src_idx); emitModRM(0b00, 0, src_idx);
if (needssib) if (needssib)
emitSIB(0b00, 0b100, src_idx); emitSIB(0b00, 0b100, src_idx);
...@@ -790,7 +823,7 @@ void Assembler::decq(Indirect mem) { ...@@ -790,7 +823,7 @@ void Assembler::decq(Indirect mem) {
emitByte(0xff); emitByte(0xff);
assert(-0x80 <= mem.offset && mem.offset < 0x80); assert(-0x80 <= mem.offset && mem.offset < 0x80);
if (mem.offset == 0) { if (mem.offset == 0 && mem.base != RBP) {
emitModRM(0b00, 1, src_idx); emitModRM(0b00, 1, src_idx);
} else { } else {
emitModRM(0b01, 1, src_idx); emitModRM(0b01, 1, src_idx);
...@@ -844,7 +877,7 @@ void Assembler::callq(Indirect mem) { ...@@ -844,7 +877,7 @@ void Assembler::callq(Indirect mem) {
emitByte(0xff); emitByte(0xff);
assert(-0x80 <= mem.offset && mem.offset < 0x80); assert(-0x80 <= mem.offset && mem.offset < 0x80);
if (mem.offset == 0) { if (mem.offset == 0 && mem.base != RBP) {
emitModRM(0b00, 2, src_idx); emitModRM(0b00, 2, src_idx);
} else { } else {
emitModRM(0b01, 2, src_idx); emitModRM(0b01, 2, src_idx);
...@@ -903,7 +936,7 @@ void Assembler::cmp(Indirect mem, Immediate imm, MovType type) { ...@@ -903,7 +936,7 @@ void Assembler::cmp(Indirect mem, Immediate imm, MovType type) {
emitRex(rex); emitRex(rex);
emitByte(0x81); emitByte(0x81);
if (mem.offset == 0) { if (mem.offset == 0 && mem.base != RBP) {
emitModRM(0b00, 7, src_idx); emitModRM(0b00, 7, src_idx);
if (needssib) if (needssib)
emitSIB(0b00, 0b100, src_idx); emitSIB(0b00, 0b100, src_idx);
...@@ -940,17 +973,25 @@ void Assembler::cmp(Indirect mem, Register reg) { ...@@ -940,17 +973,25 @@ void Assembler::cmp(Indirect mem, Register reg) {
assert(mem_idx >= 0 && mem_idx < 8); assert(mem_idx >= 0 && mem_idx < 8);
assert(reg_idx >= 0 && reg_idx < 8); assert(reg_idx >= 0 && reg_idx < 8);
bool needssib = (mem_idx == 0b100);
emitRex(rex); emitRex(rex);
emitByte(0x3B); emitByte(0x3B);
if (mem.offset == 0) { if (mem.offset == 0 && mem.base != RBP) {
emitModRM(0b00, reg_idx, mem_idx); emitModRM(0b00, reg_idx, mem_idx);
if (needssib)
emitSIB(0b00, 0b100, mem_idx);
} else if (-0x80 <= mem.offset && mem.offset < 0x80) { } else if (-0x80 <= mem.offset && mem.offset < 0x80) {
emitModRM(0b01, reg_idx, mem_idx); emitModRM(0b01, reg_idx, mem_idx);
if (needssib)
emitSIB(0b00, 0b100, mem_idx);
emitByte(mem.offset); emitByte(mem.offset);
} else { } else {
assert(fitsInto<int32_t>(mem.offset)); assert(fitsInto<int32_t>(mem.offset));
emitModRM(0b10, reg_idx, mem_idx); emitModRM(0b10, reg_idx, mem_idx);
if (needssib)
emitSIB(0b00, 0b100, mem_idx);
emitInt(mem.offset, 4); emitInt(mem.offset, 4);
} }
} }
...@@ -1059,7 +1100,7 @@ void Assembler::jmp(Indirect dest) { ...@@ -1059,7 +1100,7 @@ void Assembler::jmp(Indirect dest) {
assert(reg_idx >= 0 && reg_idx < 8 && "not yet implemented"); assert(reg_idx >= 0 && reg_idx < 8 && "not yet implemented");
emitByte(0xFF); emitByte(0xFF);
if (dest.offset == 0) { if (dest.offset == 0 && dest.base != RBP) {
emitModRM(0b00, 0b100, reg_idx); emitModRM(0b00, 0b100, reg_idx);
} else if (-0x80 <= dest.offset && dest.offset < 0x80) { } else if (-0x80 <= dest.offset && dest.offset < 0x80) {
emitModRM(0b01, 0b100, reg_idx); emitModRM(0b01, 0b100, reg_idx);
......
...@@ -241,7 +241,7 @@ int ICInfo::calculateSuggestedSize() { ...@@ -241,7 +241,7 @@ int ICInfo::calculateSuggestedSize() {
if (!times_rewritten) if (!times_rewritten)
return slots[0].size; return slots[0].size;
int additional_space_per_slot = 30; int additional_space_per_slot = 50;
// if there are less rewrites than slots we can give a very accurate estimate // if there are less rewrites than slots we can give a very accurate estimate
if (times_rewritten < slots.size()) { if (times_rewritten < slots.size()) {
// add up the sizes of all used slots // add up the sizes of all used slots
...@@ -316,7 +316,8 @@ static llvm::DenseMap<void*, ICInfo*> ics_by_return_addr; ...@@ -316,7 +316,8 @@ static llvm::DenseMap<void*, ICInfo*> ics_by_return_addr;
ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, StackInfo stack_info, int size, ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, StackInfo stack_info, int size,
llvm::CallingConv::ID calling_conv, LiveOutSet _live_outs, assembler::GenericRegister return_register, llvm::CallingConv::ID calling_conv, LiveOutSet _live_outs, assembler::GenericRegister return_register,
TypeRecorder* type_recorder, std::vector<Location> ic_global_decref_locations) TypeRecorder* type_recorder, std::vector<Location> ic_global_decref_locations,
assembler::RegisterSet allocatable_registers)
: next_slot_to_try(0), : next_slot_to_try(0),
stack_info(stack_info), stack_info(stack_info),
calling_conv(calling_conv), calling_conv(calling_conv),
...@@ -326,6 +327,7 @@ ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, S ...@@ -326,6 +327,7 @@ ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, S
retry_in(0), retry_in(0),
retry_backoff(1), retry_backoff(1),
times_rewritten(0), times_rewritten(0),
allocatable_registers(allocatable_registers),
ic_global_decref_locations(std::move(ic_global_decref_locations)), ic_global_decref_locations(std::move(ic_global_decref_locations)),
start_addr(start_addr), start_addr(start_addr),
slowpath_rtn_addr(slowpath_rtn_addr), slowpath_rtn_addr(slowpath_rtn_addr),
...@@ -387,7 +389,7 @@ std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t* ...@@ -387,7 +389,7 @@ std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t*
ICInfo* icinfo ICInfo* icinfo
= new ICInfo(start_addr, slowpath_rtn_addr, continue_addr, stack_info, ic->size, ic->getCallingConvention(), = new ICInfo(start_addr, slowpath_rtn_addr, continue_addr, stack_info, ic->size, ic->getCallingConvention(),
std::move(live_outs), return_register, ic->type_recorder, decref_info); std::move(live_outs), return_register, ic->type_recorder, decref_info, ic->allocatable_regs);
assert(!ics_by_return_addr.count(slowpath_rtn_addr)); assert(!ics_by_return_addr.count(slowpath_rtn_addr));
ics_by_return_addr[slowpath_rtn_addr] = icinfo; ics_by_return_addr[slowpath_rtn_addr] = icinfo;
......
...@@ -94,6 +94,7 @@ private: ...@@ -94,6 +94,7 @@ private:
TypeRecorder* const type_recorder; TypeRecorder* const type_recorder;
int retry_in, retry_backoff; int retry_in, retry_backoff;
int times_rewritten; int times_rewritten;
assembler::RegisterSet allocatable_registers;
DecrefInfo slowpath_decref_info; DecrefInfo slowpath_decref_info;
// This is a vector of locations which always need to get decrefed inside this IC. // This is a vector of locations which always need to get decrefed inside this IC.
...@@ -107,7 +108,8 @@ private: ...@@ -107,7 +108,8 @@ private:
public: public:
ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, StackInfo stack_info, int size, ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, StackInfo stack_info, int size,
llvm::CallingConv::ID calling_conv, LiveOutSet live_outs, assembler::GenericRegister return_register, llvm::CallingConv::ID calling_conv, LiveOutSet live_outs, assembler::GenericRegister return_register,
TypeRecorder* type_recorder, std::vector<Location> ic_global_decref_locations); TypeRecorder* type_recorder, std::vector<Location> ic_global_decref_locations,
assembler::RegisterSet allocatable_registers = assembler::RegisterSet::stdAllocatable());
~ICInfo(); ~ICInfo();
void* const start_addr, *const slowpath_rtn_addr, *const continue_addr; void* const start_addr, *const slowpath_rtn_addr, *const continue_addr;
...@@ -133,6 +135,8 @@ public: ...@@ -133,6 +135,8 @@ public:
int percentBackedoff() const { return retry_backoff; } int percentBackedoff() const { return retry_backoff; }
int timesRewritten() const { return times_rewritten; } int timesRewritten() const { return times_rewritten; }
assembler::RegisterSet getAllocatableRegs() const { return allocatable_registers; }
friend class ICSlotRewrite; friend class ICSlotRewrite;
static ICInfo* getICInfoForNode(AST* node); static ICInfo* getICInfoForNode(AST* node);
......
...@@ -646,10 +646,9 @@ void Rewriter::_cmp(RewriterVar* result, RewriterVar* v1, AST_TYPE::AST_TYPE cmp ...@@ -646,10 +646,9 @@ void Rewriter::_cmp(RewriterVar* result, RewriterVar* v1, AST_TYPE::AST_TYPE cmp
v1->bumpUseEarlyIfPossible(); v1->bumpUseEarlyIfPossible();
v2->bumpUseEarlyIfPossible(); v2->bumpUseEarlyIfPossible();
// sete and setne has special register requirements (can't use r8-r15) // sete and setne has special register requirements
const assembler::Register valid_registers[] = { auto set_inst_valid_registers = assembler::RAX | assembler::RBX | assembler::RCX | assembler::RDX;
assembler::RAX, assembler::RCX, assembler::RDX, assembler::RSI, assembler::RDI, auto valid_registers = set_inst_valid_registers & allocatable_regs;
};
assembler::Register newvar_reg = allocReg(dest, Location::any(), valid_registers); assembler::Register newvar_reg = allocReg(dest, Location::any(), valid_registers);
result->initializeInReg(newvar_reg); result->initializeInReg(newvar_reg);
assembler->cmp(v1_reg, v2_reg); assembler->cmp(v1_reg, v2_reg);
...@@ -849,6 +848,9 @@ assembler::Register RewriterVar::getInReg(Location dest, bool allow_constant_in_ ...@@ -849,6 +848,9 @@ assembler::Register RewriterVar::getInReg(Location dest, bool allow_constant_in_
Location l(*locations.begin()); Location l(*locations.begin());
assembler::Register reg = rewriter->allocReg(dest, otherThan); assembler::Register reg = rewriter->allocReg(dest, otherThan);
if (rewriter->failed)
return reg;
assert(rewriter->vars_by_location.count(reg) == 0); assert(rewriter->vars_by_location.count(reg) == 0);
if (l.type == Location::Scratch || l.type == Location::Stack) { if (l.type == Location::Scratch || l.type == Location::Stack) {
...@@ -1979,9 +1981,10 @@ void Rewriter::spillRegister(assembler::Register reg, Location preserve) { ...@@ -1979,9 +1981,10 @@ void Rewriter::spillRegister(assembler::Register reg, Location preserve) {
} }
// First, try to spill into a callee-save register: // First, try to spill into a callee-save register:
for (assembler::Register new_reg : allocatable_regs) { auto callee_save_allocatable_regs = allocatable_regs & assembler::RegisterSet::getCalleeSave();
if (!new_reg.isCalleeSave()) for (assembler::Register new_reg : callee_save_allocatable_regs) {
continue; assert(new_reg.isCalleeSave());
if (vars_by_location.count(new_reg)) if (vars_by_location.count(new_reg))
continue; continue;
if (Location(new_reg) == preserve) if (Location(new_reg) == preserve)
...@@ -2023,8 +2026,7 @@ assembler::Register Rewriter::allocReg(Location dest, Location otherThan) { ...@@ -2023,8 +2026,7 @@ assembler::Register Rewriter::allocReg(Location dest, Location otherThan) {
return allocReg(dest, otherThan, allocatable_regs); return allocReg(dest, otherThan, allocatable_regs);
} }
assembler::Register Rewriter::allocReg(Location dest, Location otherThan, assembler::Register Rewriter::allocReg(Location dest, Location otherThan, assembler::RegisterSet valid_registers) {
llvm::ArrayRef<assembler::Register> valid_registers) {
assertPhaseEmitting(); assertPhaseEmitting();
if (dest.type == Location::AnyReg) { if (dest.type == Location::AnyReg) {
...@@ -2063,7 +2065,7 @@ assembler::Register Rewriter::allocReg(Location dest, Location otherThan, ...@@ -2063,7 +2065,7 @@ assembler::Register Rewriter::allocReg(Location dest, Location otherThan,
assert(failed || vars_by_location.count(best_reg) == 0); assert(failed || vars_by_location.count(best_reg) == 0);
return best_reg; return best_reg;
} else if (dest.type == Location::Register) { } else if (dest.type == Location::Register) {
assert(std::find(valid_registers.begin(), valid_registers.end(), dest.asRegister()) != valid_registers.end()); assert(valid_registers.isInside(dest.asRegister()));
assembler::Register reg(dest.regnum); assembler::Register reg(dest.regnum);
if (vars_by_location.count(reg)) { if (vars_by_location.count(reg)) {
...@@ -2220,7 +2222,7 @@ Rewriter::Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const L ...@@ -2220,7 +2222,7 @@ Rewriter::Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const L
marked_inside_ic(false), marked_inside_ic(false),
done_guarding(false), done_guarding(false),
last_guard_action(-1), last_guard_action(-1),
allocatable_regs(std_allocatable_regs) { allocatable_regs(this->rewrite->getICInfo()->getAllocatableRegs()) {
initPhaseCollecting(); initPhaseCollecting();
finished = false; finished = false;
......
...@@ -516,8 +516,7 @@ protected: ...@@ -516,8 +516,7 @@ protected:
// Allocates a register. dest must be of type Register or AnyReg // Allocates a register. dest must be of type Register or AnyReg
// If otherThan is a register, guaranteed to not use that register. // If otherThan is a register, guaranteed to not use that register.
assembler::Register allocReg(Location dest, Location otherThan = Location::any()); assembler::Register allocReg(Location dest, Location otherThan = Location::any());
assembler::Register allocReg(Location dest, Location otherThan, assembler::Register allocReg(Location dest, Location otherThan, assembler::RegisterSet valid_registers);
llvm::ArrayRef<assembler::Register> valid_registers);
assembler::XMMRegister allocXMMReg(Location dest, Location otherThan = Location::any()); assembler::XMMRegister allocXMMReg(Location dest, Location otherThan = Location::any());
// Allocates an 8-byte region in the scratch space // Allocates an 8-byte region in the scratch space
Location allocScratch(); Location allocScratch();
...@@ -609,7 +608,7 @@ protected: ...@@ -609,7 +608,7 @@ protected:
#endif #endif
} }
llvm::ArrayRef<assembler::Register> allocatable_regs; assembler::RegisterSet allocatable_regs;
public: public:
// This should be called exactly once for each argument // This should be called exactly once for each argument
......
...@@ -35,6 +35,7 @@ namespace assembler { ...@@ -35,6 +35,7 @@ namespace assembler {
class Assembler; class Assembler;
struct RegisterSet;
struct Register { struct Register {
int regnum; int regnum;
...@@ -48,30 +49,84 @@ struct Register { ...@@ -48,30 +49,84 @@ struct Register {
void dump() const; void dump() const;
int getDwarfId() const;
static Register fromDwarf(int dwarf_regnum); static Register fromDwarf(int dwarf_regnum);
static constexpr int numRegs() { return 16; } static constexpr int numRegs() { return 16; }
constexpr RegisterSet operator|(Register b) const;
}; };
const Register RAX(0); constexpr Register RAX(0);
const Register RCX(1); constexpr Register RCX(1);
const Register RDX(2); constexpr Register RDX(2);
const Register RBX(3); constexpr Register RBX(3);
const Register RSP(4); constexpr Register RSP(4);
const Register RBP(5); constexpr Register RBP(5);
const Register RSI(6); constexpr Register RSI(6);
const Register RDI(7); constexpr Register RDI(7);
const Register R8(8); constexpr Register R8(8);
const Register R9(9); constexpr Register R9(9);
const Register R10(10); constexpr Register R10(10);
const Register R11(11); constexpr Register R11(11);
const Register R12(12); constexpr Register R12(12);
const Register R13(13); constexpr Register R13(13);
const Register R14(14); constexpr Register R14(14);
const Register R15(15); constexpr Register R15(15);
// A set of general-purpose registers stored as a bitmask: bit N of `regs` is
// set exactly when Register(N) is a member. Sets combine with | and &, and can
// be walked with a range-for loop, which yields members in ascending regnum
// order.
struct RegisterSet {
    using Regs = unsigned int;
    Regs regs;

    // Builds a set directly from a raw bitmask.
    constexpr explicit RegisterSet(Regs regs) : regs(regs) {}
    // Builds the set that contains only `reg` (implicit on purpose, so a single
    // Register can be used wherever a set is expected).
    constexpr RegisterSet(Register reg) : regs(1ul << reg.regnum) {}

    // The registers treated as callee-save.
    static constexpr RegisterSet getCalleeSave() { return RBX | RSP | RBP | R12 | R13 | R14 | R15; }
    // The default pool of registers handed to the allocator.
    static constexpr RegisterSet stdAllocatable() { return RAX | RCX | RDX | RDI | RSI | R8 | R9 | R10 | R11; }

    // True when `reg` is a member of this set.
    bool isInside(Register reg) const { return (regs & (1ul << reg.regnum)) != 0; }
    // True when the set has no members.
    bool empty() const { return regs == 0; }

    // Forward iterator over the members of a set. It refers to the set it was
    // created from, so that set has to outlive the iterator.
    class iterator {
    public:
        const RegisterSet& set;
        int i;

        iterator(const RegisterSet& set, int i) : set(set), i(i) {}

        // Steps to the next member, or to the end position if none is left.
        iterator& operator++() {
            ++i;
            while (i < Register::numRegs() && !set.isInside(Register(i)))
                ++i;
            // Incrementing an iterator that is already at the end keeps it there.
            if (i > Register::numRegs())
                i = Register::numRegs();
            return *this;
        }

        // Only the position is compared; the referenced set is not.
        bool operator==(const iterator& rhs) const { return i == rhs.i; }
        bool operator!=(const iterator& rhs) const { return i != rhs.i; }
        Register operator*() { return Register(i); }
    };

    // Points at the lowest-numbered member, or equals end() for an empty set.
    iterator begin() const { return empty() ? end() : iterator(*this, __builtin_ctz(regs)); }
    // The one-past-the-last position.
    iterator end() const { return iterator(*this, Register::numRegs()); }

    // Union and intersection.
    constexpr RegisterSet operator|(RegisterSet b) const { return RegisterSet(regs | b.regs); }
    constexpr RegisterSet operator&(RegisterSet b) const { return RegisterSet(regs & b.regs); }
    void operator|=(RegisterSet b) { regs = regs | b.regs; }
    void operator&=(RegisterSet b) { regs = regs & b.regs; }
};
// Or-ing two registers produces the set that contains exactly those two.
// Defined out of line because RegisterSet must be complete at this point.
constexpr RegisterSet Register::operator|(Register b) const {
    return RegisterSet(RegisterSet(*this).regs | RegisterSet(b).regs);
}
inline bool Register::isCalleeSave() { inline bool Register::isCalleeSave() {
return *this == RBX || *this == RSP || *this == RBP || regnum >= 12; return RegisterSet::getCalleeSave().isInside(*this);
} }
struct Indirect { struct Indirect {
......
...@@ -128,7 +128,8 @@ private: ...@@ -128,7 +128,8 @@ private:
Value visit_langPrimitive(AST_LangPrimitive* node); Value visit_langPrimitive(AST_LangPrimitive* node);
// for doc on 'exit_offset' have a look at JitFragmentWriter::num_bytes_exit and num_bytes_overlapping // for doc on 'exit_offset' have a look at JitFragmentWriter::num_bytes_exit and num_bytes_overlapping
void startJITing(CFGBlock* block, int exit_offset = 0); void startJITing(CFGBlock* block, int exit_offset = 0,
llvm::DenseSet<int> known_non_null_vregs = llvm::DenseSet<int>());
void abortJITing(); void abortJITing();
void finishJITing(CFGBlock* continue_block = NULL); void finishJITing(CFGBlock* continue_block = NULL);
Box* execJITedBlock(CFGBlock* b); Box* execJITedBlock(CFGBlock* b);
...@@ -293,7 +294,7 @@ void ASTInterpreter::initArguments(BoxedClosure* _closure, BoxedGenerator* _gene ...@@ -293,7 +294,7 @@ void ASTInterpreter::initArguments(BoxedClosure* _closure, BoxedGenerator* _gene
assert(i == param_names.totalParameters()); assert(i == param_names.totalParameters());
} }
void ASTInterpreter::startJITing(CFGBlock* block, int exit_offset) { void ASTInterpreter::startJITing(CFGBlock* block, int exit_offset, llvm::DenseSet<int> known_non_null_vregs) {
assert(ENABLE_BASELINEJIT); assert(ENABLE_BASELINEJIT);
assert(!jit); assert(!jit);
...@@ -308,7 +309,18 @@ void ASTInterpreter::startJITing(CFGBlock* block, int exit_offset) { ...@@ -308,7 +309,18 @@ void ASTInterpreter::startJITing(CFGBlock* block, int exit_offset) {
exit_offset = 0; exit_offset = 0;
} }
jit = code_block->newFragment(block, exit_offset); // small optimization: we know that the passed arguments in the entry block are non zero
if (block == block->cfg->getStartingBlock() && block->predecessors.empty()) {
auto param_names = getMD()->param_names;
for (auto&& arg : param_names.arg_names) {
known_non_null_vregs.insert(arg->vreg);
}
if (param_names.vararg_name)
known_non_null_vregs.insert(param_names.vararg_name->vreg);
if (param_names.kwarg_name)
known_non_null_vregs.insert(param_names.kwarg_name->vreg);
}
jit = code_block->newFragment(block, exit_offset, std::move(known_non_null_vregs));
} }
void ASTInterpreter::abortJITing() { void ASTInterpreter::abortJITing() {
...@@ -323,10 +335,20 @@ void ASTInterpreter::abortJITing() { ...@@ -323,10 +335,20 @@ void ASTInterpreter::abortJITing() {
void ASTInterpreter::finishJITing(CFGBlock* continue_block) { void ASTInterpreter::finishJITing(CFGBlock* continue_block) {
if (!jit) if (!jit)
return; return;
int exit_offset = jit->finishCompilation();
int exit_offset = 0;
llvm::DenseSet<int> known_non_null;
std::tie(exit_offset, known_non_null) = jit->finishCompilation();
jit.reset(); jit.reset();
if (continue_block && !continue_block->code) if (continue_block && !continue_block->code) {
startJITing(continue_block, exit_offset); // check if we can reuse the known non null vreg set
if (continue_block->predecessors.size() == 1)
assert(current_block == continue_block->predecessors[0]);
else
known_non_null.clear();
startJITing(continue_block, exit_offset, std::move(known_non_null));
}
} }
Box* ASTInterpreter::execJITedBlock(CFGBlock* b) { Box* ASTInterpreter::execJITedBlock(CFGBlock* b) {
......
This diff is collapsed.
...@@ -73,9 +73,9 @@ class JitFragmentWriter; ...@@ -73,9 +73,9 @@ class JitFragmentWriter;
// register or stack slot but we aren't if it outlives the block - we have to store it in the interpreter instance. // register or stack slot but we aren't if it outlives the block - we have to store it in the interpreter instance.
// //
// We use the following callee-save regs to speed up the generated code: // We use the following callee-save regs to speed up the generated code:
// r12, r15: temporary values // rbx, rbp, r12, r15: temporary values
// r13: pointer to ASTInterpreter instance // r13 : pointer to ASTInterpreter instance
// r14: pointer to the vregs array // r14 : pointer to the vregs array
// //
// To execute a specific CFGBlock one has to call: // To execute a specific CFGBlock one has to call:
// CFGBlock* block; // CFGBlock* block;
...@@ -94,10 +94,12 @@ class JitFragmentWriter; ...@@ -94,10 +94,12 @@ class JitFragmentWriter;
// //
// Basic layout of generated code block is: // Basic layout of generated code block is:
// entry_code: // entry_code:
// push %rbp ; save rbp
// push %r15 ; save r15 // push %r15 ; save r15
// push %r14 ; save r14 // push %r14 ; save r14
// push %r13 ; save r13 // push %r13 ; save r13
// push %r12 ; save r12 // push %r12 ; save r12
// push %rbx ; save rbx
// sub $0x118,%rsp ; setup scratch, 0x118 = scratch_size + 16 = space for two func args passed on the // sub $0x118,%rsp ; setup scratch, 0x118 = scratch_size + 16 = space for two func args passed on the
// stack + 8 byte for stack alignment // stack + 8 byte for stack alignment
// mov %rdi,%r13 ; copy the pointer to ASTInterpreter instance into r13 // mov %rdi,%r13 ; copy the pointer to ASTInterpreter instance into r13
...@@ -113,10 +115,12 @@ class JitFragmentWriter; ...@@ -113,10 +115,12 @@ class JitFragmentWriter;
// jne end_side_exit // jne end_side_exit
// movabs $0x215bb60,%rax ; rax = CFGBlock* to interpret next (rax is the 1. return reg) // movabs $0x215bb60,%rax ; rax = CFGBlock* to interpret next (rax is the 1. return reg)
// add $0x118,%rsp ; restore stack pointer // add $0x118,%rsp ; restore stack pointer
// pop %rbx ; restore rbx
// pop %r12 ; restore r12 // pop %r12 ; restore r12
// pop %r13 ; restore r13 // pop %r13 ; restore r13
// pop %r14 ; restore r14 // pop %r14 ; restore r14
// pop %r15 ; restore r15 // pop %r15 ; restore r15
// pop %rbp ; restore rbp
// ret ; exit to the interpreter which will interpret the specified CFGBLock* // ret ; exit to the interpreter which will interpret the specified CFGBLock*
// end_side_exit: // end_side_exit:
// .... // ....
...@@ -128,10 +132,12 @@ class JitFragmentWriter; ...@@ -128,10 +132,12 @@ class JitFragmentWriter;
// in this case 0 which means we are finished // in this case 0 which means we are finished
// movabs $0x1270014108,%rdx ; rdx must contain the Box* value to return // movabs $0x1270014108,%rdx ; rdx must contain the Box* value to return
// add $0x118,%rsp ; restore stack pointer // add $0x118,%rsp ; restore stack pointer
// pop %rbx ; restore rbx
// pop %r12 ; restore r12 // pop %r12 ; restore r12
// pop %r13 ; restore r13 // pop %r13 ; restore r13
// pop %r14 ; restore r14 // pop %r14 ; restore r14
// pop %r15 ; restore r15 // pop %r15 ; restore r15
// pop %rbp ; restore rbp
// ret // ret
// //
// nth_JitFragment: // nth_JitFragment:
...@@ -148,6 +154,8 @@ public: ...@@ -148,6 +154,8 @@ public:
// scratch size + space for passing additional args on the stack without having to adjust the SP when calling // scratch size + space for passing additional args on the stack without having to adjust the SP when calling
// functions with more than 6 args. // functions with more than 6 args.
static constexpr int sp_adjustment = scratch_size + num_stack_args * 8 + 8 /* = alignment */; static constexpr int sp_adjustment = scratch_size + num_stack_args * 8 + 8 /* = alignment */;
static constexpr assembler::RegisterSet additional_regs = assembler::RBX | assembler::RBP | assembler::R12
| assembler::R15;
private: private:
struct MemoryManager { struct MemoryManager {
...@@ -173,7 +181,8 @@ private: ...@@ -173,7 +181,8 @@ private:
public: public:
JitCodeBlock(llvm::StringRef name); JitCodeBlock(llvm::StringRef name);
std::unique_ptr<JitFragmentWriter> newFragment(CFGBlock* block, int patch_jump_offset = 0); std::unique_ptr<JitFragmentWriter> newFragment(CFGBlock* block, int patch_jump_offset,
llvm::DenseSet<int> known_non_null_vregs);
bool shouldCreateNewBlock() const { return asm_failed || a.bytesLeft() < 128; } bool shouldCreateNewBlock() const { return asm_failed || a.bytesLeft() < 128; }
void fragmentAbort(bool not_enough_space); void fragmentAbort(bool not_enough_space);
void fragmentFinished(int bytes_witten, int num_bytes_overlapping, void* next_fragment_start, ICInfo& ic_info); void fragmentFinished(int bytes_witten, int num_bytes_overlapping, void* next_fragment_start, ICInfo& ic_info);
...@@ -208,9 +217,9 @@ private: ...@@ -208,9 +217,9 @@ private:
RewriterVar* vregs_array; RewriterVar* vregs_array;
llvm::DenseMap<InternedString, RewriterVar*> local_syms; llvm::DenseMap<InternedString, RewriterVar*> local_syms;
// keeps track which non block local vregs are known to have a non NULL value // keeps track which non block local vregs are known to have a non NULL value
// TODO: in the future we could reuse this information between different basic blocks
llvm::DenseSet<int> known_non_null_vregs; llvm::DenseSet<int> known_non_null_vregs;
std::unique_ptr<ICInfo> ic_info; std::unique_ptr<ICInfo> ic_info;
llvm::SmallPtrSet<RewriterVar*, 4> var_is_a_python_bool;
// Optional points to a CFGBlock and a patch location which should get patched to a direct jump if // Optional points to a CFGBlock and a patch location which should get patched to a direct jump if
// the specified block gets JITed. The patch location is guaranteed to be at least 'min_patch_size' bytes long. // the specified block gets JITed. The patch location is guaranteed to be at least 'min_patch_size' bytes long.
...@@ -232,7 +241,8 @@ private: ...@@ -232,7 +241,8 @@ private:
public: public:
JitFragmentWriter(CFGBlock* block, std::unique_ptr<ICInfo> ic_info, std::unique_ptr<ICSlotRewrite> rewrite, JitFragmentWriter(CFGBlock* block, std::unique_ptr<ICInfo> ic_info, std::unique_ptr<ICSlotRewrite> rewrite,
int code_offset, int num_bytes_overlapping, void* entry_code, JitCodeBlock& code_block); int code_offset, int num_bytes_overlapping, void* entry_code, JitCodeBlock& code_block,
llvm::DenseSet<int> known_non_null_vregs);
RewriterVar* getInterp(); RewriterVar* getInterp();
RewriterVar* imm(uint64_t val); RewriterVar* imm(uint64_t val);
...@@ -302,7 +312,8 @@ public: ...@@ -302,7 +312,8 @@ public:
void emitUncacheExcInfo(); void emitUncacheExcInfo();
void abortCompilation(); void abortCompilation();
int finishCompilation(); // returns pair of the number of bytes for the overwriteable jump and known non null vregs at end of current block
std::pair<int, llvm::DenseSet<int>> finishCompilation();
bool finishAssembly(int continue_offset, bool& should_fill_with_nops, bool& variable_size_slots) override; bool finishAssembly(int continue_offset, bool& should_fill_with_nops, bool& variable_size_slots) override;
...@@ -333,7 +344,7 @@ private: ...@@ -333,7 +344,7 @@ private:
static Box* createSetHelper(uint64_t num, Box** data); static Box* createSetHelper(uint64_t num, Box** data);
static Box* createTupleHelper(uint64_t num, Box** data); static Box* createTupleHelper(uint64_t num, Box** data);
static Box* exceptionMatchesHelper(Box* obj, Box* cls); static Box* exceptionMatchesHelper(Box* obj, Box* cls);
static Box* hasnextHelper(Box* b); static BORROWED(Box*) hasnextHelper(Box* b);
static BORROWED(Box*) nonzeroHelper(Box* b); static BORROWED(Box*) nonzeroHelper(Box* b);
static BORROWED(Box*) notHelper(Box* b); static BORROWED(Box*) notHelper(Box* b);
static Box* runtimeCallHelper(Box* obj, ArgPassSpec argspec, TypeRecorder* type_recorder, Box** args, static Box* runtimeCallHelper(Box* obj, ArgPassSpec argspec, TypeRecorder* type_recorder, Box** args,
......
...@@ -44,8 +44,10 @@ int ICSetupInfo::totalSize() const { ...@@ -44,8 +44,10 @@ int ICSetupInfo::totalSize() const {
static std::vector<std::pair<PatchpointInfo*, void* /* addr of func to call */>> new_patchpoints; static std::vector<std::pair<PatchpointInfo*, void* /* addr of func to call */>> new_patchpoints;
ICSetupInfo* ICSetupInfo::initialize(bool has_return_value, int size, ICType type, TypeRecorder* type_recorder) { ICSetupInfo* ICSetupInfo::initialize(bool has_return_value, int size, ICType type, TypeRecorder* type_recorder,
ICSetupInfo* rtn = new ICSetupInfo(type, size, has_return_value, type_recorder); assembler::RegisterSet allocatable_regs) {
ICSetupInfo* rtn = new ICSetupInfo(type, size, has_return_value, type_recorder, allocatable_regs);
// We use size == CALL_ONLY_SIZE to imply that the call isn't patchable // We use size == CALL_ONLY_SIZE to imply that the call isn't patchable
assert(rtn->totalSize() > CALL_ONLY_SIZE); assert(rtn->totalSize() > CALL_ONLY_SIZE);
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "llvm/IR/CallingConv.h" #include "llvm/IR/CallingConv.h"
#include "asm_writing/types.h"
#include "codegen/stackmaps.h" #include "codegen/stackmaps.h"
#include "core/common.h" #include "core/common.h"
...@@ -64,14 +65,20 @@ public: ...@@ -64,14 +65,20 @@ public:
}; };
private: private:
ICSetupInfo(ICType type, int size, bool has_return_value, TypeRecorder* type_recorder) ICSetupInfo(ICType type, int size, bool has_return_value, TypeRecorder* type_recorder,
: type(type), size(size), has_return_value(has_return_value), type_recorder(type_recorder) {} assembler::RegisterSet allocatable_regs)
: type(type),
size(size),
has_return_value(has_return_value),
type_recorder(type_recorder),
allocatable_regs(allocatable_regs) {}
public: public:
const ICType type; const ICType type;
const int size; const int size;
const bool has_return_value; const bool has_return_value;
TypeRecorder* const type_recorder; TypeRecorder* const type_recorder;
assembler::RegisterSet allocatable_regs;
int totalSize() const; int totalSize() const;
bool hasReturnValue() const { return has_return_value; } bool hasReturnValue() const { return has_return_value; }
...@@ -90,7 +97,8 @@ public: ...@@ -90,7 +97,8 @@ public:
return llvm::CallingConv::C; return llvm::CallingConv::C;
} }
static ICSetupInfo* initialize(bool has_return_value, int size, ICType type, TypeRecorder* type_recorder); static ICSetupInfo* initialize(bool has_return_value, int size, ICType type, TypeRecorder* type_recorder,
assembler::RegisterSet allocatable_regs = assembler::RegisterSet::stdAllocatable());
}; };
struct PatchpointInfo { struct PatchpointInfo {
......
...@@ -574,7 +574,7 @@ public: ...@@ -574,7 +574,7 @@ public:
assert(l.stack_second_offset % 8 == 0); assert(l.stack_second_offset % 8 == 0);
b = b_ptr[l.stack_second_offset / 8]; b = b_ptr[l.stack_second_offset / 8];
} else if (l.type == Location::Register) { } else if (l.type == Location::Register) {
b = (Box*)get_cursor_reg(cursor, l.regnum); b = (Box*)get_cursor_reg(cursor, l.asRegister().getDwarfId());
} else { } else {
RELEASE_ASSERT(0, "not implemented"); RELEASE_ASSERT(0, "not implemented");
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment