Commit 9ef4e561 authored by Russ Cox

[dev.garbage] all: merge dev.power64 (7667e41f3ced) into dev.garbage

Now the only difference between dev.cc and dev.garbage
is the runtime conversion on the one side and the
garbage collection on the other. They both have the
same set of changes from default and dev.power64.

LGTM=austin
R=austin
CC=golang-codereviews
https://golang.org/cl/172570043
parents 3dcc62e1 743bdf61
...@@ -54,7 +54,7 @@ struct Addr ...@@ -54,7 +54,7 @@ struct Addr
{ {
char sval[8]; char sval[8];
float64 dval; float64 dval;
Prog* branch; // for 5g, 6g, 8g Prog* branch; // for 5g, 6g, 8g, 9g
} u; } u;
LSym* sym; LSym* sym;
...@@ -89,10 +89,13 @@ struct Prog ...@@ -89,10 +89,13 @@ struct Prog
int32 lineno; int32 lineno;
Prog* link; Prog* link;
short as; short as;
uchar reg; // arm, power64 only
uchar scond; // arm only uchar scond; // arm only
// operands
Addr from; Addr from;
Addr from3; // power64 only, fma and rlwm uchar reg; // arm, power64 only (e.g., ADD from, reg, to);
// also used for ADATA width on arm, power64
Addr from3; // power64 only (e.g., RLWM/FMADD from, reg, from3, to)
Addr to; Addr to;
// for 5g, 6g, 8g internal use // for 5g, 6g, 8g internal use
......
...@@ -86,7 +86,7 @@ datagostring(Strlit *sval, Addr *a) ...@@ -86,7 +86,7 @@ datagostring(Strlit *sval, Addr *a)
sym = stringsym(sval->s, sval->len); sym = stringsym(sval->s, sval->len);
a->type = D_OREG; a->type = D_OREG;
a->name = D_EXTERN; a->name = D_EXTERN;
a->etype = TINT32; a->etype = TSTRING;
a->offset = 0; // header a->offset = 0; // header
a->reg = NREG; a->reg = NREG;
a->sym = linksym(sym); a->sym = linksym(sym);
......
...@@ -1353,9 +1353,10 @@ naddr(Node *n, Addr *a, int canemitcode) ...@@ -1353,9 +1353,10 @@ naddr(Node *n, Addr *a, int canemitcode)
case OITAB: case OITAB:
// itable of interface value // itable of interface value
naddr(n->left, a, canemitcode); naddr(n->left, a, canemitcode);
a->etype = TINT32; a->etype = simtype[tptr];
if(a->type == D_CONST && a->offset == 0) if(a->type == D_CONST && a->offset == 0)
break; // len(nil) break; // len(nil)
a->width = widthptr;
break; break;
case OSPTR: case OSPTR:
......
...@@ -63,8 +63,8 @@ enum ...@@ -63,8 +63,8 @@ enum
uint32 BLOAD(Reg*); uint32 BLOAD(Reg*);
uint32 BSTORE(Reg*); uint32 BSTORE(Reg*);
uint32 LOAD(Reg*); uint64 LOAD(Reg*);
uint32 STORE(Reg*); uint64 STORE(Reg*);
*/ */
// A Reg is a wrapper around a single Prog (one instruction) that holds // A Reg is a wrapper around a single Prog (one instruction) that holds
...@@ -145,7 +145,7 @@ void synch(Reg*, Bits); ...@@ -145,7 +145,7 @@ void synch(Reg*, Bits);
uint32 allreg(uint32, Rgn*); uint32 allreg(uint32, Rgn*);
void paint1(Reg*, int); void paint1(Reg*, int);
uint32 paint2(Reg*, int); uint32 paint2(Reg*, int);
void paint3(Reg*, int, int32, int); void paint3(Reg*, int, uint32, int);
void addreg(Adr*, int); void addreg(Adr*, int);
void dumpit(char *str, Flow *r0, int); void dumpit(char *str, Flow *r0, int);
...@@ -156,10 +156,10 @@ void peep(Prog*); ...@@ -156,10 +156,10 @@ void peep(Prog*);
void excise(Flow*); void excise(Flow*);
int copyu(Prog*, Adr*, Adr*); int copyu(Prog*, Adr*, Adr*);
int32 RtoB(int); uint32 RtoB(int);
int32 FtoB(int); uint32 FtoB(int);
int BtoR(int32); int BtoR(uint32);
int BtoF(int32); int BtoF(uint32);
/* /*
* prog.c * prog.c
......
...@@ -35,7 +35,7 @@ ...@@ -35,7 +35,7 @@
#include "opt.h" #include "opt.h"
#define NREGVAR 32 #define NREGVAR 32
#define REGBITS ((uint32)0xffffffff) #define REGBITS ((uint64)0xffffffffull)
/*c2go enum { /*c2go enum {
NREGVAR = 32, NREGVAR = 32,
REGBITS = 0xffffffff, REGBITS = 0xffffffff,
...@@ -86,7 +86,7 @@ setaddrs(Bits bit) ...@@ -86,7 +86,7 @@ setaddrs(Bits bit)
i = bnum(bit); i = bnum(bit);
node = var[i].node; node = var[i].node;
n = var[i].name; n = var[i].name;
bit.b[i/32] &= ~(1L<<(i%32)); biclr(&bit, i);
// disable all pieces of that variable // disable all pieces of that variable
for(i=0; i<nvar; i++) { for(i=0; i<nvar; i++) {
...@@ -393,7 +393,7 @@ loop2: ...@@ -393,7 +393,7 @@ loop2:
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
if(bany(&bit) & !r->f.refset) { if(bany(&bit) && !r->f.refset) {
// should never happen - all variables are preset // should never happen - all variables are preset
if(debug['w']) if(debug['w'])
print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
...@@ -425,7 +425,7 @@ loop2: ...@@ -425,7 +425,7 @@ loop2:
if(debug['R'] > 1) if(debug['R'] > 1)
print("\n"); print("\n");
paint1(r, i); paint1(r, i);
bit.b[i/32] &= ~(1L<<(i%32)); biclr(&bit, i);
if(change <= 0) { if(change <= 0) {
if(debug['R']) if(debug['R'])
print("%L $%d: %Q\n", print("%L $%d: %Q\n",
...@@ -570,7 +570,7 @@ walkvardef(Node *n, Reg *r, int active) ...@@ -570,7 +570,7 @@ walkvardef(Node *n, Reg *r, int active)
break; break;
for(v=n->opt; v!=nil; v=v->nextinnode) { for(v=n->opt; v!=nil; v=v->nextinnode) {
bn = v - var; bn = v - var;
r1->act.b[bn/32] |= 1L << (bn%32); biset(&r1->act, bn);
} }
if(r1->f.prog->as == ABL) if(r1->f.prog->as == ABL)
break; break;
...@@ -606,7 +606,7 @@ addsplits(void) ...@@ -606,7 +606,7 @@ addsplits(void)
~(r->calahead.b[z] & addrs.b[z]); ~(r->calahead.b[z] & addrs.b[z]);
while(bany(&bit)) { while(bany(&bit)) {
i = bnum(bit); i = bnum(bit);
bit.b[i/32] &= ~(1L << (i%32)); biclr(&bit, i);
} }
} }
} }
...@@ -972,10 +972,10 @@ prop(Reg *r, Bits ref, Bits cal) ...@@ -972,10 +972,10 @@ prop(Reg *r, Bits ref, Bits cal)
for(z=0; z<BITS; z++) { for(z=0; z<BITS; z++) {
if(cal.b[z] == 0) if(cal.b[z] == 0)
continue; continue;
for(i=0; i<32; i++) { for(i=0; i<64; i++) {
if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0) if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
continue; continue;
v = var+z*32+i; v = var+z*64+i;
if(v->node->opt == nil) // v represents fixed register, not Go variable if(v->node->opt == nil) // v represents fixed register, not Go variable
continue; continue;
...@@ -991,10 +991,10 @@ prop(Reg *r, Bits ref, Bits cal) ...@@ -991,10 +991,10 @@ prop(Reg *r, Bits ref, Bits cal)
// This will set the bits at most twice, keeping the overall loop linear. // This will set the bits at most twice, keeping the overall loop linear.
v1 = v->node->opt; v1 = v->node->opt;
j = v1 - var; j = v1 - var;
if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) { if(v == v1 || !btest(&cal, j)) {
for(; v1 != nil; v1 = v1->nextinnode) { for(; v1 != nil; v1 = v1->nextinnode) {
j = v1 - var; j = v1 - var;
cal.b[j/32] |= 1<<(j&31); biset(&cal, j);
} }
} }
} }
...@@ -1115,10 +1115,10 @@ paint1(Reg *r, int bn) ...@@ -1115,10 +1115,10 @@ paint1(Reg *r, int bn)
Reg *r1; Reg *r1;
Prog *p; Prog *p;
int z; int z;
uint32 bb; uint64 bb;
z = bn/32; z = bn/64;
bb = 1L<<(bn%32); bb = 1LL<<(bn%64);
if(r->act.b[z] & bb) if(r->act.b[z] & bb)
return; return;
for(;;) { for(;;) {
...@@ -1193,10 +1193,10 @@ paint2(Reg *r, int bn) ...@@ -1193,10 +1193,10 @@ paint2(Reg *r, int bn)
{ {
Reg *r1; Reg *r1;
int z; int z;
uint32 bb, vreg; uint64 bb, vreg;
z = bn/32; z = bn/64;
bb = 1L << (bn%32); bb = 1LL << (bn%64);
vreg = regbits; vreg = regbits;
if(!(r->act.b[z] & bb)) if(!(r->act.b[z] & bb))
return vreg; return vreg;
...@@ -1240,15 +1240,15 @@ paint2(Reg *r, int bn) ...@@ -1240,15 +1240,15 @@ paint2(Reg *r, int bn)
} }
void void
paint3(Reg *r, int bn, int32 rb, int rn) paint3(Reg *r, int bn, uint32 rb, int rn)
{ {
Reg *r1; Reg *r1;
Prog *p; Prog *p;
int z; int z;
uint32 bb; uint64 bb;
z = bn/32; z = bn/64;
bb = 1L << (bn%32); bb = 1LL << (bn%64);
if(r->act.b[z] & bb) if(r->act.b[z] & bb)
return; return;
for(;;) { for(;;) {
...@@ -1333,7 +1333,7 @@ addreg(Adr *a, int rn) ...@@ -1333,7 +1333,7 @@ addreg(Adr *a, int rn)
* 10 R10 * 10 R10
* 12 R12 * 12 R12
*/ */
int32 uint32
RtoB(int r) RtoB(int r)
{ {
if(r >= REGTMP-2 && r != 12) // excluded R9 and R10 for m and g, but not R12 if(r >= REGTMP-2 && r != 12) // excluded R9 and R10 for m and g, but not R12
...@@ -1342,7 +1342,7 @@ RtoB(int r) ...@@ -1342,7 +1342,7 @@ RtoB(int r)
} }
int int
BtoR(int32 b) BtoR(uint32 b)
{ {
b &= 0x11fcL; // excluded R9 and R10 for m and g, but not R12 b &= 0x11fcL; // excluded R9 and R10 for m and g, but not R12
if(b == 0) if(b == 0)
...@@ -1357,7 +1357,7 @@ BtoR(int32 b) ...@@ -1357,7 +1357,7 @@ BtoR(int32 b)
* ... ... * ... ...
* 31 F15 * 31 F15
*/ */
int32 uint32
FtoB(int f) FtoB(int f)
{ {
...@@ -1367,7 +1367,7 @@ FtoB(int f) ...@@ -1367,7 +1367,7 @@ FtoB(int f)
} }
int int
BtoF(int32 b) BtoF(uint32 b)
{ {
b &= 0xfffc0000L; b &= 0xfffc0000L;
......
...@@ -81,7 +81,7 @@ datagostring(Strlit *sval, Addr *a) ...@@ -81,7 +81,7 @@ datagostring(Strlit *sval, Addr *a)
a->sym = linksym(sym); a->sym = linksym(sym);
a->node = sym->def; a->node = sym->def;
a->offset = 0; // header a->offset = 0; // header
a->etype = TINT32; a->etype = TSTRING;
} }
void void
......
...@@ -63,8 +63,8 @@ enum ...@@ -63,8 +63,8 @@ enum
uint32 BLOAD(Reg*); uint32 BLOAD(Reg*);
uint32 BSTORE(Reg*); uint32 BSTORE(Reg*);
uint32 LOAD(Reg*); uint64 LOAD(Reg*);
uint32 STORE(Reg*); uint64 STORE(Reg*);
*/ */
// A Reg is a wrapper around a single Prog (one instruction) that holds // A Reg is a wrapper around a single Prog (one instruction) that holds
...@@ -141,7 +141,7 @@ void synch(Reg*, Bits); ...@@ -141,7 +141,7 @@ void synch(Reg*, Bits);
uint32 allreg(uint32, Rgn*); uint32 allreg(uint32, Rgn*);
void paint1(Reg*, int); void paint1(Reg*, int);
uint32 paint2(Reg*, int); uint32 paint2(Reg*, int);
void paint3(Reg*, int, int32, int); void paint3(Reg*, int, uint32, int);
void addreg(Adr*, int); void addreg(Adr*, int);
void dumpone(Flow*, int); void dumpone(Flow*, int);
void dumpit(char*, Flow*, int); void dumpit(char*, Flow*, int);
...@@ -153,10 +153,10 @@ void peep(Prog*); ...@@ -153,10 +153,10 @@ void peep(Prog*);
void excise(Flow*); void excise(Flow*);
int copyu(Prog*, Adr*, Adr*); int copyu(Prog*, Adr*, Adr*);
int32 RtoB(int); uint32 RtoB(int);
int32 FtoB(int); uint32 FtoB(int);
int BtoR(int32); int BtoR(uint32);
int BtoF(int32); int BtoF(uint32);
/* /*
* prog.c * prog.c
......
...@@ -34,7 +34,7 @@ ...@@ -34,7 +34,7 @@
#include "opt.h" #include "opt.h"
#define NREGVAR 32 /* 16 general + 16 floating */ #define NREGVAR 32 /* 16 general + 16 floating */
#define REGBITS ((uint32)0xffffffff) #define REGBITS ((uint64)0xffffffffull)
/*c2go enum { /*c2go enum {
NREGVAR = 32, NREGVAR = 32,
REGBITS = 0xffffffff, REGBITS = 0xffffffff,
...@@ -71,7 +71,7 @@ setaddrs(Bits bit) ...@@ -71,7 +71,7 @@ setaddrs(Bits bit)
i = bnum(bit); i = bnum(bit);
node = var[i].node; node = var[i].node;
n = var[i].name; n = var[i].name;
bit.b[i/32] &= ~(1L<<(i%32)); biclr(&bit, i);
// disable all pieces of that variable // disable all pieces of that variable
for(i=0; i<nvar; i++) { for(i=0; i<nvar; i++) {
...@@ -364,7 +364,7 @@ loop2: ...@@ -364,7 +364,7 @@ loop2:
rgp->varno = i; rgp->varno = i;
change = 0; change = 0;
paint1(r, i); paint1(r, i);
bit.b[i/32] &= ~(1L<<(i%32)); biclr(&bit, i);
if(change <= 0) if(change <= 0)
continue; continue;
rgp->cost = change; rgp->cost = change;
...@@ -477,7 +477,7 @@ walkvardef(Node *n, Reg *r, int active) ...@@ -477,7 +477,7 @@ walkvardef(Node *n, Reg *r, int active)
break; break;
for(v=n->opt; v!=nil; v=v->nextinnode) { for(v=n->opt; v!=nil; v=v->nextinnode) {
bn = v - var; bn = v - var;
r1->act.b[bn/32] |= 1L << (bn%32); biset(&r1->act, bn);
} }
if(r1->f.prog->as == ACALL) if(r1->f.prog->as == ACALL)
break; break;
...@@ -621,6 +621,9 @@ mkvar(Reg *r, Adr *a) ...@@ -621,6 +621,9 @@ mkvar(Reg *r, Adr *a)
if(r != R) if(r != R)
r->use1.b[0] |= doregbits(a->index); r->use1.b[0] |= doregbits(a->index);
if(t >= D_INDIR && t < 2*D_INDIR)
goto none;
switch(t) { switch(t) {
default: default:
regu = doregbits(t); regu = doregbits(t);
...@@ -822,10 +825,10 @@ prop(Reg *r, Bits ref, Bits cal) ...@@ -822,10 +825,10 @@ prop(Reg *r, Bits ref, Bits cal)
for(z=0; z<BITS; z++) { for(z=0; z<BITS; z++) {
if(cal.b[z] == 0) if(cal.b[z] == 0)
continue; continue;
for(i=0; i<32; i++) { for(i=0; i<64; i++) {
if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0) if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
continue; continue;
v = var+z*32+i; v = var+z*64+i;
if(v->node->opt == nil) // v represents fixed register, not Go variable if(v->node->opt == nil) // v represents fixed register, not Go variable
continue; continue;
...@@ -841,10 +844,10 @@ prop(Reg *r, Bits ref, Bits cal) ...@@ -841,10 +844,10 @@ prop(Reg *r, Bits ref, Bits cal)
// This will set the bits at most twice, keeping the overall loop linear. // This will set the bits at most twice, keeping the overall loop linear.
v1 = v->node->opt; v1 = v->node->opt;
j = v1 - var; j = v1 - var;
if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) { if(v == v1 || !btest(&cal, j)) {
for(; v1 != nil; v1 = v1->nextinnode) { for(; v1 != nil; v1 = v1->nextinnode) {
j = v1 - var; j = v1 - var;
cal.b[j/32] |= 1UL<<(j&31); biset(&cal, j);
} }
} }
} }
...@@ -959,10 +962,10 @@ paint1(Reg *r, int bn) ...@@ -959,10 +962,10 @@ paint1(Reg *r, int bn)
{ {
Reg *r1; Reg *r1;
int z; int z;
uint32 bb; uint64 bb;
z = bn/32; z = bn/64;
bb = 1L<<(bn%32); bb = 1LL<<(bn%64);
if(r->act.b[z] & bb) if(r->act.b[z] & bb)
return; return;
for(;;) { for(;;) {
...@@ -1016,55 +1019,15 @@ paint1(Reg *r, int bn) ...@@ -1016,55 +1019,15 @@ paint1(Reg *r, int bn)
} }
} }
uint32
regset(Reg *r, uint32 bb)
{
uint32 b, set;
Adr v;
int c;
set = 0;
v = zprog.from;
while(b = bb & ~(bb-1)) {
v.type = b & 0xFFFF? BtoR(b): BtoF(b);
if(v.type == 0)
fatal("zero v.type for %#ux", b);
c = copyu(r->f.prog, &v, nil);
if(c == 3)
set |= b;
bb &= ~b;
}
return set;
}
uint32
reguse(Reg *r, uint32 bb)
{
uint32 b, set;
Adr v;
int c;
set = 0;
v = zprog.from;
while(b = bb & ~(bb-1)) {
v.type = b & 0xFFFF? BtoR(b): BtoF(b);
c = copyu(r->f.prog, &v, nil);
if(c == 1 || c == 2 || c == 4)
set |= b;
bb &= ~b;
}
return set;
}
uint32 uint32
paint2(Reg *r, int bn) paint2(Reg *r, int bn)
{ {
Reg *r1; Reg *r1;
int z; int z;
uint32 bb, vreg, x; uint64 bb, vreg;
z = bn/32; z = bn/64;
bb = 1L << (bn%32); bb = 1LL << (bn%64);
vreg = regbits; vreg = regbits;
if(!(r->act.b[z] & bb)) if(!(r->act.b[z] & bb))
return vreg; return vreg;
...@@ -1105,27 +1068,19 @@ paint2(Reg *r, int bn) ...@@ -1105,27 +1068,19 @@ paint2(Reg *r, int bn)
break; break;
} }
bb = vreg;
for(; r; r=(Reg*)r->f.s1) {
x = r->regu & ~bb;
if(x) {
vreg |= reguse(r, x);
bb |= regset(r, x);
}
}
return vreg; return vreg;
} }
void void
paint3(Reg *r, int bn, int32 rb, int rn) paint3(Reg *r, int bn, uint32 rb, int rn)
{ {
Reg *r1; Reg *r1;
Prog *p; Prog *p;
int z; int z;
uint32 bb; uint64 bb;
z = bn/32; z = bn/64;
bb = 1L << (bn%32); bb = 1LL << (bn%64);
if(r->act.b[z] & bb) if(r->act.b[z] & bb)
return; return;
for(;;) { for(;;) {
...@@ -1198,7 +1153,7 @@ addreg(Adr *a, int rn) ...@@ -1198,7 +1153,7 @@ addreg(Adr *a, int rn)
ostats.ncvtreg++; ostats.ncvtreg++;
} }
int32 uint32
RtoB(int r) RtoB(int r)
{ {
...@@ -1208,7 +1163,7 @@ RtoB(int r) ...@@ -1208,7 +1163,7 @@ RtoB(int r)
} }
int int
BtoR(int32 b) BtoR(uint32 b)
{ {
b &= 0xffffL; b &= 0xffffL;
if(nacl) if(nacl)
...@@ -1224,7 +1179,7 @@ BtoR(int32 b) ...@@ -1224,7 +1179,7 @@ BtoR(int32 b)
* ... * ...
* 31 X15 * 31 X15
*/ */
int32 uint32
FtoB(int f) FtoB(int f)
{ {
if(f < D_X0 || f > D_X15) if(f < D_X0 || f > D_X15)
...@@ -1233,7 +1188,7 @@ FtoB(int f) ...@@ -1233,7 +1188,7 @@ FtoB(int f)
} }
int int
BtoF(int32 b) BtoF(uint32 b)
{ {
b &= 0xFFFF0000L; b &= 0xFFFF0000L;
......
...@@ -81,7 +81,7 @@ datagostring(Strlit *sval, Addr *a) ...@@ -81,7 +81,7 @@ datagostring(Strlit *sval, Addr *a)
a->sym = linksym(sym); a->sym = linksym(sym);
a->node = sym->def; a->node = sym->def;
a->offset = 0; // header a->offset = 0; // header
a->etype = TINT32; a->etype = TSTRING;
} }
void void
......
...@@ -63,8 +63,8 @@ enum ...@@ -63,8 +63,8 @@ enum
uint32 BLOAD(Reg*); uint32 BLOAD(Reg*);
uint32 BSTORE(Reg*); uint32 BSTORE(Reg*);
uint32 LOAD(Reg*); uint64 LOAD(Reg*);
uint32 STORE(Reg*); uint64 STORE(Reg*);
*/ */
// A Reg is a wrapper around a single Prog (one instruction) that holds // A Reg is a wrapper around a single Prog (one instruction) that holds
...@@ -159,7 +159,7 @@ void synch(Reg*, Bits); ...@@ -159,7 +159,7 @@ void synch(Reg*, Bits);
uint32 allreg(uint32, Rgn*); uint32 allreg(uint32, Rgn*);
void paint1(Reg*, int); void paint1(Reg*, int);
uint32 paint2(Reg*, int); uint32 paint2(Reg*, int);
void paint3(Reg*, int, int32, int); void paint3(Reg*, int, uint32, int);
void addreg(Adr*, int); void addreg(Adr*, int);
void dumpone(Flow*, int); void dumpone(Flow*, int);
void dumpit(char*, Flow*, int); void dumpit(char*, Flow*, int);
...@@ -171,10 +171,10 @@ void peep(Prog*); ...@@ -171,10 +171,10 @@ void peep(Prog*);
void excise(Flow*); void excise(Flow*);
int copyu(Prog*, Adr*, Adr*); int copyu(Prog*, Adr*, Adr*);
int32 RtoB(int); uint32 RtoB(int);
int32 FtoB(int); uint32 FtoB(int);
int BtoR(int32); int BtoR(uint32);
int BtoF(int32); int BtoF(uint32);
/* /*
* prog.c * prog.c
......
...@@ -34,7 +34,7 @@ ...@@ -34,7 +34,7 @@
#include "opt.h" #include "opt.h"
#define NREGVAR 16 /* 8 integer + 8 floating */ #define NREGVAR 16 /* 8 integer + 8 floating */
#define REGBITS ((uint32)0xffff) #define REGBITS ((uint64)0xffffull)
/*c2go enum { /*c2go enum {
NREGVAR = 16, NREGVAR = 16,
REGBITS = (1<<NREGVAR) - 1, REGBITS = (1<<NREGVAR) - 1,
...@@ -71,7 +71,7 @@ setaddrs(Bits bit) ...@@ -71,7 +71,7 @@ setaddrs(Bits bit)
i = bnum(bit); i = bnum(bit);
node = var[i].node; node = var[i].node;
n = var[i].name; n = var[i].name;
bit.b[i/32] &= ~(1L<<(i%32)); biclr(&bit, i);
// disable all pieces of that variable // disable all pieces of that variable
for(i=0; i<nvar; i++) { for(i=0; i<nvar; i++) {
...@@ -336,7 +336,7 @@ loop2: ...@@ -336,7 +336,7 @@ loop2:
rgp->varno = i; rgp->varno = i;
change = 0; change = 0;
paint1(r, i); paint1(r, i);
bit.b[i/32] &= ~(1L<<(i%32)); biclr(&bit, i);
if(change <= 0) if(change <= 0)
continue; continue;
rgp->cost = change; rgp->cost = change;
...@@ -446,7 +446,7 @@ walkvardef(Node *n, Reg *r, int active) ...@@ -446,7 +446,7 @@ walkvardef(Node *n, Reg *r, int active)
break; break;
for(v=n->opt; v!=nil; v=v->nextinnode) { for(v=n->opt; v!=nil; v=v->nextinnode) {
bn = v - var; bn = v - var;
r1->act.b[bn/32] |= 1L << (bn%32); biset(&r1->act, bn);
} }
if(r1->f.prog->as == ACALL) if(r1->f.prog->as == ACALL)
break; break;
...@@ -788,10 +788,10 @@ prop(Reg *r, Bits ref, Bits cal) ...@@ -788,10 +788,10 @@ prop(Reg *r, Bits ref, Bits cal)
for(z=0; z<BITS; z++) { for(z=0; z<BITS; z++) {
if(cal.b[z] == 0) if(cal.b[z] == 0)
continue; continue;
for(i=0; i<32; i++) { for(i=0; i<64; i++) {
if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0) if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
continue; continue;
v = var+z*32+i; v = var+z*64+i;
if(v->node->opt == nil) // v represents fixed register, not Go variable if(v->node->opt == nil) // v represents fixed register, not Go variable
continue; continue;
...@@ -807,10 +807,10 @@ prop(Reg *r, Bits ref, Bits cal) ...@@ -807,10 +807,10 @@ prop(Reg *r, Bits ref, Bits cal)
// This will set the bits at most twice, keeping the overall loop linear. // This will set the bits at most twice, keeping the overall loop linear.
v1 = v->node->opt; v1 = v->node->opt;
j = v1 - var; j = v1 - var;
if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) { if(v == v1 || !btest(&cal, j)) {
for(; v1 != nil; v1 = v1->nextinnode) { for(; v1 != nil; v1 = v1->nextinnode) {
j = v1 - var; j = v1 - var;
cal.b[j/32] |= 1<<(j&31); biset(&cal, j);
} }
} }
} }
...@@ -926,10 +926,10 @@ paint1(Reg *r, int bn) ...@@ -926,10 +926,10 @@ paint1(Reg *r, int bn)
Reg *r1; Reg *r1;
Prog *p; Prog *p;
int z; int z;
uint32 bb; uint64 bb;
z = bn/32; z = bn/64;
bb = 1L<<(bn%32); bb = 1LL<<(bn%64);
if(r->act.b[z] & bb) if(r->act.b[z] & bb)
return; return;
for(;;) { for(;;) {
...@@ -995,53 +995,15 @@ paint1(Reg *r, int bn) ...@@ -995,53 +995,15 @@ paint1(Reg *r, int bn)
} }
} }
uint32
regset(Reg *r, uint32 bb)
{
uint32 b, set;
Adr v;
int c;
set = 0;
v = zprog.from;
while(b = bb & ~(bb-1)) {
v.type = b & 0xFF ? BtoR(b): BtoF(b);
c = copyu(r->f.prog, &v, nil);
if(c == 3)
set |= b;
bb &= ~b;
}
return set;
}
uint32
reguse(Reg *r, uint32 bb)
{
uint32 b, set;
Adr v;
int c;
set = 0;
v = zprog.from;
while(b = bb & ~(bb-1)) {
v.type = b & 0xFF ? BtoR(b): BtoF(b);
c = copyu(r->f.prog, &v, nil);
if(c == 1 || c == 2 || c == 4)
set |= b;
bb &= ~b;
}
return set;
}
uint32 uint32
paint2(Reg *r, int bn) paint2(Reg *r, int bn)
{ {
Reg *r1; Reg *r1;
int z; int z;
uint32 bb, vreg, x; uint64 bb, vreg;
z = bn/32; z = bn/64;
bb = 1L << (bn%32); bb = 1LL << (bn%64);
vreg = regbits; vreg = regbits;
if(!(r->act.b[z] & bb)) if(!(r->act.b[z] & bb))
return vreg; return vreg;
...@@ -1082,27 +1044,19 @@ paint2(Reg *r, int bn) ...@@ -1082,27 +1044,19 @@ paint2(Reg *r, int bn)
break; break;
} }
bb = vreg;
for(; r; r=(Reg*)r->f.s1) {
x = r->regu & ~bb;
if(x) {
vreg |= reguse(r, x);
bb |= regset(r, x);
}
}
return vreg; return vreg;
} }
void void
paint3(Reg *r, int bn, int32 rb, int rn) paint3(Reg *r, int bn, uint32 rb, int rn)
{ {
Reg *r1; Reg *r1;
Prog *p; Prog *p;
int z; int z;
uint32 bb; uint64 bb;
z = bn/32; z = bn/64;
bb = 1L << (bn%32); bb = 1LL << (bn%64);
if(r->act.b[z] & bb) if(r->act.b[z] & bb)
return; return;
for(;;) { for(;;) {
...@@ -1175,7 +1129,7 @@ addreg(Adr *a, int rn) ...@@ -1175,7 +1129,7 @@ addreg(Adr *a, int rn)
ostats.ncvtreg++; ostats.ncvtreg++;
} }
int32 uint32
RtoB(int r) RtoB(int r)
{ {
...@@ -1185,7 +1139,7 @@ RtoB(int r) ...@@ -1185,7 +1139,7 @@ RtoB(int r)
} }
int int
BtoR(int32 b) BtoR(uint32 b)
{ {
b &= 0xffL; b &= 0xffL;
...@@ -1194,7 +1148,7 @@ BtoR(int32 b) ...@@ -1194,7 +1148,7 @@ BtoR(int32 b)
return bitno(b) + D_AX; return bitno(b) + D_AX;
} }
int32 uint32
FtoB(int f) FtoB(int f)
{ {
if(f < D_X0 || f > D_X7) if(f < D_X0 || f > D_X7)
...@@ -1203,7 +1157,7 @@ FtoB(int f) ...@@ -1203,7 +1157,7 @@ FtoB(int f)
} }
int int
BtoF(int32 b) BtoF(uint32 b)
{ {
b &= 0xFF00L; b &= 0xFF00L;
if(b == 0) if(b == 0)
......
...@@ -900,7 +900,7 @@ ret: ...@@ -900,7 +900,7 @@ ret:
void void
clearfat(Node *nl) clearfat(Node *nl)
{ {
uint64 w, c, q, t; uint64 w, c, q, t, boff;
Node dst, end, r0, *f; Node dst, end, r0, *f;
Prog *p, *pl; Prog *p, *pl;
...@@ -944,6 +944,8 @@ clearfat(Node *nl) ...@@ -944,6 +944,8 @@ clearfat(Node *nl)
patch(gbranch(ABNE, T, 0), pl); patch(gbranch(ABNE, T, 0), pl);
regfree(&end); regfree(&end);
// The loop leaves R3 on the last zeroed dword
boff = 8;
} else if(q >= 4) { } else if(q >= 4) {
p = gins(ASUB, N, &dst); p = gins(ASUB, N, &dst);
p->from.type = D_CONST; p->from.type = D_CONST;
...@@ -953,17 +955,21 @@ clearfat(Node *nl) ...@@ -953,17 +955,21 @@ clearfat(Node *nl)
afunclit(&p->to, f); afunclit(&p->to, f);
// 4 and 128 = magic constants: see ../../runtime/asm_power64x.s // 4 and 128 = magic constants: see ../../runtime/asm_power64x.s
p->to.offset = 4*(128-q); p->to.offset = 4*(128-q);
} else // duffzero leaves R3 on the last zeroed dword
for(t = 0; t < q; t++) { boff = 8;
p = gins(AMOVD, &r0, &dst); } else {
p->to.type = D_OREG; for(t = 0; t < q; t++) {
p->to.offset = 8*t; p = gins(AMOVD, &r0, &dst);
p->to.type = D_OREG;
p->to.offset = 8*t;
}
boff = 8*q;
} }
for(t = 0; t < c; t++) { for(t = 0; t < c; t++) {
p = gins(AMOVB, &r0, &dst); p = gins(AMOVB, &r0, &dst);
p->to.type = D_OREG; p->to.type = D_OREG;
p->to.offset = t; p->to.offset = t+boff;
} }
reg[REGRT1]--; reg[REGRT1]--;
} }
......
...@@ -89,7 +89,7 @@ datagostring(Strlit *sval, Addr *a) ...@@ -89,7 +89,7 @@ datagostring(Strlit *sval, Addr *a)
a->reg = NREG; a->reg = NREG;
a->node = sym->def; a->node = sym->def;
a->offset = 0; // header a->offset = 0; // header
a->etype = TINT32; a->etype = TSTRING;
} }
void void
......
...@@ -1001,10 +1001,13 @@ hard: ...@@ -1001,10 +1001,13 @@ hard:
Prog* Prog*
gins(int as, Node *f, Node *t) gins(int as, Node *f, Node *t)
{ {
//int32 w; int32 w;
Prog *p; Prog *p;
Addr af, at; Addr af, at;
// TODO(austin): Add self-move test like in 6g (but be careful
// of truncation moves)
memset(&af, 0, sizeof af); memset(&af, 0, sizeof af);
memset(&at, 0, sizeof at); memset(&at, 0, sizeof at);
if(f != N) if(f != N)
...@@ -1021,9 +1024,6 @@ gins(int as, Node *f, Node *t) ...@@ -1021,9 +1024,6 @@ gins(int as, Node *f, Node *t)
if(debug['g']) if(debug['g'])
print("%P\n", p); print("%P\n", p);
// TODO(minux): enable these.
// right now it fails on MOVD $type."".TypeAssertionError(SB) [width=1], R7 [width=8]
/*
w = 0; w = 0;
switch(as) { switch(as) {
case AMOVB: case AMOVB:
...@@ -1046,15 +1046,16 @@ gins(int as, Node *f, Node *t) ...@@ -1046,15 +1046,16 @@ gins(int as, Node *f, Node *t)
break; break;
case AMOVD: case AMOVD:
case AMOVDU: case AMOVDU:
if(af.type == D_CONST)
break;
w = 8; w = 8;
break; break;
} }
if(w != 0 && ((f != N && af.width < w) || (t != N && at.width > w))) { if(w != 0 && ((f != N && af.width < w) || (t != N && at.type != D_REG && at.width > w))) {
dump("f", f); dump("f", f);
dump("t", t); dump("t", t);
fatal("bad width: %P (%d, %d)\n", p, af.width, at.width); fatal("bad width: %P (%d, %d)\n", p, af.width, at.width);
} }
*/
return p; return p;
} }
...@@ -1116,12 +1117,9 @@ naddr(Node *n, Addr *a, int canemitcode) ...@@ -1116,12 +1117,9 @@ naddr(Node *n, Addr *a, int canemitcode)
case ONAME: case ONAME:
a->etype = 0; a->etype = 0;
a->width = 0;
a->reg = NREG; a->reg = NREG;
if(n->type != T) { if(n->type != T)
a->etype = simtype[n->type->etype]; a->etype = simtype[n->type->etype];
a->width = n->type->width;
}
a->offset = n->xoffset; a->offset = n->xoffset;
s = n->sym; s = n->sym;
a->node = n->orig; a->node = n->orig;
...@@ -1242,15 +1240,16 @@ naddr(Node *n, Addr *a, int canemitcode) ...@@ -1242,15 +1240,16 @@ naddr(Node *n, Addr *a, int canemitcode)
naddr(n->left, a, canemitcode); naddr(n->left, a, canemitcode);
a->etype = simtype[tptr]; a->etype = simtype[tptr];
if(a->type == D_CONST && a->offset == 0) if(a->type == D_CONST && a->offset == 0)
break; // len(nil) break; // itab(nil)
a->width = widthptr;
break; break;
case OSPTR: case OSPTR:
// pointer in a string or slice // pointer in a string or slice
naddr(n->left, a, canemitcode); naddr(n->left, a, canemitcode);
a->etype = simtype[tptr];
if(a->type == D_CONST && a->offset == 0) if(a->type == D_CONST && a->offset == 0)
break; // ptr(nil) break; // ptr(nil)
a->etype = simtype[tptr];
a->offset += Array_array; a->offset += Array_array;
a->width = widthptr; a->width = widthptr;
break; break;
...@@ -1262,6 +1261,7 @@ naddr(Node *n, Addr *a, int canemitcode) ...@@ -1262,6 +1261,7 @@ naddr(Node *n, Addr *a, int canemitcode)
if(a->type == D_CONST && a->offset == 0) if(a->type == D_CONST && a->offset == 0)
break; // len(nil) break; // len(nil)
a->offset += Array_nel; a->offset += Array_nel;
a->width = widthint;
break; break;
case OCAP: case OCAP:
...@@ -1271,6 +1271,7 @@ naddr(Node *n, Addr *a, int canemitcode) ...@@ -1271,6 +1271,7 @@ naddr(Node *n, Addr *a, int canemitcode)
if(a->type == D_CONST && a->offset == 0) if(a->type == D_CONST && a->offset == 0)
break; // cap(nil) break; // cap(nil)
a->offset += Array_cap; a->offset += Array_cap;
a->width = widthint;
break; break;
case OADDR: case OADDR:
...@@ -1288,6 +1289,7 @@ naddr(Node *n, Addr *a, int canemitcode) ...@@ -1288,6 +1289,7 @@ naddr(Node *n, Addr *a, int canemitcode)
default: default:
fatal("naddr: OADDR %d\n", a->type); fatal("naddr: OADDR %d\n", a->type);
} }
break;
} }
} }
......
...@@ -70,24 +70,40 @@ struct Reg ...@@ -70,24 +70,40 @@ struct Reg
{ {
Flow f; Flow f;
Bits set; // variables written by this instruction. Bits set; // regopt variables written by this instruction.
Bits use1; // variables read by prog->from. Bits use1; // regopt variables read by prog->from.
Bits use2; // variables read by prog->to. Bits use2; // regopt variables read by prog->to.
// refahead/refbehind are the regopt variables whose current
// value may be used in the following/preceding instructions
// up to a CALL (or the value is clobbered).
Bits refbehind; Bits refbehind;
Bits refahead; Bits refahead;
// calahead/calbehind are similar, but for variables in
// instructions that are reachable after hitting at least one
// CALL.
Bits calbehind; Bits calbehind;
Bits calahead; Bits calahead;
Bits regdiff; Bits regdiff;
Bits act; Bits act;
int32 regu; // register used bitmap uint64 regu; // register used bitmap
}; };
#define R ((Reg*)0) #define R ((Reg*)0)
/*c2go extern Reg *R; */ /*c2go extern Reg *R; */
#define NRGN 600 #define NRGN 600
/*c2go enum { NRGN = 600 }; */ /*c2go enum { NRGN = 600 }; */
// A Rgn represents a single regopt variable over a region of code
// where a register could potentially be dedicated to that variable.
// The code encompassed by a Rgn is defined by the flow graph,
// starting at enter, flood-filling forward while varno is refahead
// and backward while varno is refbehind, and following branches. A
// single variable may be represented by multiple disjoint Rgns and
// each Rgn may choose a different register for that variable.
// Registers are allocated to regions greedily in order of descending
// cost.
struct Rgn struct Rgn
{ {
Reg* enter; Reg* enter;
...@@ -104,7 +120,7 @@ EXTERN Rgn* rgp; ...@@ -104,7 +120,7 @@ EXTERN Rgn* rgp;
EXTERN int nregion; EXTERN int nregion;
EXTERN int nvar; EXTERN int nvar;
EXTERN int32 regbits; EXTERN int32 regbits;
EXTERN int32 exregbits; EXTERN int32 exregbits; // TODO(austin) not used; remove
EXTERN Bits externs; EXTERN Bits externs;
EXTERN Bits params; EXTERN Bits params;
EXTERN Bits consts; EXTERN Bits consts;
...@@ -118,10 +134,8 @@ EXTERN struct ...@@ -118,10 +134,8 @@ EXTERN struct
{ {
int32 ncvtreg; int32 ncvtreg;
int32 nspill; int32 nspill;
int32 nreload;
int32 ndelmov; int32 ndelmov;
int32 nvar; int32 nvar;
int32 naddr;
} ostats; } ostats;
/* /*
...@@ -133,10 +147,10 @@ void addmove(Reg*, int, int, int); ...@@ -133,10 +147,10 @@ void addmove(Reg*, int, int, int);
Bits mkvar(Reg*, Adr*); Bits mkvar(Reg*, Adr*);
void prop(Reg*, Bits, Bits); void prop(Reg*, Bits, Bits);
void synch(Reg*, Bits); void synch(Reg*, Bits);
uint32 allreg(uint32, Rgn*); uint64 allreg(uint64, Rgn*);
void paint1(Reg*, int); void paint1(Reg*, int);
uint32 paint2(Reg*, int); uint64 paint2(Reg*, int, int);
void paint3(Reg*, int, int32, int); void paint3(Reg*, int, uint64, int);
void addreg(Adr*, int); void addreg(Adr*, int);
void dumpone(Flow*, int); void dumpone(Flow*, int);
void dumpit(char*, Flow*, int); void dumpit(char*, Flow*, int);
...@@ -160,8 +174,8 @@ typedef struct ProgInfo ProgInfo; ...@@ -160,8 +174,8 @@ typedef struct ProgInfo ProgInfo;
struct ProgInfo struct ProgInfo
{ {
uint32 flags; // the bits below uint32 flags; // the bits below
uint64 reguse; // required registers used by this instruction uint64 reguse; // registers implicitly used by this instruction
uint64 regset; // required registers set by this instruction uint64 regset; // registers implicitly set by this instruction
uint64 regindex; // registers used by addressing mode uint64 regindex; // registers used by addressing mode
}; };
...@@ -182,20 +196,21 @@ enum ...@@ -182,20 +196,21 @@ enum
SizeF = 1<<7, // float aka float32 SizeF = 1<<7, // float aka float32
SizeD = 1<<8, // double aka float64 SizeD = 1<<8, // double aka float64
// Left side: address taken, read, write. // Left side (Prog.from): address taken, read, write.
LeftAddr = 1<<9, LeftAddr = 1<<9,
LeftRead = 1<<10, LeftRead = 1<<10,
LeftWrite = 1<<11, LeftWrite = 1<<11,
// Register in middle; never written. // Register in middle (Prog.reg); only ever read.
RegRead = 1<<12, RegRead = 1<<12,
CanRegRead = 1<<13, CanRegRead = 1<<13,
// Right side: address taken, read, write. // Right side (Prog.to): address taken, read, write.
RightAddr = 1<<14, RightAddr = 1<<14,
RightRead = 1<<15, RightRead = 1<<15,
RightWrite = 1<<16, RightWrite = 1<<16,
// Instruction updates whichever of from/to is type D_OREG
PostInc = 1<<17, PostInc = 1<<17,
// Instruction kinds // Instruction kinds
......
...@@ -44,13 +44,15 @@ peep(Prog *p) ...@@ -44,13 +44,15 @@ peep(Prog *p)
void void
excise(Flow *r) excise(Flow *r)
{ {
Prog *p; Prog *p, *l;
p = r->prog; p = r->prog;
if(debug['P'] && debug['v']) if(debug['P'] && debug['v'])
print("%P ===delete===\n", p); print("%P ===delete===\n", p);
l = p->link;
*p = zprog; *p = zprog;
p->as = ANOP; p->as = ANOP;
p->link = l;
ostats.ndelmov++; ostats.ndelmov++;
} }
......
...@@ -96,11 +96,8 @@ static ProgInfo progtable[ALAST] = { ...@@ -96,11 +96,8 @@ static ProgInfo progtable[ALAST] = {
[ABGT]= {Cjmp}, [ABGT]= {Cjmp},
[ABLE]= {Cjmp}, [ABLE]= {Cjmp},
[ARETURN]= {Break}, [ARETURN]= {Break},
// In addition, duffzero reads R0,R2 and writes R2. This fact must be
// encoded in peep.c (TODO)
[ADUFFZERO]= {Call}, [ADUFFZERO]= {Call},
// In addition, duffcopy reads R0,R2,R3 and writes R2,R3. This fact must be
// encoded in peep.c (TODO)
[ADUFFCOPY]= {Call}, [ADUFFCOPY]= {Call},
}; };
...@@ -118,14 +115,14 @@ proginfo(ProgInfo *info, Prog *p) ...@@ -118,14 +115,14 @@ proginfo(ProgInfo *info, Prog *p)
info->flags |= /*CanRegRead |*/ RightRead; info->flags |= /*CanRegRead |*/ RightRead;
} }
if(p->from.type == D_OREG && p->from.reg != NREG) { if((p->from.type == D_OREG || p->from.type == D_CONST) && p->from.reg != NREG) {
info->reguse |= RtoB(p->from.reg); info->regindex |= RtoB(p->from.reg);
if(info->flags & PostInc) { if(info->flags & PostInc) {
info->regset |= RtoB(p->from.reg); info->regset |= RtoB(p->from.reg);
} }
} }
if(p->to.type == D_OREG && p->to.reg != NREG) { if((p->to.type == D_OREG || p->to.type == D_CONST) && p->to.reg != NREG) {
info->reguse |= RtoB(p->to.reg); info->regindex |= RtoB(p->to.reg);
if(info->flags & PostInc) { if(info->flags & PostInc) {
info->regset |= RtoB(p->to.reg); info->regset |= RtoB(p->to.reg);
} }
...@@ -135,4 +132,13 @@ proginfo(ProgInfo *info, Prog *p) ...@@ -135,4 +132,13 @@ proginfo(ProgInfo *info, Prog *p)
info->flags &= ~LeftRead; info->flags &= ~LeftRead;
info->flags |= LeftAddr; info->flags |= LeftAddr;
} }
if(p->as == ADUFFZERO) {
info->reguse |= RtoB(0) | RtoB(2);
info->regset |= RtoB(2);
}
if(p->as == ADUFFCOPY) {
info->reguse |= RtoB(0) | RtoB(2) | RtoB(3);
info->regset |= RtoB(2) | RtoB(3);
}
} }
...@@ -33,14 +33,1197 @@ ...@@ -33,14 +33,1197 @@
#include "gg.h" #include "gg.h"
#include "opt.h" #include "opt.h"
#define NREGVAR 64 /* 32 general + 32 floating */
#define REGBITS ((uint64)0xffffffffffffffffull)
/*c2go enum {
NREGVAR = 64,
REGBITS = 0xffffffffffffffff,
};
*/
static Reg* firstr;
static int first = 1;
/*
 * rcmp is a qsort-style comparison callback for Rgn entries.
 * Entries with a larger cost sort first; entries with equal
 * cost are ordered by descending varno.
 */
int
rcmp(const void *a1, const void *a2)
{
	Rgn *lhs, *rhs;
	int d;

	lhs = (Rgn*)a1;
	rhs = (Rgn*)a2;

	// primary key: cost, largest first
	d = rhs->cost;
	d -= lhs->cost;
	if(d != 0)
		return d;

	// tie break: varno, largest first
	return rhs->varno - lhs->varno;
}
/*
 * setaddrs consumes the set bits of bit one at a time. For each
 * bit it looks up the corresponding var entry and sets addr = 2
 * on every var entry that shares that entry's node and name.
 */
static void
setaddrs(Bits bit)
{
	int bn, k, name;
	Node *owner;

	for(; bany(&bit); ) {
		// convert the next bit to a variable and drop it from the set
		bn = bnum(bit);
		owner = var[bn].node;
		name = var[bn].name;
		biclr(&bit, bn);

		// disable all pieces of that variable
		for(k = 0; k < nvar; k++)
			if(var[k].node == owner && var[k].name == name)
				var[k].addr = 2;
	}
}
/*
 * regname holds the names of the register pseudo-variables:
 * 32 entries ".R0" through ".R31" followed by 32 entries
 * ".F0" through ".F31" (64 entries in total, matching NREGVAR).
 * Every name starts with '.'.
 */
static char* regname[] = {
	".R0",
	".R1",
	".R2",
	".R3",
	".R4",
	".R5",
	".R6",
	".R7",
	".R8",
	".R9",
	".R10",
	".R11",
	".R12",
	".R13",
	".R14",
	".R15",
	".R16",
	".R17",
	".R18",
	".R19",
	".R20",
	".R21",
	".R22",
	".R23",
	".R24",
	".R25",
	".R26",
	".R27",
	".R28",
	".R29",
	".R30",
	".R31",
	".F0",
	".F1",
	".F2",
	".F3",
	".F4",
	".F5",
	".F6",
	".F7",
	".F8",
	".F9",
	".F10",
	".F11",
	".F12",
	".F13",
	".F14",
	".F15",
	".F16",
	".F17",
	".F18",
	".F19",
	".F20",
	".F21",
	".F22",
	".F23",
	".F24",
	".F25",
	".F26",
	".F27",
	".F28",
	".F29",
	".F30",
	".F31",
};
static Node* regnodes[NREGVAR];
static void walkvardef(Node *n, Reg *r, int active);
void void
regopt(Prog *p) regopt(Prog *firstp)
{ {
USED(p); Reg *r, *r1;
// TODO(minux) Prog *p;
Graph *g;
ProgInfo info;
int i, z, active;
uint64 vreg, usedreg;
Bits bit;
if(first) {
fmtinstall('Q', Qconv);
first = 0;
}
mergetemp(firstp);
/*
* control flow is more complicated in generated go code
* than in generated c code. define pseudo-variables for
* registers, so we have complete register usage information.
*/
nvar = NREGVAR;
memset(var, 0, NREGVAR*sizeof var[0]);
for(i=0; i<NREGVAR; i++) {
if(regnodes[i] == N)
regnodes[i] = newname(lookup(regname[i]));
var[i].node = regnodes[i];
}
// Exclude registers with fixed functions
regbits = (1<<D_R0)|RtoB(REGSP)|RtoB(REGG);
// Also exclude floating point registers with fixed constants
regbits |= FtoB(D_F0+27)|FtoB(D_F0+28)|FtoB(D_F0+29)|FtoB(D_F0+30)|FtoB(D_F0+31);
externs = zbits;
params = zbits;
consts = zbits;
addrs = zbits;
ivar = zbits;
ovar = zbits;
/*
* pass 1
* build aux data structure
* allocate pcs
* find use and set of variables
*/
g = flowstart(firstp, sizeof(Reg));
if(g == nil) {
for(i=0; i<nvar; i++)
var[i].node->opt = nil;
return;
}
firstr = (Reg*)g->start;
for(r = firstr; r != R; r = (Reg*)r->f.link) {
p = r->f.prog;
if(p->as == AVARDEF || p->as == AVARKILL)
continue;
proginfo(&info, p);
// Avoid making variables for direct-called functions.
if(p->as == ABL && p->to.name == D_EXTERN)
continue;
// from vs to doesn't matter for registers
r->use1.b[0] |= info.reguse | info.regindex;
r->set.b[0] |= info.regset;
// Compute used register for from
bit = mkvar(r, &p->from);
if(info.flags & LeftAddr)
setaddrs(bit);
if(info.flags & LeftRead)
for(z=0; z<BITS; z++)
r->use1.b[z] |= bit.b[z];
// Compute used register for reg
if(info.flags & RegRead) {
if(p->from.type != D_FREG)
r->use1.b[0] |= RtoB(p->reg);
else
r->use1.b[0] |= FtoB(D_F0+p->reg);
}
// Currently we never generate three register forms.
// If we do, this will need to change.
if(p->from3.type != D_NONE)
fatal("regopt not implemented for from3");
// Compute used register for to
bit = mkvar(r, &p->to);
if(info.flags & RightAddr)
setaddrs(bit);
if(info.flags & RightRead)
for(z=0; z<BITS; z++)
r->use2.b[z] |= bit.b[z];
if(info.flags & RightWrite)
for(z=0; z<BITS; z++)
r->set.b[z] |= bit.b[z];
}
for(i=0; i<nvar; i++) {
Var *v = var+i;
if(v->addr) {
bit = blsh(i);
for(z=0; z<BITS; z++)
addrs.b[z] |= bit.b[z];
}
if(debug['R'] && debug['v'])
print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
i, v->addr, v->etype, v->width, v->node, v->offset);
}
if(debug['R'] && debug['v'])
dumpit("pass1", &firstr->f, 1);
/*
* pass 2
* find looping structure
*/
flowrpo(g);
if(debug['R'] && debug['v'])
dumpit("pass2", &firstr->f, 1);
/*
* pass 2.5
* iterate propagating fat vardef covering forward
* r->act records vars with a VARDEF since the last CALL.
* (r->act will be reused in pass 5 for something else,
* but we'll be done with it by then.)
*/
active = 0;
for(r = firstr; r != R; r = (Reg*)r->f.link) {
r->f.active = 0;
r->act = zbits;
}
for(r = firstr; r != R; r = (Reg*)r->f.link) {
p = r->f.prog;
if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) {
active++;
walkvardef(p->to.node, r, active);
}
}
/*
* pass 3
* iterate propagating usage
* back until flow graph is complete
*/
loop1:
change = 0;
for(r = firstr; r != R; r = (Reg*)r->f.link)
r->f.active = 0;
for(r = firstr; r != R; r = (Reg*)r->f.link)
if(r->f.prog->as == ARET)
prop(r, zbits, zbits);
loop11:
/* pick up unreachable code */
i = 0;
for(r = firstr; r != R; r = r1) {
r1 = (Reg*)r->f.link;
if(r1 && r1->f.active && !r->f.active) {
prop(r, zbits, zbits);
i = 1;
}
}
if(i)
goto loop11;
if(change)
goto loop1;
if(debug['R'] && debug['v'])
dumpit("pass3", &firstr->f, 1);
/*
* pass 4
* iterate propagating register/variable synchrony
* forward until graph is complete
*/
loop2:
change = 0;
for(r = firstr; r != R; r = (Reg*)r->f.link)
r->f.active = 0;
synch(firstr, zbits);
if(change)
goto loop2;
if(debug['R'] && debug['v'])
dumpit("pass4", &firstr->f, 1);
/*
* pass 4.5
* move register pseudo-variables into regu.
*/
for(r = firstr; r != R; r = (Reg*)r->f.link) {
r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
r->set.b[0] &= ~REGBITS;
r->use1.b[0] &= ~REGBITS;
r->use2.b[0] &= ~REGBITS;
r->refbehind.b[0] &= ~REGBITS;
r->refahead.b[0] &= ~REGBITS;
r->calbehind.b[0] &= ~REGBITS;
r->calahead.b[0] &= ~REGBITS;
r->regdiff.b[0] &= ~REGBITS;
r->act.b[0] &= ~REGBITS;
}
if(debug['R'] && debug['v'])
dumpit("pass4.5", &firstr->f, 1);
/*
* pass 5
* isolate regions
* calculate costs (paint1)
*/
r = firstr;
if(r) {
for(z=0; z<BITS; z++)
bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
if(bany(&bit) && !r->f.refset) {
// should never happen - all variables are preset
if(debug['w'])
print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
r->f.refset = 1;
}
}
for(r = firstr; r != R; r = (Reg*)r->f.link)
r->act = zbits;
rgp = region;
nregion = 0;
for(r = firstr; r != R; r = (Reg*)r->f.link) {
for(z=0; z<BITS; z++)
bit.b[z] = r->set.b[z] &
~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
if(bany(&bit) && !r->f.refset) {
if(debug['w'])
print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
r->f.refset = 1;
excise(&r->f);
}
for(z=0; z<BITS; z++)
bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
while(bany(&bit)) {
i = bnum(bit);
rgp->enter = r;
rgp->varno = i;
change = 0;
paint1(r, i);
biclr(&bit, i);
if(change <= 0)
continue;
rgp->cost = change;
nregion++;
if(nregion >= NRGN) {
if(debug['R'] && debug['v'])
print("too many regions\n");
goto brk;
}
rgp++;
}
}
brk:
qsort(region, nregion, sizeof(region[0]), rcmp);
if(debug['R'] && debug['v'])
dumpit("pass5", &firstr->f, 1);
/*
* pass 6
* determine used registers (paint2)
* replace code (paint3)
*/
rgp = region;
if(debug['R'] && debug['v'])
print("\nregisterizing\n");
for(i=0; i<nregion; i++) {
if(debug['R'] && debug['v'])
print("region %d: cost %d varno %d enter %d\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
bit = blsh(rgp->varno);
usedreg = paint2(rgp->enter, rgp->varno, 0);
vreg = allreg(usedreg, rgp);
if(rgp->regno != 0) {
if(debug['R'] && debug['v']) {
Var *v;
v = var + rgp->varno;
print("registerize %N+%lld (bit=%2d et=%2E) in %R usedreg=%llx vreg=%llx\n",
v->node, v->offset, rgp->varno, v->etype, rgp->regno, usedreg, vreg);
}
paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
}
rgp++;
}
/*
* free aux structures. peep allocates new ones.
*/
for(i=0; i<nvar; i++)
var[i].node->opt = nil;
flowend(g);
firstr = R;
if(debug['R'] && debug['v']) {
// Rebuild flow graph, since we inserted instructions
g = flowstart(firstp, sizeof(Reg));
firstr = (Reg*)g->start;
dumpit("pass6", &firstr->f, 1);
flowend(g);
firstr = R;
}
/*
* pass 7
* peep-hole on basic block
*/
if(!debug['R'] || debug['P'])
peep(firstp);
/*
* eliminate nops
*/
for(p=firstp; p!=P; p=p->link) {
while(p->link != P && p->link->as == ANOP)
p->link = p->link->link;
if(p->to.type == D_BRANCH)
while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
p->to.u.branch = p->to.u.branch->link;
}
if(debug['R']) {
if(ostats.ncvtreg ||
ostats.nspill ||
ostats.ndelmov ||
ostats.nvar ||
0)
print("\nstats\n");
if(ostats.ncvtreg)
print(" %4d cvtreg\n", ostats.ncvtreg);
if(ostats.nspill)
print(" %4d spill\n", ostats.nspill);
if(ostats.ndelmov)
print(" %4d delmov\n", ostats.ndelmov);
if(ostats.nvar)
print(" %4d var\n", ostats.nvar);
memset(&ostats, 0, sizeof(ostats));
}
return; return;
} }
/*
 * walkvardef follows the s1 successor chain starting at r and sets,
 * in each visited node's act set, the bit of every Var on n's opt list.
 * The walk along s1 stops at a node already stamped with active, at an
 * AVARKILL of n (before its bits are set), or after an ABL.
 * Afterwards every s2 successor of the nodes before the stopping point
 * is walked recursively with the same stamp.
 */
static void
walkvardef(Node *n, Reg *r, int active)
{
	Reg *stop, *cur;
	Prog *p;
	Var *v;
	int idx;

	stop = r;
	while(stop != R) {
		if(stop->f.active == active)
			break;
		stop->f.active = active;

		p = stop->f.prog;
		if(p->as == AVARKILL && p->to.node == n)
			break;

		v = n->opt;
		while(v != nil) {
			idx = v - var;
			biset(&stop->act, idx);
			v = v->nextinnode;
		}

		if(p->as == ABL)
			break;
		stop = (Reg*)stop->f.s1;
	}

	cur = r;
	while(cur != stop) {
		if(cur->f.s2 != nil)
			walkvardef(n, (Reg*)cur->f.s2, active);
		cur = (Reg*)cur->f.s1;
	}
}
/*
 * addmove allocates a new instruction and links it in just after r's
 * instruction (or after a following stack fixup, see below).
 *
 *	r	flow node whose instruction the move follows
 *	bn	index into var of the variable being moved
 *	rn	register number; values >= NREG denote floating point
 *		registers and are emitted as D_FREG with reg rn-NREG
 *	f	zero: load the variable into the register (mov b,rn);
 *		nonzero: store the register into the variable
 *
 * The move opcode is chosen from the variable's etype.
 */
void
addmove(Reg *r, int bn, int rn, int f)
{
	Prog *p, *p1, *p2;
	Adr *a;
	Var *v;

	p1 = mal(sizeof(*p1));
	*p1 = zprog;
	p = r->f.prog;

	// If there's a stack fixup coming (ADD $n,R1 after BL newproc or BL deferproc),
	// delay the load until after the fixup.
	p2 = p->link;
	if(p2 && p2->as == AADD && p2->to.reg == REGSP && p2->to.type == D_REG)
		p = p2;

	// splice p1 into the instruction list right after p
	p1->link = p->link;
	p->link = p1;
	p1->lineno = p->lineno;

	v = var + bn;

	// build the variable's address in p1->to (the store form)
	a = &p1->to;
	a->name = v->name;
	a->node = v->node;
	a->sym = linksym(v->node->sym);
	a->offset = v->offset;
	a->etype = v->etype;
	a->type = D_OREG;
	if(a->etype == TARRAY || a->sym == nil)
		a->type = D_CONST;

	// a is an Adr*, so it must be printed with %D (the verb used for
	// Adr* elsewhere in this file); %A is not an address verb.
	if(v->addr)
		fatal("addmove: shouldn't be doing this %D\n", a);

	switch(v->etype) {
	default:
		print("What is this %E\n", v->etype);
		// fall through: after the diagnostic, unknown types use AMOVB

	case TINT8:
		p1->as = AMOVB;
		break;
	case TBOOL:
	case TUINT8:
//print("movbu %E %d %S\n", v->etype, bn, v->sym);
		p1->as = AMOVBZ;
		break;
	case TINT16:
		p1->as = AMOVH;
		break;
	case TUINT16:
		p1->as = AMOVHZ;
		break;
	case TINT32:
		p1->as = AMOVW;
		break;
	case TUINT32:
	case TPTR32:
		p1->as = AMOVWZ;
		break;
	case TINT64:
	case TUINT64:
	case TPTR64:
		p1->as = AMOVD;
		break;
	case TFLOAT32:
		p1->as = AFMOVS;
		break;
	case TFLOAT64:
		p1->as = AFMOVD;
		break;
	}

	// source operand of the store form: the register
	p1->from.type = D_REG;
	p1->from.reg = rn;
	if(rn >= NREG) {
		p1->from.type = D_FREG;
		p1->from.reg = rn-NREG;
	}
	if(!f) {
		// load form: the variable becomes the source and
		// the register becomes the destination
		p1->from = *a;
		*a = zprog.from;
		a->type = D_REG;
		a->reg = rn;
		if(rn >= NREG) {
			a->type = D_FREG;
			a->reg = rn-NREG;
		}
		if(v->etype == TUINT8 || v->etype == TBOOL)
			p1->as = AMOVBZ;
		if(v->etype == TUINT16)
			p1->as = AMOVHZ;
	}
	if(debug['R'])
		print("%P\t.a%P\n", p, p1);
	ostats.nspill++;
}
/*
 * overlap reports (1 or 0) whether the ranges [o1, o1+w1) and
 * [o2, o2+w2) intersect, i.e. each range ends after the other begins.
 */
static int
overlap(int64 o1, int w1, int64 o2, int w2)
{
	int64 end1, end2;

	end1 = o1 + w1;
	end2 = o2 + w2;
	return end1 > o2 && end2 > o1;
}
/*
 * mkvar maps the operand a to a bit set.
 *
 * - For a D_REG or D_FREG operand with a real register (reg != NREG)
 *   it returns the bit for that register (RtoB / FtoB) in word 0.
 * - For an operand naming an extern, static, auto or param ONAME node
 *   it returns the bit of the var entry with the same node, name,
 *   offset, etype and width, creating a new entry if none matches.
 * - In every other case (including when the var table is full) it
 *   returns zbits.
 *
 * Existing entries of the same variable that overlap the requested
 * range without matching it exactly get addr = 1, and so does the
 * newly created entry. r is unused.
 */
Bits
mkvar(Reg *r, Adr *a)
{
	USED(r);
	Var *v;
	int i, t, n, et, z, flag;
	int64 w;
	int64 o;
	Bits bit;
	Node *node;
	// mark registers used
	t = a->type;
	switch(t) {
	default:
		print("type %d %d %D\n", t, a->name, a);
		goto none;
	case D_NONE:
		goto none;
	case D_BRANCH:
	case D_CONST:
	case D_FCONST:
	case D_SCONST:
	case D_SPR:
	case D_OREG:
		break;
	case D_REG:
		if(a->reg != NREG) {
			bit = zbits;
			bit.b[0] = RtoB(a->reg);
			return bit;
		}
		break;
	case D_FREG:
		if(a->reg != NREG) {
			bit = zbits;
			bit.b[0] = FtoB(D_F0+a->reg);
			return bit;
		}
		break;
	}
	// only named operands (extern/static/auto/param) are tracked
	switch(a->name) {
	default:
		goto none;
	case D_EXTERN:
	case D_STATIC:
	case D_AUTO:
	case D_PARAM:
		n = a->name;
		break;
	}
	// resolve to the original ONAME node; skip nodes without a symbol
	// and symbols whose name starts with '.'
	node = a->node;
	if(node == N || node->op != ONAME || node->orig == N)
		goto none;
	node = node->orig;
	if(node->orig != node)
		fatal("%D: bad node", a);
	if(node->sym == S || node->sym->name[0] == '.')
		goto none;
	et = a->etype;
	o = a->offset;
	w = a->width;
	if(w < 0)
		fatal("bad width %lld for %D", w, a);
	// look for an existing entry; flag records whether any entry of the
	// same variable overlaps the requested range without matching it
	flag = 0;
	for(i=0; i<nvar; i++) {
		v = var+i;
		if(v->node == node && v->name == n) {
			if(v->offset == o)
			if(v->etype == et)
			if(v->width == w)
				return blsh(i);
			// if they overlap, disable both
			if(overlap(v->offset, v->width, o, w)) {
				v->addr = 1;
				flag = 1;
			}
		}
	}
	// operands with etype 0 or TFUNC never get a var entry
	switch(et) {
	case 0:
	case TFUNC:
		goto none;
	}
	if(nvar >= NVAR) {
		if(debug['w'] > 1 && node != N)
			fatal("variable not optimized: %#N", node);
		// If we're not tracking a word in a variable, mark the rest as
		// having its address taken, so that we keep the whole thing
		// live at all calls. otherwise we might optimize away part of
		// a variable but not all of it.
		for(i=0; i<nvar; i++) {
			v = var+i;
			if(v->node == node)
				v->addr = 1;
		}
		goto none;
	}
	// allocate and fill in a new var entry
	i = nvar;
	nvar++;
	v = var+i;
	v->offset = o;
	v->name = n;
	v->etype = et;
	v->width = w;
	v->addr = flag; // funny punning
	v->node = node;
	// node->opt is the head of a linked list
	// of Vars within the given Node, so that
	// we can start at a Var and find all the other
	// Vars in the same Go variable.
	v->nextinnode = node->opt;
	node->opt = v;
	// record the new bit in the classification sets it belongs to
	bit = blsh(i);
	if(n == D_EXTERN || n == D_STATIC)
		for(z=0; z<BITS; z++)
			externs.b[z] |= bit.b[z];
	if(n == D_PARAM)
		for(z=0; z<BITS; z++)
			params.b[z] |= bit.b[z];
	if(node->class == PPARAM)
		for(z=0; z<BITS; z++)
			ivar.b[z] |= bit.b[z];
	if(node->class == PPARAMOUT)
		for(z=0; z<BITS; z++)
			ovar.b[z] |= bit.b[z];
	// Treat values with their address taken as live at calls,
	// because the garbage collector's liveness analysis in ../gc/plive.c does.
	// These must be consistent or else we will elide stores and the garbage
	// collector will see uninitialized data.
	// The typical case where our own analysis is out of sync is when the
	// node appears to have its address taken but that code doesn't actually
	// get generated and therefore doesn't show up as an address being
	// taken when we analyze the instruction stream.
	// One instance of this case is when a closure uses the same name as
	// an outer variable for one of its own variables declared with :=.
	// The parser flags the outer variable as possibly shared, and therefore
	// sets addrtaken, even though it ends up not being actually shared.
	// If we were better about _ elision, _ = &x would suffice too.
	// The broader := in a closure problem is mentioned in a comment in
	// closure.c:/^typecheckclosure and dcl.c:/^oldname.
	if(node->addrtaken)
		v->addr = 1;
	// Disable registerization for globals, because:
	// (1) we might panic at any time and we want the recovery code
	// to see the latest values (issue 1304).
	// (2) we don't know what pointers might point at them and we want
	// loads via those pointers to see updated values and vice versa (issue 7995).
	//
	// Disable registerization for results if using defer, because the deferred func
	// might recover and return, causing the current values to be used.
	if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
		v->addr = 1;
	if(debug['R'])
		print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
	ostats.nvar++;
	return bit;
none:
	return zbits;
}
/*
 * prop propagates the ref and cal bit sets backward through the flow
 * graph, starting at r and following p1 predecessor links.
 *
 * At each node the incoming sets are merged into refahead/calahead
 * (bumping the global change counter whenever a word grows), adjusted
 * for ABL, ATEXT and ARET instructions, and then combined with the
 * node's set/use1/use2 bits to produce refbehind/calbehind.
 * The walk along p1 stops at the first node that was already active.
 * Finally, for every node visited before the stopping point, prop
 * recurses into the node's other predecessors (the p2/p2link list)
 * with that node's refbehind and calbehind.
 */
void
prop(Reg *r, Bits ref, Bits cal)
{
	Reg *r1, *r2;
	int z, i, j;
	Var *v, *v1;
	for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
		// merge the incoming sets into this node's "ahead" sets
		for(z=0; z<BITS; z++) {
			ref.b[z] |= r1->refahead.b[z];
			if(ref.b[z] != r1->refahead.b[z]) {
				r1->refahead.b[z] = ref.b[z];
				change++;
			}
			cal.b[z] |= r1->calahead.b[z];
			if(cal.b[z] != r1->calahead.b[z]) {
				r1->calahead.b[z] = cal.b[z];
				change++;
			}
		}
		switch(r1->f.prog->as) {
		case ABL:
			if(noreturn(r1->f.prog))
				break;
			// Mark all input variables (ivar) as used, because that's what the
			// liveness bitmaps say. The liveness bitmaps say that so that a
			// panic will not show stale values in the parameter dump.
			// Mark variables with a recent VARDEF (r1->act) as used,
			// so that the optimizer flushes initializations to memory,
			// so that if a garbage collection happens during this CALL,
			// the collector will see initialized memory. Again this is to
			// match what the liveness bitmaps say.
			for(z=0; z<BITS; z++) {
				cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
				ref.b[z] = 0;
			}
			// cal.b is the current approximation of what's live across the call.
			// Every bit in cal.b is a single stack word. For each such word,
			// find all the other tracked stack words in the same Go variable
			// (struct/slice/string/interface) and mark them live too.
			// This is necessary because the liveness analysis for the garbage
			// collector works at variable granularity, not at word granularity.
			// It is fundamental for slice/string/interface: the garbage collector
			// needs the whole value, not just some of the words, in order to
			// interpret the other bits correctly. Specifically, slice needs a consistent
			// ptr and cap, string needs a consistent ptr and len, and interface
			// needs a consistent type word and data word.
			for(z=0; z<BITS; z++) {
				if(cal.b[z] == 0)
					continue;
				for(i=0; i<64; i++) {
					if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
						continue;
					v = var+z*64+i;
					if(v->node->opt == nil) // v represents fixed register, not Go variable
						continue;
					// v->node->opt is the head of a linked list of Vars
					// corresponding to tracked words from the Go variable v->node.
					// Walk the list and set all the bits.
					// For a large struct this could end up being quadratic:
					// after the first setting, the outer loop (for z, i) would see a 1 bit
					// for all of the remaining words in the struct, and for each such
					// word would go through and turn on all the bits again.
					// To avoid the quadratic behavior, we only turn on the bits if
					// v is the head of the list or if the head's bit is not yet turned on.
					// This will set the bits at most twice, keeping the overall loop linear.
					v1 = v->node->opt;
					j = v1 - var;
					if(v == v1 || !btest(&cal, j)) {
						for(; v1 != nil; v1 = v1->nextinnode) {
							j = v1 - var;
							biset(&cal, j);
						}
					}
				}
			}
			break;
		case ATEXT:
			// nothing propagates past the function entry
			for(z=0; z<BITS; z++) {
				cal.b[z] = 0;
				ref.b[z] = 0;
			}
			break;
		case ARET:
			// at a return only externs and output variables (ovar) remain in cal
			for(z=0; z<BITS; z++) {
				cal.b[z] = externs.b[z] | ovar.b[z];
				ref.b[z] = 0;
			}
			break;
		}
		// apply this instruction's own sets and uses to get the "behind" sets
		for(z=0; z<BITS; z++) {
			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
				r1->use1.b[z] | r1->use2.b[z];
			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
			r1->refbehind.b[z] = ref.b[z];
			r1->calbehind.b[z] = cal.b[z];
		}
		// stop once we reach a node that an earlier walk already visited
		if(r1->f.active)
			break;
		r1->f.active = 1;
	}
	// propagate into the other predecessors of each node visited above
	for(; r != r1; r = (Reg*)r->f.p1)
		for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
			prop(r2, r->refbehind, r->calbehind);
}
/*
 * synch pushes the dif bit set forward along the s1 successor chain
 * starting at r. At each node, bits that are in refahead but not in
 * refbehind are removed from dif, the node's set and regdiff bits are
 * added, and the result is stored back into regdiff (bumping the
 * global change counter when a word changes). The walk stops at a
 * node that is already active; otherwise bits in calahead but not in
 * calbehind are removed and the s2 successor, if any, is processed
 * recursively.
 */
void
synch(Reg *r, Bits dif)
{
	Reg *cur;
	int z;

	cur = r;
	while(cur != R) {
		for(z = 0; z < BITS; z++) {
			dif.b[z] &= ~(~cur->refbehind.b[z] & cur->refahead.b[z]);
			dif.b[z] |= cur->set.b[z] | cur->regdiff.b[z];
			if(cur->regdiff.b[z] != dif.b[z]) {
				cur->regdiff.b[z] = dif.b[z];
				change++;
			}
		}

		if(cur->f.active)
			break;
		cur->f.active = 1;

		for(z = 0; z < BITS; z++)
			dif.b[z] &= ~(~cur->calbehind.b[z] & cur->calahead.b[z]);

		if(cur->f.s2 != nil)
			synch((Reg*)cur->f.s2, dif);

		cur = (Reg*)cur->f.s1;
	}
}
/*
 * allreg picks a register for region r that is not in the bit set b.
 * Integer, boolean and pointer etypes choose through BtoR, float
 * etypes through BtoF. When a register is found and the region's
 * cost is positive, r->regno is set to it and its bit is returned;
 * otherwise r->regno stays 0 and 0 is returned. Any other etype is
 * a fatal error.
 */
uint64
allreg(uint64 b, Rgn *r)
{
	Var *v;
	int reg;

	v = var + r->varno;
	r->regno = 0;

	switch(v->etype) {
	case TINT8:
	case TUINT8:
	case TINT16:
	case TUINT16:
	case TINT32:
	case TUINT32:
	case TINT64:
	case TUINT64:
	case TINT:
	case TUINT:
	case TUINTPTR:
	case TBOOL:
	case TPTR32:
	case TPTR64:
		reg = BtoR(~b);
		if(reg == 0 || r->cost <= 0)
			return 0;
		r->regno = reg;
		return RtoB(reg);

	case TFLOAT32:
	case TFLOAT64:
		reg = BtoF(~b);
		if(reg == 0 || r->cost <= 0)
			return 0;
		r->regno = reg;
		return FtoB(reg);

	default:
		fatal("unknown etype %d/%E", bitno(b), v->etype);
		break;
	}
	return 0;
}
/*
 * paint1 walks the region of the flow graph around r in which
 * variable bit bn is referenced, marks each visited node in its act
 * set, and accumulates an estimate into the global change counter:
 * CREF * loop is added for uses and sets of the variable (except on
 * ANOP instructions), and CLOAD * loop is subtracted wherever a load
 * or store would be needed.
 *
 * It first backs up along p1 links to the start of the region, then
 * moves forward along s1, recursing into p2 predecessors and the s2
 * successor that also reference the variable.
 */
void
paint1(Reg *r, int bn)
{
	Reg *r1;
	int z;
	uint64 bb;

	z = bn/64;
	// unsigned constant: shifting a signed 1 into bit 63 is undefined
	bb = 1ULL<<(bn%64);
	if(r->act.b[z] & bb)
		return;
	// back up to the first node of the region
	for(;;) {
		if(!(r->refbehind.b[z] & bb))
			break;
		r1 = (Reg*)r->f.p1;
		if(r1 == R)
			break;
		if(!(r1->refahead.b[z] & bb))
			break;
		if(r1->act.b[z] & bb)
			break;
		r = r1;
	}

	// charge for the load at the region entry, unless the entry
	// instruction only sets the variable
	if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
		change -= CLOAD * r->f.loop;
	}
	for(;;) {
		r->act.b[z] |= bb;

		if(r->f.prog->as != ANOP) { // don't give credit for NOPs
			if(r->use1.b[z] & bb)
				change += CREF * r->f.loop;
			if((r->use2.b[z]|r->set.b[z]) & bb)
				change += CREF * r->f.loop;
		}

		// charge for a store where one would be required
		if(STORE(r) & r->regdiff.b[z] & bb) {
			change -= CLOAD * r->f.loop;
		}

		if(r->refbehind.b[z] & bb)
			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
				if(r1->refahead.b[z] & bb)
					paint1(r1, bn);

		if(!(r->refahead.b[z] & bb))
			break;
		r1 = (Reg*)r->f.s2;
		if(r1 != R)
			if(r1->refbehind.b[z] & bb)
				paint1(r1, bn);
		r = (Reg*)r->f.s1;
		if(r == R)
			break;
		if(r->act.b[z] & bb)
			break;
		if(!(r->refbehind.b[z] & bb))
			break;
	}
}
/*
 * paint2 walks the region of the flow graph around r whose nodes have
 * bit bn set in act, clears that bit in each visited node, and returns
 * the union of regbits with the regu set of every node visited
 * (including those reached through recursive calls).
 * If r itself does not have the bit set in act, it returns regbits.
 * depth is only used in the debug trace.
 */
uint64
paint2(Reg *r, int bn, int depth)
{
	Reg *r1;
	int z;
	uint64 bb, vreg;

	z = bn/64;
	// unsigned constant: shifting a signed 1 into bit 63 is undefined
	bb = 1ULL << (bn%64);
	vreg = regbits;
	if(!(r->act.b[z] & bb))
		return vreg;
	// back up to the first node of the region
	for(;;) {
		if(!(r->refbehind.b[z] & bb))
			break;
		r1 = (Reg*)r->f.p1;
		if(r1 == R)
			break;
		if(!(r1->refahead.b[z] & bb))
			break;
		if(!(r1->act.b[z] & bb))
			break;
		r = r1;
	}
	for(;;) {
		if(debug['R'] && debug['v'])
			print(" paint2 %d %P\n", depth, r->f.prog);

		// unmark the node and collect the registers it uses
		r->act.b[z] &= ~bb;

		vreg |= r->regu;

		if(r->refbehind.b[z] & bb)
			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
				if(r1->refahead.b[z] & bb)
					vreg |= paint2(r1, bn, depth+1);

		if(!(r->refahead.b[z] & bb))
			break;
		r1 = (Reg*)r->f.s2;
		if(r1 != R)
			if(r1->refbehind.b[z] & bb)
				vreg |= paint2(r1, bn, depth+1);
		r = (Reg*)r->f.s1;
		if(r == R)
			break;
		if(!(r->act.b[z] & bb))
			break;
		if(!(r->refbehind.b[z] & bb))
			break;
	}
	return vreg;
}
/*
 * paint3 rewrites the region of the flow graph around r so that
 * variable bit bn lives in register rn.
 *
 * Over the region it replaces the from operand (for use1) and the to
 * operand (for use2/set) with the register via addreg, inserts a load
 * at the region entry and stores where required via addmove, marks
 * each visited node in act, and adds the register bits rb to the
 * node's regu set.
 */
void
paint3(Reg *r, int bn, uint64 rb, int rn)
{
	Reg *r1;
	Prog *p;
	int z;
	uint64 bb;

	z = bn/64;
	// unsigned constant: shifting a signed 1 into bit 63 is undefined
	bb = 1ULL << (bn%64);
	if(r->act.b[z] & bb)
		return;
	// back up to the first node of the region
	for(;;) {
		if(!(r->refbehind.b[z] & bb))
			break;
		r1 = (Reg*)r->f.p1;
		if(r1 == R)
			break;
		if(!(r1->refahead.b[z] & bb))
			break;
		if(r1->act.b[z] & bb)
			break;
		r = r1;
	}

	// load the variable into the register at the region entry,
	// unless the entry instruction only sets the variable
	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
		addmove(r, bn, rn, 0);
	for(;;) {
		r->act.b[z] |= bb;
		p = r->f.prog;

		if(r->use1.b[z] & bb) {
			if(debug['R'] && debug['v'])
				print("%P", p);
			addreg(&p->from, rn);
			if(debug['R'] && debug['v'])
				print(" ===change== %P\n", p);
		}
		if((r->use2.b[z]|r->set.b[z]) & bb) {
			if(debug['R'] && debug['v'])
				print("%P", p);
			addreg(&p->to, rn);
			if(debug['R'] && debug['v'])
				print(" ===change== %P\n", p);
		}

		// store the register back to the variable where required
		if(STORE(r) & r->regdiff.b[z] & bb)
			addmove(r, bn, rn, 1);
		r->regu |= rb;

		if(r->refbehind.b[z] & bb)
			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
				if(r1->refahead.b[z] & bb)
					paint3(r1, bn, rb, rn);

		if(!(r->refahead.b[z] & bb))
			break;
		r1 = (Reg*)r->f.s2;
		if(r1 != R)
			if(r1->refbehind.b[z] & bb)
				paint3(r1, bn, rb, rn);
		r = (Reg*)r->f.s1;
		if(r == R)
			break;
		if(r->act.b[z] & bb)
			break;
		if(!(r->refbehind.b[z] & bb))
			break;
	}
}
/*
 * addreg turns operand a into a plain register reference: the symbol,
 * node and name are cleared and the operand becomes D_REG rn, or
 * D_FREG rn-NREG when rn >= NREG. Each call bumps ostats.ncvtreg.
 */
void
addreg(Adr *a, int rn)
{
	a->sym = nil;
	a->node = nil;
	a->name = D_NONE;

	if(rn < NREG) {
		a->type = D_REG;
		a->reg = rn;
	} else {
		a->type = D_FREG;
		a->reg = rn - NREG;
	}

	ostats.ncvtreg++;
}
/* /*
* track register variables including external registers: * track register variables including external registers:
* bit reg * bit reg
...@@ -56,7 +1239,7 @@ regopt(Prog *p) ...@@ -56,7 +1239,7 @@ regopt(Prog *p)
uint64 uint64
RtoB(int r) RtoB(int r)
{ {
if(r >= D_R0 && r <= D_R0+31) if(r > D_R0 && r <= D_R0+31)
return 1ULL << (r - D_R0); return 1ULL << (r - D_R0);
return 0; return 0;
} }
...@@ -64,7 +1247,7 @@ RtoB(int r) ...@@ -64,7 +1247,7 @@ RtoB(int r)
int int
BtoR(uint64 b) BtoR(uint64 b)
{ {
b &= 0xffffffff; b &= 0xffffffffull;
if(b == 0) if(b == 0)
return 0; return 0;
return bitno(b) + D_R0; return bitno(b) + D_R0;
...@@ -139,6 +1322,7 @@ void ...@@ -139,6 +1322,7 @@ void
dumpit(char *str, Flow *r0, int isreg) dumpit(char *str, Flow *r0, int isreg)
{ {
Flow *r, *r1; Flow *r, *r1;
int s1v, s2v;
print("\n%s\n", str); print("\n%s\n", str);
for(r = r0; r != nil; r = r->link) { for(r = r0; r != nil; r = r->link) {
...@@ -150,12 +1334,16 @@ dumpit(char *str, Flow *r0, int isreg) ...@@ -150,12 +1334,16 @@ dumpit(char *str, Flow *r0, int isreg)
print(" %.4ud", (int)r1->prog->pc); print(" %.4ud", (int)r1->prog->pc);
print("\n"); print("\n");
} }
// r1 = r->s1; // If at least one successor is "interesting", print both
// if(r1 != R) { s1v = (r->s1 != nil) && (r->s1->prog != r->prog->link);
// print(" succ:"); s2v = (r->s2 != nil) && (r->s2->prog != r->prog->link);
// for(; r1 != R; r1 = r1->s1) if(s1v || s2v) {
// print(" %.4ud", (int)r1->prog->pc); print(" succ:");
// print("\n"); if(r->s1 != nil)
// } print(" %.4ud", (int)r->s1->prog->pc);
if(r->s2 != nil)
print(" %.4ud", (int)r->s2->prog->pc);
print("\n");
}
} }
} }
...@@ -131,7 +131,7 @@ enum ...@@ -131,7 +131,7 @@ enum
C_NCLASS, /* must be the last */ C_NCLASS, /* must be the last */
}; };
enum as enum
{ {
AXXX, AXXX,
AADD, AADD,
...@@ -501,7 +501,7 @@ enum ...@@ -501,7 +501,7 @@ enum
D_R0 = 0, // type is D_REG D_R0 = 0, // type is D_REG
D_F0 = D_R0+NREG, // type is D_FREG D_F0 = D_R0+NREG, // type is D_FREG
/* reg names iff type is D_SPR */ /* reg names in offset field iff type is D_SPR */
D_XER = 1, D_XER = 1,
D_LR = 8, D_LR = 8,
D_CTR = 9 D_CTR = 9
......
...@@ -95,11 +95,11 @@ int ...@@ -95,11 +95,11 @@ int
bnum(Bits a) bnum(Bits a)
{ {
int i; int i;
int32 b; uint64 b;
for(i=0; i<BITS; i++) for(i=0; i<BITS; i++)
if(b = a.b[i]) if(b = a.b[i])
return 32*i + bitno(b); return 64*i + bitno(b);
fatal("bad in bnum"); fatal("bad in bnum");
return 0; return 0;
} }
...@@ -110,27 +110,35 @@ blsh(uint n) ...@@ -110,27 +110,35 @@ blsh(uint n)
Bits c; Bits c;
c = zbits; c = zbits;
c.b[n/32] = 1L << (n%32); c.b[n/64] = 1LL << (n%64);
return c; return c;
} }
/*
int int
bset(Bits a, uint n) btest(Bits *a, uint n)
{ {
if(a.b[n/32] & (1L << (n%32))) return (a->b[n/64] & (1LL << (n%64))) != 0;
return 1; }
return 0;
void
biset(Bits *a, uint n)
{
a->b[n/64] |= 1LL << (n%64);
}
void
biclr(Bits *a, uint n)
{
a->b[n/64] &= ~(1LL << (n%64));
} }
*/
int int
bitno(int32 b) bitno(uint64 b)
{ {
int i; int i;
for(i=0; i<32; i++) for(i=0; i<64; i++)
if(b & (1L<<i)) if(b & (1LL<<i))
return i; return i;
fatal("bad in bitno"); fatal("bad in bitno");
return 0; return 0;
...@@ -157,7 +165,7 @@ Qconv(Fmt *fp) ...@@ -157,7 +165,7 @@ Qconv(Fmt *fp)
if(var[i].offset != 0) if(var[i].offset != 0)
fmtprint(fp, "%+lld", (vlong)var[i].offset); fmtprint(fp, "%+lld", (vlong)var[i].offset);
} }
bits.b[i/32] &= ~(1L << (i%32)); biclr(&bits, i);
} }
return 0; return 0;
} }
...@@ -704,13 +704,13 @@ enum ...@@ -704,13 +704,13 @@ enum
Ecomplit = 1<<11, // type in composite literal Ecomplit = 1<<11, // type in composite literal
}; };
#define BITS 5 #define BITS 3
#define NVAR (BITS*sizeof(uint32)*8) #define NVAR (BITS*sizeof(uint64)*8)
typedef struct Bits Bits; typedef struct Bits Bits;
struct Bits struct Bits
{ {
uint32 b[BITS]; uint64 b[BITS];
}; };
EXTERN Bits zbits; EXTERN Bits zbits;
...@@ -1027,12 +1027,14 @@ int Qconv(Fmt *fp); ...@@ -1027,12 +1027,14 @@ int Qconv(Fmt *fp);
Bits band(Bits a, Bits b); Bits band(Bits a, Bits b);
int bany(Bits *a); int bany(Bits *a);
int beq(Bits a, Bits b); int beq(Bits a, Bits b);
int bitno(int32 b); int bitno(uint64 b);
Bits blsh(uint n); Bits blsh(uint n);
Bits bnot(Bits a); Bits bnot(Bits a);
int bnum(Bits a); int bnum(Bits a);
Bits bor(Bits a, Bits b); Bits bor(Bits a, Bits b);
int bset(Bits a, uint n); int btest(Bits *a, uint n);
void biset(Bits *a, uint n);
void biclr(Bits *a, uint n);
/* /*
* bv.c * bv.c
......
...@@ -101,6 +101,10 @@ func testDisasm(t *testing.T, flags ...string) { ...@@ -101,6 +101,10 @@ func testDisasm(t *testing.T, flags ...string) {
} }
func TestDisasm(t *testing.T) { func TestDisasm(t *testing.T) {
switch runtime.GOARCH {
case "power64", "power64le":
t.Skipf("skipping on %s, issue 9039", runtime.GOARCH)
}
testDisasm(t) testDisasm(t)
} }
...@@ -109,5 +113,9 @@ func TestDisasmExtld(t *testing.T) { ...@@ -109,5 +113,9 @@ func TestDisasmExtld(t *testing.T) {
case "plan9", "windows": case "plan9", "windows":
t.Skipf("skipping on %s", runtime.GOOS) t.Skipf("skipping on %s", runtime.GOOS)
} }
switch runtime.GOARCH {
case "power64", "power64le":
t.Skipf("skipping on %s, no support for external linking, issue 9038", runtime.GOARCH)
}
testDisasm(t, "-ldflags=-linkmode=external") testDisasm(t, "-ldflags=-linkmode=external")
} }
...@@ -259,11 +259,12 @@ Dconv(Fmt *fp) ...@@ -259,11 +259,12 @@ Dconv(Fmt *fp)
sprint(str, "%s+%.5lux(BRANCH)", a->sym->name, v); sprint(str, "%s+%.5lux(BRANCH)", a->sym->name, v);
else else
sprint(str, "%.5lux(BRANCH)", v); sprint(str, "%.5lux(BRANCH)", v);
} else } else if(a->u.branch != nil)
if(a->sym != nil) sprint(str, "%lld", a->u.branch->pc);
sprint(str, "%s+%lld(APC)", a->sym->name, a->offset); else if(a->sym != nil)
else sprint(str, "%s+%lld(APC)", a->sym->name, a->offset);
sprint(str, "%lld(APC)", a->offset); else
sprint(str, "%lld(APC)", a->offset);
break; break;
case D_FCONST: case D_FCONST:
......
...@@ -5,12 +5,14 @@ ...@@ -5,12 +5,14 @@
// +build power64 power64le // +build power64 power64le
#include "textflag.h" #include "textflag.h"
#include "funcdata.h"
// makeFuncStub is the code half of the function returned by MakeFunc. // makeFuncStub is the code half of the function returned by MakeFunc.
// See the comment on the declaration of makeFuncStub in makefunc.go // See the comment on the declaration of makeFuncStub in makefunc.go
// for more details. // for more details.
// No argsize here, gc generates argsize info at call site. // No arg size here, runtime pulls arg map out of the func value.
TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$16 TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$16
NO_LOCAL_POINTERS
MOVD R11, 8(R1) MOVD R11, 8(R1)
MOVD $argframe+0(FP), R3 MOVD $argframe+0(FP), R3
MOVD R3, 16(R1) MOVD R3, 16(R1)
...@@ -20,8 +22,9 @@ TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$16 ...@@ -20,8 +22,9 @@ TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$16
// methodValueCall is the code half of the function returned by makeMethodValue. // methodValueCall is the code half of the function returned by makeMethodValue.
// See the comment on the declaration of methodValueCall in makefunc.go // See the comment on the declaration of methodValueCall in makefunc.go
// for more details. // for more details.
// No argsize here, gc generates argsize info at call site. // No arg size here; runtime pulls arg map out of the func value.
TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$16 TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$16
NO_LOCAL_POINTERS
MOVD R11, 8(R1) MOVD R11, 8(R1)
MOVD $argframe+0(FP), R3 MOVD $argframe+0(FP), R3
MOVD R3, 16(R1) MOVD R3, 16(R1)
......
...@@ -86,7 +86,7 @@ TEXT runtime·reginit(SB),NOSPLIT,$-8-0 ...@@ -86,7 +86,7 @@ TEXT runtime·reginit(SB),NOSPLIT,$-8-0
// void gosave(Gobuf*) // void gosave(Gobuf*)
// save state in Gobuf; setjmp // save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT, $-8-8 TEXT runtime·gosave(SB), NOSPLIT, $-8-8
MOVD gobuf+0(FP), R3 MOVD buf+0(FP), R3
MOVD R1, gobuf_sp(R3) MOVD R1, gobuf_sp(R3)
MOVD LR, R31 MOVD LR, R31
MOVD R31, gobuf_pc(R3) MOVD R31, gobuf_pc(R3)
...@@ -99,7 +99,7 @@ TEXT runtime·gosave(SB), NOSPLIT, $-8-8 ...@@ -99,7 +99,7 @@ TEXT runtime·gosave(SB), NOSPLIT, $-8-8
// void gogo(Gobuf*) // void gogo(Gobuf*)
// restore state from Gobuf; longjmp // restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $-8-8 TEXT runtime·gogo(SB), NOSPLIT, $-8-8
MOVD gobuf+0(FP), R5 MOVD buf+0(FP), R5
MOVD gobuf_g(R5), g // make sure g is not nil MOVD gobuf_g(R5), g // make sure g is not nil
MOVD 0(g), R4 MOVD 0(g), R4
MOVD gobuf_sp(R5), R1 MOVD gobuf_sp(R5), R1
...@@ -299,7 +299,7 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT,$-8-0 ...@@ -299,7 +299,7 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT,$-8-0
// Note: can't just "BR NAME(SB)" - bad inlining results. // Note: can't just "BR NAME(SB)" - bad inlining results.
TEXT ·reflectcall(SB), NOSPLIT, $-8-24 TEXT ·reflectcall(SB), NOSPLIT, $-8-24
MOVW argsize+16(FP), R3 MOVWZ n+16(FP), R3
DISPATCH(runtime·call16, 16) DISPATCH(runtime·call16, 16)
DISPATCH(runtime·call32, 32) DISPATCH(runtime·call32, 32)
DISPATCH(runtime·call64, 64) DISPATCH(runtime·call64, 64)
...@@ -335,8 +335,8 @@ TEXT ·reflectcall(SB), NOSPLIT, $-8-24 ...@@ -335,8 +335,8 @@ TEXT ·reflectcall(SB), NOSPLIT, $-8-24
TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \
NO_LOCAL_POINTERS; \ NO_LOCAL_POINTERS; \
/* copy arguments to stack */ \ /* copy arguments to stack */ \
MOVD argptr+8(FP), R3; \ MOVD arg+8(FP), R3; \
MOVW argsize+16(FP), R4; \ MOVWZ n+16(FP), R4; \
MOVD R1, R5; \ MOVD R1, R5; \
ADD $(8-1), R5; \ ADD $(8-1), R5; \
SUB $1, R3; \ SUB $1, R3; \
...@@ -353,9 +353,9 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \ ...@@ -353,9 +353,9 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \
PCDATA $PCDATA_StackMapIndex, $0; \ PCDATA $PCDATA_StackMapIndex, $0; \
BL (CTR); \ BL (CTR); \
/* copy return values back */ \ /* copy return values back */ \
MOVD argptr+8(FP), R3; \ MOVD arg+8(FP), R3; \
MOVW argsize+16(FP), R4; \ MOVWZ n+16(FP), R4; \
MOVW retoffset+20(FP), R6; \ MOVWZ retoffset+20(FP), R6; \
MOVD R1, R5; \ MOVD R1, R5; \
ADD R6, R5; \ ADD R6, R5; \
ADD R6, R3; \ ADD R6, R3; \
...@@ -398,7 +398,7 @@ CALLFN(·call268435456, 268435456) ...@@ -398,7 +398,7 @@ CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912) CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824) CALLFN(·call1073741824, 1073741824)
// bool cas(int32 *val, int32 old, int32 new) // bool cas(uint32 *ptr, uint32 old, uint32 new)
// Atomically: // Atomically:
// if(*val == old){ // if(*val == old){
// *val = new; // *val = new;
...@@ -406,9 +406,9 @@ CALLFN(·call1073741824, 1073741824) ...@@ -406,9 +406,9 @@ CALLFN(·call1073741824, 1073741824)
// } else // } else
// return 0; // return 0;
TEXT runtime·cas(SB), NOSPLIT, $0-17 TEXT runtime·cas(SB), NOSPLIT, $0-17
MOVD p+0(FP), R3 MOVD ptr+0(FP), R3
MOVW old+8(FP), R4 MOVWZ old+8(FP), R4
MOVW new+12(FP), R5 MOVWZ new+12(FP), R5
cas_again: cas_again:
SYNC SYNC
LWAR (R3), R6 LWAR (R3), R6
...@@ -425,7 +425,7 @@ cas_fail: ...@@ -425,7 +425,7 @@ cas_fail:
MOVD $0, R3 MOVD $0, R3
BR -5(PC) BR -5(PC)
// bool runtime·cas64(uint64 *val, uint64 old, uint64 new) // bool runtime·cas64(uint64 *ptr, uint64 old, uint64 new)
// Atomically: // Atomically:
// if(*val == *old){ // if(*val == *old){
// *val = new; // *val = new;
...@@ -434,7 +434,7 @@ cas_fail: ...@@ -434,7 +434,7 @@ cas_fail:
// return 0; // return 0;
// } // }
TEXT runtime·cas64(SB), NOSPLIT, $0-25 TEXT runtime·cas64(SB), NOSPLIT, $0-25
MOVD p+0(FP), R3 MOVD ptr+0(FP), R3
MOVD old+8(FP), R4 MOVD old+8(FP), R4
MOVD new+16(FP), R5 MOVD new+16(FP), R5
cas64_again: cas64_again:
...@@ -475,12 +475,12 @@ TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-16 ...@@ -475,12 +475,12 @@ TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-16
TEXT runtime·casp1(SB), NOSPLIT, $0-25 TEXT runtime·casp1(SB), NOSPLIT, $0-25
BR runtime·cas64(SB) BR runtime·cas64(SB)
// uint32 xadd(uint32 volatile *val, int32 delta) // uint32 xadd(uint32 volatile *ptr, int32 delta)
// Atomically: // Atomically:
// *val += delta; // *val += delta;
// return *val; // return *val;
TEXT runtime·xadd(SB), NOSPLIT, $0-20 TEXT runtime·xadd(SB), NOSPLIT, $0-20
MOVD p+0(FP), R4 MOVD ptr+0(FP), R4
MOVW delta+8(FP), R5 MOVW delta+8(FP), R5
SYNC SYNC
LWAR (R4), R3 LWAR (R4), R3
...@@ -493,7 +493,7 @@ TEXT runtime·xadd(SB), NOSPLIT, $0-20 ...@@ -493,7 +493,7 @@ TEXT runtime·xadd(SB), NOSPLIT, $0-20
RETURN RETURN
TEXT runtime·xadd64(SB), NOSPLIT, $0-24 TEXT runtime·xadd64(SB), NOSPLIT, $0-24
MOVD p+0(FP), R4 MOVD ptr+0(FP), R4
MOVD delta+8(FP), R5 MOVD delta+8(FP), R5
SYNC SYNC
LDAR (R4), R3 LDAR (R4), R3
...@@ -506,7 +506,7 @@ TEXT runtime·xadd64(SB), NOSPLIT, $0-24 ...@@ -506,7 +506,7 @@ TEXT runtime·xadd64(SB), NOSPLIT, $0-24
RETURN RETURN
TEXT runtime·xchg(SB), NOSPLIT, $0-20 TEXT runtime·xchg(SB), NOSPLIT, $0-20
MOVD p+0(FP), R4 MOVD ptr+0(FP), R4
MOVW new+8(FP), R5 MOVW new+8(FP), R5
SYNC SYNC
LWAR (R4), R3 LWAR (R4), R3
...@@ -518,7 +518,7 @@ TEXT runtime·xchg(SB), NOSPLIT, $0-20 ...@@ -518,7 +518,7 @@ TEXT runtime·xchg(SB), NOSPLIT, $0-20
RETURN RETURN
TEXT runtime·xchg64(SB), NOSPLIT, $0-24 TEXT runtime·xchg64(SB), NOSPLIT, $0-24
MOVD p+0(FP), R4 MOVD ptr+0(FP), R4
MOVD new+8(FP), R5 MOVD new+8(FP), R5
SYNC SYNC
LDAR (R4), R3 LDAR (R4), R3
...@@ -651,7 +651,7 @@ TEXT runtime·setcallerpc(SB),NOSPLIT,$-8-16 ...@@ -651,7 +651,7 @@ TEXT runtime·setcallerpc(SB),NOSPLIT,$-8-16
RETURN RETURN
TEXT runtime·getcallersp(SB),NOSPLIT,$0-16 TEXT runtime·getcallersp(SB),NOSPLIT,$0-16
MOVD sp+0(FP), R3 MOVD argp+0(FP), R3
SUB $8, R3 SUB $8, R3
MOVD R3, ret+8(FP) MOVD R3, ret+8(FP)
RETURN RETURN
...@@ -695,16 +695,17 @@ TEXT runtime·aeshashstr(SB),NOSPLIT,$-8-0 ...@@ -695,16 +695,17 @@ TEXT runtime·aeshashstr(SB),NOSPLIT,$-8-0
TEXT runtime·memeq(SB),NOSPLIT,$-8-25 TEXT runtime·memeq(SB),NOSPLIT,$-8-25
MOVD a+0(FP), R3 MOVD a+0(FP), R3
MOVD b+8(FP), R4 MOVD b+8(FP), R4
MOVD count+16(FP), R5 MOVD size+16(FP), R5
SUB $1, R3 SUB $1, R3
SUB $1, R4 SUB $1, R4
ADD R3, R5, R8 ADD R3, R5, R8
loop: loop:
CMP R3, R8 CMP R3, R8
BNE 4(PC) BNE test
MOVD $1, R3 MOVD $1, R3
MOVB R3, ret+24(FP) MOVB R3, ret+24(FP)
RETURN RETURN
test:
MOVBZU 1(R3), R6 MOVBZU 1(R3), R6
MOVBZU 1(R4), R7 MOVBZU 1(R4), R7
CMP R6, R7 CMP R6, R7
...@@ -828,7 +829,7 @@ notfound: ...@@ -828,7 +829,7 @@ notfound:
// in ../../cmd/9g/ggen.c:/^clearfat. // in ../../cmd/9g/ggen.c:/^clearfat.
// R0: always zero // R0: always zero
// R3 (aka REGRT1): ptr to memory to be zeroed - 8 // R3 (aka REGRT1): ptr to memory to be zeroed - 8
// R3 is updated as a side effect. // On return, R3 points to the last zeroed dword.
TEXT runtime·duffzero(SB), NOSPLIT, $-8-0 TEXT runtime·duffzero(SB), NOSPLIT, $-8-0
MOVDU R0, 8(R3) MOVDU R0, 8(R3)
MOVDU R0, 8(R3) MOVDU R0, 8(R3)
...@@ -964,7 +965,7 @@ TEXT runtime·fastrand1(SB), NOSPLIT, $0-4 ...@@ -964,7 +965,7 @@ TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
MOVD g_m(g), R4 MOVD g_m(g), R4
MOVWZ m_fastrand(R4), R3 MOVWZ m_fastrand(R4), R3
ADD R3, R3 ADD R3, R3
CMP R3, $0 CMPW R3, $0
BGE 2(PC) BGE 2(PC)
XOR $0x88888eef, R3 XOR $0x88888eef, R3
MOVW R3, m_fastrand(R4) MOVW R3, m_fastrand(R4)
...@@ -979,3 +980,9 @@ TEXT runtime·return0(SB), NOSPLIT, $0 ...@@ -979,3 +980,9 @@ TEXT runtime·return0(SB), NOSPLIT, $0
// Must obey the gcc calling convention. // Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0 TEXT _cgo_topofstack(SB),NOSPLIT,$0
MOVD R0, 26(R0) MOVD R0, 26(R0)
// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$-8-0
MOVD R0, R0 // NOP
BL runtime·goexit1(SB) // does not return
...@@ -137,7 +137,7 @@ func infoBigStruct() []byte { ...@@ -137,7 +137,7 @@ func infoBigStruct() []byte {
BitsScalar, BitsScalar, BitsScalar, BitsScalar, // t int; y uint16; u uint64 BitsScalar, BitsScalar, BitsScalar, BitsScalar, // t int; y uint16; u uint64
BitsPointer, BitsDead, // i string BitsPointer, BitsDead, // i string
} }
case "amd64": case "amd64", "power64", "power64le":
return []byte{ return []byte{
BitsPointer, // q *int BitsPointer, // q *int
BitsScalar, BitsScalar, BitsScalar, // w byte; e [17]byte BitsScalar, BitsScalar, BitsScalar, // w byte; e [17]byte
...@@ -153,12 +153,6 @@ func infoBigStruct() []byte { ...@@ -153,12 +153,6 @@ func infoBigStruct() []byte {
BitsScalar, BitsScalar, BitsDead, BitsScalar, BitsScalar, // t int; y uint16; u uint64 BitsScalar, BitsScalar, BitsDead, BitsScalar, BitsScalar, // t int; y uint16; u uint64
BitsPointer, BitsDead, // i string BitsPointer, BitsDead, // i string
} }
case "power64", "power64le":
return []byte{
BitsPointer, BitsScalar, BitsScalar, BitsScalar,
BitsMultiWord, BitsSlice, BitsScalar, BitsScalar,
BitsScalar, BitsScalar, BitsMultiWord, BitsString,
}
default: default:
panic("unknown arch") panic("unknown arch")
} }
......
...@@ -122,6 +122,7 @@ ...@@ -122,6 +122,7 @@
enum { enum {
Debug = 0, Debug = 0,
DebugPtrs = 0, // if 1, print trace of every pointer load during GC
ConcurrentSweep = 1, ConcurrentSweep = 1,
FinBlockSize = 4*1024, FinBlockSize = 4*1024,
......
...@@ -69,7 +69,7 @@ runtime·recovery_m(G *gp) ...@@ -69,7 +69,7 @@ runtime·recovery_m(G *gp)
// each call to deferproc. // each call to deferproc.
// (The pc we're returning to does pop pop // (The pc we're returning to does pop pop
// before it tests the return value.) // before it tests the return value.)
// On the arm there are 2 saved LRs mixed in too. // On the arm and power there are 2 saved LRs mixed in too.
if(thechar == '5' || thechar == '9') if(thechar == '5' || thechar == '9')
gp->sched.sp = (uintptr)argp - 4*sizeof(uintptr); gp->sched.sp = (uintptr)argp - 4*sizeof(uintptr);
else else
......
...@@ -226,6 +226,12 @@ runtime·check(void) ...@@ -226,6 +226,12 @@ runtime·check(void)
if(z != 4) if(z != 4)
runtime·throw("cas4"); runtime·throw("cas4");
z = 0xffffffff;
if(!runtime·cas(&z, 0xffffffff, 0xfffffffe))
runtime·throw("cas5");
if(z != 0xfffffffe)
runtime·throw("cas6");
k = (byte*)0xfedcb123; k = (byte*)0xfedcb123;
if(sizeof(void*) == 8) if(sizeof(void*) == 8)
k = (byte*)((uintptr)k<<10); k = (byte*)((uintptr)k<<10);
......
...@@ -124,7 +124,7 @@ runtime·sighandler(int32 sig, Siginfo *info, void *ctxt, G *gp) ...@@ -124,7 +124,7 @@ runtime·sighandler(int32 sig, Siginfo *info, void *ctxt, G *gp)
if(runtime·gotraceback(&crash)){ if(runtime·gotraceback(&crash)){
runtime·goroutineheader(gp); runtime·goroutineheader(gp);
runtime·traceback(SIG_PC(info, ctxt), SIG_SP(info, ctxt), SIG_LINK(info, ctxt), gp); runtime·tracebacktrap(SIG_PC(info, ctxt), SIG_SP(info, ctxt), SIG_LINK(info, ctxt), gp);
runtime·tracebackothers(gp); runtime·tracebackothers(gp);
runtime·printf("\n"); runtime·printf("\n");
runtime·dumpregs(info, ctxt); runtime·dumpregs(info, ctxt);
......
// runoutput
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Check that {5,6,8,9}g/ggen.c:clearfat is zeroing the entire object.
package main
import (
"bytes"
"fmt"
"strconv"
"strings"
)
// ntest is the number of poison/clearfat function pairs that main generates.
// Pair number i (1 through ntest) checks an array of i bytes.
const ntest = 1100
// main generates the source of a Go test program and prints it to standard
// output. For every size from 1 through ntest it instantiates the decl
// template and appends a poison/clearfat call pair, then splices the
// accumulated declarations and calls into the program template.
func main() {
	// Call sequence emitted for each size; "$" stands for the size.
	const callTmpl = "poison$()\n\tclearfat$()\n\t"

	var declBuf, callBuf bytes.Buffer
	for size := 1; size <= ntest; size++ {
		num := strconv.Itoa(size)
		declBuf.WriteString(strings.Replace(decl, "$", num, -1))
		callBuf.WriteString(strings.Replace(callTmpl, "$", num, -1))
	}

	// Fill in the declarations first and the calls second; each
	// placeholder is replaced only once.
	program = strings.Replace(program, "$DECLS", declBuf.String(), 1)
	program = strings.Replace(program, "$CALLS", callBuf.String(), 1)
	fmt.Print(program)
}
// program is the template for the generated test program. main replaces
// $DECLS with the generated function declarations and $CALLS with the calls
// to them. The generated program keeps a failure counter in count and prints
// a "failed" line only when count is nonzero.
var program = `package main
var count int
$DECLS
func main() {
$CALLS
if count != 0 {
println("failed", count, "case(s)")
}
}
`
// decl is the template for one pair of generated functions. main replaces
// every "$" with the decimal test number. poison$ fills a stack array of
// 2*$ bytes with 0xff. clearfat$ declares a $-byte array without an explicit
// initializer and increments count (once) if any of its bytes is nonzero.
const decl = `
func poison$() {
// Grow and poison the stack space that will be used by clearfat$
var t [2*$]byte
for i := range t {
t[i] = 0xff
}
}
func clearfat$() {
var t [$]byte
for _, x := range t {
if x != 0 {
// println("clearfat$: index", i, "expected 0, got", x)
count++
break
}
}
}
`
...@@ -634,8 +634,8 @@ func newT40() *T40 { ...@@ -634,8 +634,8 @@ func newT40() *T40 {
func bad40() { func bad40() {
t := newT40() t := newT40()
printnl()
_ = t _ = t
printnl()
} }
func good40() { func good40() {
......
// errorcheck -0 -d=nil // errorcheck -0 -d=nil
// Fails on power64x because of incomplete optimization. See issue 9058.
// +build !power64,!power64le
// Copyright 2013 The Go Authors. All rights reserved. // Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment