Commit dbf96add authored by Russ Cox

cmd/gc: move flow graph into portable opt

Now there's only one copy of the flow graph construction
and dominator computation, and different optimizations
can attach different annotations to the instructions.

R=ken2
CC=golang-dev
https://golang.org/cl/12797045
parent 954d1474
...@@ -55,6 +55,7 @@ typedef struct Rgn Rgn; ...@@ -55,6 +55,7 @@ typedef struct Rgn Rgn;
// r->prog->opt points back to r. // r->prog->opt points back to r.
struct Reg struct Reg
{ {
Flow f;
Bits set; // variables written by this instruction. Bits set; // variables written by this instruction.
Bits use1; // variables read by prog->from. Bits use1; // variables read by prog->from.
...@@ -68,19 +69,6 @@ struct Reg ...@@ -68,19 +69,6 @@ struct Reg
Bits act; Bits act;
int32 regu; // register used bitmap int32 regu; // register used bitmap
int32 rpo; // reverse post ordering
int32 active;
uint16 loop; // x5 for every loop
uchar refset; // diagnostic generated
Reg* p1; // predecessors of this instruction: p1,
Reg* p2; // and then p2 linked though p2link.
Reg* p2link;
Reg* s1; // successors of this instruction (at most two: s1 and s2).
Reg* s2;
Reg* link; // next instruction in function code
Prog* prog; // actual instruction
}; };
#define R ((Reg*)0) #define R ((Reg*)0)
...@@ -96,7 +84,6 @@ struct Rgn ...@@ -96,7 +84,6 @@ struct Rgn
EXTERN int32 exregoffset; // not set EXTERN int32 exregoffset; // not set
EXTERN int32 exfregoffset; // not set EXTERN int32 exfregoffset; // not set
EXTERN Reg* firstr; EXTERN Reg* firstr;
EXTERN Reg* lastr;
EXTERN Reg zreg; EXTERN Reg zreg;
EXTERN Reg* freer; EXTERN Reg* freer;
EXTERN Reg** rpo2r; EXTERN Reg** rpo2r;
...@@ -134,34 +121,21 @@ void regopt(Prog*); ...@@ -134,34 +121,21 @@ void regopt(Prog*);
void addmove(Reg*, int, int, int); void addmove(Reg*, int, int, int);
Bits mkvar(Reg *r, Adr *a); Bits mkvar(Reg *r, Adr *a);
void prop(Reg*, Bits, Bits); void prop(Reg*, Bits, Bits);
void loopit(Reg*, int32);
void synch(Reg*, Bits); void synch(Reg*, Bits);
uint32 allreg(uint32, Rgn*); uint32 allreg(uint32, Rgn*);
void paint1(Reg*, int); void paint1(Reg*, int);
uint32 paint2(Reg*, int); uint32 paint2(Reg*, int);
void paint3(Reg*, int, int32, int); void paint3(Reg*, int, int32, int);
void addreg(Adr*, int); void addreg(Adr*, int);
void dumpit(char *str, Reg *r0); void dumpit(char *str, Flow *r0, int);
/* /*
* peep.c * peep.c
*/ */
void peep(void); void peep(Prog*);
void excise(Reg*); void excise(Flow*);
Reg* uniqp(Reg*);
Reg* uniqs(Reg*);
int regtyp(Adr*);
int anyvar(Adr*);
int subprop(Reg*);
int copyprop(Reg*);
int copy1(Adr*, Adr*, Reg*, int);
int copyu(Prog*, Adr*, Adr*); int copyu(Prog*, Adr*, Adr*);
int copyas(Adr*, Adr*);
int copyau(Adr*, Adr*);
int copysub(Adr*, Adr*, Adr*, int);
int copysub1(Prog*, Adr*, Adr*, int);
int32 RtoB(int); int32 RtoB(int);
int32 FtoB(int); int32 FtoB(int);
int BtoR(int32); int BtoR(int32);
......
...@@ -34,57 +34,43 @@ ...@@ -34,57 +34,43 @@
#include "gg.h" #include "gg.h"
#include "opt.h" #include "opt.h"
int xtramodes(Reg*, Adr*); static int xtramodes(Graph*, Flow*, Adr*);
int shortprop(Reg *r); static int shortprop(Flow *r);
int shiftprop(Reg *r); static int regtyp(Adr*);
void constprop(Adr *c1, Adr *v1, Reg *r); static int subprop(Flow*);
static int copyprop(Graph*, Flow*);
Reg* findpre(Reg *r, Adr *v); static int copy1(Adr*, Adr*, Flow*, int);
void predicate(void); static int copyas(Adr*, Adr*);
int copyau1(Prog *p, Adr *v); static int copyau(Adr*, Adr*);
int isdconst(Addr *a); static int copysub(Adr*, Adr*, Adr*, int);
static int copysub1(Prog*, Adr*, Adr*, int);
static Flow* findpre(Flow *r, Adr *v);
static int copyau1(Prog *p, Adr *v);
static int isdconst(Addr *a);
// UNUSED
int shiftprop(Flow *r);
void constprop(Adr *c1, Adr *v1, Flow *r);
void predicate(Graph*);
void void
peep(void) peep(Prog *firstp)
{ {
Reg *r, *r1, *r2; Flow *r;
Graph *g;
Prog *p; Prog *p;
int t; int t;
ProgInfo info;
/*
* complete R structure
*/
for(r=firstr; r!=R; r=r1) {
r1 = r->link;
if(r1 == R)
break;
for(p = r->prog->link; p != r1->prog; p = p->link) {
proginfo(&info, p);
if(info.flags & Skip)
continue;
r2 = rega();
r->link = r2;
r2->link = r1;
r2->prog = p; g = flowstart(firstp, sizeof(Flow));
p->opt = r2; if(g == nil)
return;
r2->p1 = r;
r->s1 = r2;
r2->s1 = r1;
r1->p1 = r2;
r = r2;
}
}
//dumpit("begin", firstr);
loop1: loop1:
if(debug['P'] && debug['v'])
dumpit("loop1", g->start, 0);
t = 0; t = 0;
for(r=firstr; r!=R; r=r->link) { for(r=g->start; r!=nil; r=r->link) {
p = r->prog; p = r->prog;
switch(p->as) { switch(p->as) {
case ASLL: case ASLL:
...@@ -108,12 +94,12 @@ loop1: ...@@ -108,12 +94,12 @@ loop1:
if(regtyp(&p->from)) if(regtyp(&p->from))
if(p->from.type == p->to.type) if(p->from.type == p->to.type)
if(p->scond == C_SCOND_NONE) { if(p->scond == C_SCOND_NONE) {
if(copyprop(r)) { if(copyprop(g, r)) {
excise(r); excise(r);
t++; t++;
break; break;
} }
if(subprop(r) && copyprop(r)) { if(subprop(r) && copyprop(g, r)) {
excise(r); excise(r);
t++; t++;
break; break;
...@@ -144,7 +130,7 @@ loop1: ...@@ -144,7 +130,7 @@ loop1:
if(t) if(t)
goto loop1; goto loop1;
for(r=firstr; r!=R; r=r->link) { for(r=g->start; r!=nil; r=r->link) {
p = r->prog; p = r->prog;
switch(p->as) { switch(p->as) {
case AEOR: case AEOR:
...@@ -164,7 +150,7 @@ loop1: ...@@ -164,7 +150,7 @@ loop1:
} }
} }
for(r=firstr; r!=R; r=r->link) { for(r=g->start; r!=nil; r=r->link) {
p = r->prog; p = r->prog;
switch(p->as) { switch(p->as) {
case AMOVW: case AMOVW:
...@@ -172,10 +158,10 @@ loop1: ...@@ -172,10 +158,10 @@ loop1:
case AMOVBS: case AMOVBS:
case AMOVBU: case AMOVBU:
if(p->from.type == D_OREG && p->from.offset == 0) if(p->from.type == D_OREG && p->from.offset == 0)
xtramodes(r, &p->from); xtramodes(g, r, &p->from);
else else
if(p->to.type == D_OREG && p->to.offset == 0) if(p->to.type == D_OREG && p->to.offset == 0)
xtramodes(r, &p->to); xtramodes(g, r, &p->to);
else else
continue; continue;
break; break;
...@@ -186,7 +172,7 @@ loop1: ...@@ -186,7 +172,7 @@ loop1:
// if(isdconst(&p->from) || p->from.offset != 0) // if(isdconst(&p->from) || p->from.offset != 0)
// continue; // continue;
// r2 = r->s1; // r2 = r->s1;
// if(r2 == R) // if(r2 == nil)
// continue; // continue;
// t = r2->prog->as; // t = r2->prog->as;
// switch(t) { // switch(t) {
...@@ -213,8 +199,8 @@ loop1: ...@@ -213,8 +199,8 @@ loop1:
// r1 = r; // r1 = r;
// do // do
// r1 = uniqp(r1); // r1 = uniqp(r1);
// while (r1 != R && r1->prog->as == ANOP); // while (r1 != nil && r1->prog->as == ANOP);
// if(r1 == R) // if(r1 == nil)
// continue; // continue;
// p1 = r1->prog; // p1 = r1->prog;
// if(p1->to.type != D_REG) // if(p1->to.type != D_REG)
...@@ -249,47 +235,10 @@ loop1: ...@@ -249,47 +235,10 @@ loop1:
} }
} }
// predicate(); // predicate(g);
}
/*
* uniqp returns a "unique" predecessor to instruction r.
* If the instruction is the first one or has multiple
* predecessors due to jump, R is returned.
*/
Reg*
uniqp(Reg *r)
{
Reg *r1;
r1 = r->p1;
if(r1 == R) {
r1 = r->p2;
if(r1 == R || r1->p2link != R)
return R;
} else
if(r->p2 != R)
return R;
return r1;
}
Reg*
uniqs(Reg *r)
{
Reg *r1;
r1 = r->s1;
if(r1 == R) {
r1 = r->s2;
if(r1 == R)
return R;
} else
if(r->s2 != R)
return R;
return r1;
} }
int static int
regtyp(Adr *a) regtyp(Adr *a)
{ {
...@@ -314,12 +263,12 @@ regtyp(Adr *a) ...@@ -314,12 +263,12 @@ regtyp(Adr *a)
* hopefully, then the former or latter MOV * hopefully, then the former or latter MOV
* will be eliminated by copy propagation. * will be eliminated by copy propagation.
*/ */
int static int
subprop(Reg *r0) subprop(Flow *r0)
{ {
Prog *p; Prog *p;
Adr *v1, *v2; Adr *v1, *v2;
Reg *r; Flow *r;
int t; int t;
ProgInfo info; ProgInfo info;
...@@ -330,8 +279,8 @@ subprop(Reg *r0) ...@@ -330,8 +279,8 @@ subprop(Reg *r0)
v2 = &p->to; v2 = &p->to;
if(!regtyp(v2)) if(!regtyp(v2))
return 0; return 0;
for(r=uniqp(r0); r!=R; r=uniqp(r)) { for(r=uniqp(r0); r!=nil; r=uniqp(r)) {
if(uniqs(r) == R) if(uniqs(r) == nil)
break; break;
p = r->prog; p = r->prog;
proginfo(&info, p); proginfo(&info, p);
...@@ -405,25 +354,25 @@ gotit: ...@@ -405,25 +354,25 @@ gotit:
* set v1 F=1 * set v1 F=1
* set v2 return success * set v2 return success
*/ */
int static int
copyprop(Reg *r0) copyprop(Graph *g, Flow *r0)
{ {
Prog *p; Prog *p;
Adr *v1, *v2; Adr *v1, *v2;
Reg *r; Flow *r;
p = r0->prog; p = r0->prog;
v1 = &p->from; v1 = &p->from;
v2 = &p->to; v2 = &p->to;
if(copyas(v1, v2)) if(copyas(v1, v2))
return 1; return 1;
for(r=firstr; r!=R; r=r->link) for(r=g->start; r!=nil; r=r->link)
r->active = 0; r->active = 0;
return copy1(v1, v2, r0->s1, 0); return copy1(v1, v2, r0->s1, 0);
} }
int static int
copy1(Adr *v1, Adr *v2, Reg *r, int f) copy1(Adr *v1, Adr *v2, Flow *r, int f)
{ {
int t; int t;
Prog *p; Prog *p;
...@@ -436,11 +385,11 @@ copy1(Adr *v1, Adr *v2, Reg *r, int f) ...@@ -436,11 +385,11 @@ copy1(Adr *v1, Adr *v2, Reg *r, int f)
r->active = 1; r->active = 1;
if(debug['P']) if(debug['P'])
print("copy %D->%D f=%d\n", v1, v2, f); print("copy %D->%D f=%d\n", v1, v2, f);
for(; r != R; r = r->s1) { for(; r != nil; r = r->s1) {
p = r->prog; p = r->prog;
if(debug['P']) if(debug['P'])
print("%P", p); print("%P", p);
if(!f && uniqp(r) == R) { if(!f && uniqp(r) == nil) {
f = 1; f = 1;
if(debug['P']) if(debug['P'])
print("; merge; f=%d", f); print("; merge; f=%d", f);
...@@ -499,6 +448,7 @@ copy1(Adr *v1, Adr *v2, Reg *r, int f) ...@@ -499,6 +448,7 @@ copy1(Adr *v1, Adr *v2, Reg *r, int f)
return 1; return 1;
} }
// UNUSED
/* /*
* The idea is to remove redundant constants. * The idea is to remove redundant constants.
* $c1->v1 * $c1->v1
...@@ -507,17 +457,17 @@ copy1(Adr *v1, Adr *v2, Reg *r, int f) ...@@ -507,17 +457,17 @@ copy1(Adr *v1, Adr *v2, Reg *r, int f)
* The v1->v2 should be eliminated by copy propagation. * The v1->v2 should be eliminated by copy propagation.
*/ */
void void
constprop(Adr *c1, Adr *v1, Reg *r) constprop(Adr *c1, Adr *v1, Flow *r)
{ {
Prog *p; Prog *p;
if(debug['P']) if(debug['P'])
print("constprop %D->%D\n", c1, v1); print("constprop %D->%D\n", c1, v1);
for(; r != R; r = r->s1) { for(; r != nil; r = r->s1) {
p = r->prog; p = r->prog;
if(debug['P']) if(debug['P'])
print("%P", p); print("%P", p);
if(uniqp(r) == R) { if(uniqp(r) == nil) {
if(debug['P']) if(debug['P'])
print("; merge; return\n"); print("; merge; return\n");
return; return;
...@@ -541,27 +491,27 @@ constprop(Adr *c1, Adr *v1, Reg *r) ...@@ -541,27 +491,27 @@ constprop(Adr *c1, Adr *v1, Reg *r)
/* /*
* shortprop eliminates redundant zero/sign extensions. * shortprop eliminates redundant zero/sign extensions.
* *
* MOVBS x, R * MOVBS x, R
* <no use R> * <no use R>
* MOVBS R, R' * MOVBS R, R'
* *
* changed to * changed to
* *
* MOVBS x, R * MOVBS x, R
* ... * ...
* MOVB R, R' (compiled to mov) * MOVB R, R' (compiled to mov)
* *
* MOVBS above can be a MOVBS, MOVBU, MOVHS or MOVHU. * MOVBS above can be a MOVBS, MOVBU, MOVHS or MOVHU.
*/ */
int static int
shortprop(Reg *r) shortprop(Flow *r)
{ {
Prog *p, *p1; Prog *p, *p1;
Reg *r1; Flow *r1;
p = r->prog; p = r->prog;
r1 = findpre(r, &p->from); r1 = findpre(r, &p->from);
if(r1 == R) if(r1 == nil)
return 0; return 0;
p1 = r1->prog; p1 = r1->prog;
...@@ -596,6 +546,7 @@ gotit: ...@@ -596,6 +546,7 @@ gotit:
return 1; return 1;
} }
// UNUSED
/* /*
* ASLL x,y,w * ASLL x,y,w
* .. (not use w, not set x y w) * .. (not use w, not set x y w)
...@@ -609,9 +560,9 @@ gotit: ...@@ -609,9 +560,9 @@ gotit:
*/ */
#define FAIL(msg) { if(debug['P']) print("\t%s; FAILURE\n", msg); return 0; } #define FAIL(msg) { if(debug['P']) print("\t%s; FAILURE\n", msg); return 0; }
int int
shiftprop(Reg *r) shiftprop(Flow *r)
{ {
Reg *r1; Flow *r1;
Prog *p, *p1, *p2; Prog *p, *p1, *p2;
int n, o; int n, o;
Adr a; Adr a;
...@@ -631,9 +582,9 @@ shiftprop(Reg *r) ...@@ -631,9 +582,9 @@ shiftprop(Reg *r)
for(;;) { for(;;) {
/* find first use of shift result; abort if shift operands or result are changed */ /* find first use of shift result; abort if shift operands or result are changed */
r1 = uniqs(r1); r1 = uniqs(r1);
if(r1 == R) if(r1 == nil)
FAIL("branch"); FAIL("branch");
if(uniqp(r1) == R) if(uniqp(r1) == nil)
FAIL("merge"); FAIL("merge");
p1 = r1->prog; p1 = r1->prog;
if(debug['P']) if(debug['P'])
...@@ -704,7 +655,7 @@ shiftprop(Reg *r) ...@@ -704,7 +655,7 @@ shiftprop(Reg *r)
if(p1->to.reg != n) if(p1->to.reg != n)
for (;;) { for (;;) {
r1 = uniqs(r1); r1 = uniqs(r1);
if(r1 == R) if(r1 == nil)
FAIL("inconclusive"); FAIL("inconclusive");
p1 = r1->prog; p1 = r1->prog;
if(debug['P']) if(debug['P'])
...@@ -757,40 +708,40 @@ shiftprop(Reg *r) ...@@ -757,40 +708,40 @@ shiftprop(Reg *r)
* before r. It must be a set, and there must be * before r. It must be a set, and there must be
* a unique path from that instruction to r. * a unique path from that instruction to r.
*/ */
Reg* static Flow*
findpre(Reg *r, Adr *v) findpre(Flow *r, Adr *v)
{ {
Reg *r1; Flow *r1;
for(r1=uniqp(r); r1!=R; r=r1,r1=uniqp(r)) { for(r1=uniqp(r); r1!=nil; r=r1,r1=uniqp(r)) {
if(uniqs(r1) != r) if(uniqs(r1) != r)
return R; return nil;
switch(copyu(r1->prog, v, A)) { switch(copyu(r1->prog, v, A)) {
case 1: /* used */ case 1: /* used */
case 2: /* read-alter-rewrite */ case 2: /* read-alter-rewrite */
return R; return nil;
case 3: /* set */ case 3: /* set */
case 4: /* set and used */ case 4: /* set and used */
return r1; return r1;
} }
} }
return R; return nil;
} }
/* /*
* findinc finds ADD instructions with a constant * findinc finds ADD instructions with a constant
* argument which falls within the immed_12 range. * argument which falls within the immed_12 range.
*/ */
Reg* static Flow*
findinc(Reg *r, Reg *r2, Adr *v) findinc(Flow *r, Flow *r2, Adr *v)
{ {
Reg *r1; Flow *r1;
Prog *p; Prog *p;
for(r1=uniqs(r); r1!=R && r1!=r2; r=r1,r1=uniqs(r)) { for(r1=uniqs(r); r1!=nil && r1!=r2; r=r1,r1=uniqs(r)) {
if(uniqp(r1) != r) if(uniqp(r1) != r)
return R; return nil;
switch(copyu(r1->prog, v, A)) { switch(copyu(r1->prog, v, A)) {
case 0: /* not touched */ case 0: /* not touched */
continue; continue;
...@@ -801,14 +752,14 @@ findinc(Reg *r, Reg *r2, Adr *v) ...@@ -801,14 +752,14 @@ findinc(Reg *r, Reg *r2, Adr *v)
if(p->from.offset > -4096 && p->from.offset < 4096) if(p->from.offset > -4096 && p->from.offset < 4096)
return r1; return r1;
default: default:
return R; return nil;
} }
} }
return R; return nil;
} }
int static int
nochange(Reg *r, Reg *r2, Prog *p) nochange(Flow *r, Flow *r2, Prog *p)
{ {
Adr a[3]; Adr a[3];
int i, n; int i, n;
...@@ -830,7 +781,7 @@ nochange(Reg *r, Reg *r2, Prog *p) ...@@ -830,7 +781,7 @@ nochange(Reg *r, Reg *r2, Prog *p)
} }
if(n == 0) if(n == 0)
return 1; return 1;
for(; r!=R && r!=r2; r=uniqs(r)) { for(; r!=nil && r!=r2; r=uniqs(r)) {
p = r->prog; p = r->prog;
for(i=0; i<n; i++) for(i=0; i<n; i++)
if(copyu(p, &a[i], A) > 1) if(copyu(p, &a[i], A) > 1)
...@@ -839,10 +790,10 @@ nochange(Reg *r, Reg *r2, Prog *p) ...@@ -839,10 +790,10 @@ nochange(Reg *r, Reg *r2, Prog *p)
return 1; return 1;
} }
int static int
findu1(Reg *r, Adr *v) findu1(Flow *r, Adr *v)
{ {
for(; r != R; r = r->s1) { for(; r != nil; r = r->s1) {
if(r->active) if(r->active)
return 0; return 0;
r->active = 1; r->active = 1;
...@@ -861,12 +812,12 @@ findu1(Reg *r, Adr *v) ...@@ -861,12 +812,12 @@ findu1(Reg *r, Adr *v)
return 0; return 0;
} }
int static int
finduse(Reg *r, Adr *v) finduse(Graph *g, Flow *r, Adr *v)
{ {
Reg *r1; Flow *r1;
for(r1=firstr; r1!=R; r1=r1->link) for(r1=g->start; r1!=nil; r1=r1->link)
r1->active = 0; r1->active = 0;
return findu1(r, v); return findu1(r, v);
} }
...@@ -884,10 +835,10 @@ finduse(Reg *r, Adr *v) ...@@ -884,10 +835,10 @@ finduse(Reg *r, Adr *v)
* into * into
* MOVBU R0<<0(R1),R0 * MOVBU R0<<0(R1),R0
*/ */
int static int
xtramodes(Reg *r, Adr *a) xtramodes(Graph *g, Flow *r, Adr *a)
{ {
Reg *r1, *r2, *r3; Flow *r1, *r2, *r3;
Prog *p, *p1; Prog *p, *p1;
Adr v; Adr v;
...@@ -895,7 +846,7 @@ xtramodes(Reg *r, Adr *a) ...@@ -895,7 +846,7 @@ xtramodes(Reg *r, Adr *a)
v = *a; v = *a;
v.type = D_REG; v.type = D_REG;
r1 = findpre(r, &v); r1 = findpre(r, &v);
if(r1 != R) { if(r1 != nil) {
p1 = r1->prog; p1 = r1->prog;
if(p1->to.type == D_REG && p1->to.reg == v.reg) if(p1->to.type == D_REG && p1->to.reg == v.reg)
switch(p1->as) { switch(p1->as) {
...@@ -910,7 +861,7 @@ xtramodes(Reg *r, Adr *a) ...@@ -910,7 +861,7 @@ xtramodes(Reg *r, Adr *a)
p1->from.offset > -4096 && p1->from.offset < 4096)) p1->from.offset > -4096 && p1->from.offset < 4096))
if(nochange(uniqs(r1), r, p1)) { if(nochange(uniqs(r1), r, p1)) {
if(a != &p->from || v.reg != p->to.reg) if(a != &p->from || v.reg != p->to.reg)
if (finduse(r->s1, &v)) { if (finduse(g, r->s1, &v)) {
if(p1->reg == NREG || p1->reg == v.reg) if(p1->reg == NREG || p1->reg == v.reg)
/* pre-indexing */ /* pre-indexing */
p->scond |= C_WBIT; p->scond |= C_WBIT;
...@@ -938,7 +889,7 @@ xtramodes(Reg *r, Adr *a) ...@@ -938,7 +889,7 @@ xtramodes(Reg *r, Adr *a)
break; break;
case AMOVW: case AMOVW:
if(p1->from.type == D_REG) if(p1->from.type == D_REG)
if((r2 = findinc(r1, r, &p1->from)) != R) { if((r2 = findinc(r1, r, &p1->from)) != nil) {
for(r3=uniqs(r2); r3->prog->as==ANOP; r3=uniqs(r3)) for(r3=uniqs(r2); r3->prog->as==ANOP; r3=uniqs(r3))
; ;
if(r3 == r) { if(r3 == r) {
...@@ -947,7 +898,7 @@ xtramodes(Reg *r, Adr *a) ...@@ -947,7 +898,7 @@ xtramodes(Reg *r, Adr *a)
a->reg = p1->to.reg; a->reg = p1->to.reg;
a->offset = p1->from.offset; a->offset = p1->from.offset;
p->scond |= C_PBIT; p->scond |= C_PBIT;
if(!finduse(r, &r1->prog->to)) if(!finduse(g, r, &r1->prog->to))
excise(r1); excise(r1);
excise(r2); excise(r2);
return 1; return 1;
...@@ -957,7 +908,7 @@ xtramodes(Reg *r, Adr *a) ...@@ -957,7 +908,7 @@ xtramodes(Reg *r, Adr *a)
} }
} }
if(a != &p->from || a->reg != p->to.reg) if(a != &p->from || a->reg != p->to.reg)
if((r1 = findinc(r, R, &v)) != R) { if((r1 = findinc(r, nil, &v)) != nil) {
/* post-indexing */ /* post-indexing */
p1 = r1->prog; p1 = r1->prog;
a->offset = p1->from.offset; a->offset = p1->from.offset;
...@@ -1218,7 +1169,7 @@ copyu(Prog *p, Adr *v, Adr *s) ...@@ -1218,7 +1169,7 @@ copyu(Prog *p, Adr *v, Adr *s)
* could be set/use depending on * could be set/use depending on
* semantics * semantics
*/ */
int static int
copyas(Adr *a, Adr *v) copyas(Adr *a, Adr *v)
{ {
...@@ -1241,7 +1192,7 @@ copyas(Adr *a, Adr *v) ...@@ -1241,7 +1192,7 @@ copyas(Adr *a, Adr *v)
/* /*
* either direct or indirect * either direct or indirect
*/ */
int static int
copyau(Adr *a, Adr *v) copyau(Adr *a, Adr *v)
{ {
...@@ -1282,7 +1233,7 @@ copyau(Adr *a, Adr *v) ...@@ -1282,7 +1233,7 @@ copyau(Adr *a, Adr *v)
* ADD r,r,r * ADD r,r,r
* CMP r,r, * CMP r,r,
*/ */
int static int
copyau1(Prog *p, Adr *v) copyau1(Prog *p, Adr *v)
{ {
...@@ -1307,7 +1258,7 @@ copyau1(Prog *p, Adr *v) ...@@ -1307,7 +1258,7 @@ copyau1(Prog *p, Adr *v)
* substitute s for v in a * substitute s for v in a
* return failure to substitute * return failure to substitute
*/ */
int static int
copysub(Adr *a, Adr *v, Adr *s, int f) copysub(Adr *a, Adr *v, Adr *s, int f)
{ {
...@@ -1330,7 +1281,7 @@ copysub(Adr *a, Adr *v, Adr *s, int f) ...@@ -1330,7 +1281,7 @@ copysub(Adr *a, Adr *v, Adr *s, int f)
return 0; return 0;
} }
int static int
copysub1(Prog *p1, Adr *v, Adr *s, int f) copysub1(Prog *p1, Adr *v, Adr *s, int f)
{ {
...@@ -1365,9 +1316,9 @@ struct { ...@@ -1365,9 +1316,9 @@ struct {
}; };
typedef struct { typedef struct {
Reg *start; Flow *start;
Reg *last; Flow *last;
Reg *end; Flow *end;
int len; int len;
} Joininfo; } Joininfo;
...@@ -1387,13 +1338,13 @@ enum { ...@@ -1387,13 +1338,13 @@ enum {
Keepbranch Keepbranch
}; };
int static int
isbranch(Prog *p) isbranch(Prog *p)
{ {
return (ABEQ <= p->as) && (p->as <= ABLE); return (ABEQ <= p->as) && (p->as <= ABLE);
} }
int static int
predicable(Prog *p) predicable(Prog *p)
{ {
switch(p->as) { switch(p->as) {
...@@ -1423,7 +1374,7 @@ predicable(Prog *p) ...@@ -1423,7 +1374,7 @@ predicable(Prog *p)
* *
* C_SBIT may also have been set explicitly in p->scond. * C_SBIT may also have been set explicitly in p->scond.
*/ */
int static int
modifiescpsr(Prog *p) modifiescpsr(Prog *p)
{ {
switch(p->as) { switch(p->as) {
...@@ -1452,8 +1403,8 @@ modifiescpsr(Prog *p) ...@@ -1452,8 +1403,8 @@ modifiescpsr(Prog *p)
* Find the maximal chain of instructions starting with r which could * Find the maximal chain of instructions starting with r which could
* be executed conditionally * be executed conditionally
*/ */
int static int
joinsplit(Reg *r, Joininfo *j) joinsplit(Flow *r, Joininfo *j)
{ {
j->start = r; j->start = r;
j->last = r; j->last = r;
...@@ -1488,8 +1439,8 @@ joinsplit(Reg *r, Joininfo *j) ...@@ -1488,8 +1439,8 @@ joinsplit(Reg *r, Joininfo *j)
return Toolong; return Toolong;
} }
Reg* static Flow*
successor(Reg *r) successor(Flow *r)
{ {
if(r->s1) if(r->s1)
return r->s1; return r->s1;
...@@ -1497,11 +1448,11 @@ successor(Reg *r) ...@@ -1497,11 +1448,11 @@ successor(Reg *r)
return r->s2; return r->s2;
} }
void static void
applypred(Reg *rstart, Joininfo *j, int cond, int branch) applypred(Flow *rstart, Joininfo *j, int cond, int branch)
{ {
int pred; int pred;
Reg *r; Flow *r;
if(j->len == 0) if(j->len == 0)
return; return;
...@@ -1534,13 +1485,13 @@ applypred(Reg *rstart, Joininfo *j, int cond, int branch) ...@@ -1534,13 +1485,13 @@ applypred(Reg *rstart, Joininfo *j, int cond, int branch)
} }
void void
predicate(void) predicate(Graph *g)
{ {
Reg *r; Flow *r;
int t1, t2; int t1, t2;
Joininfo j1, j2; Joininfo j1, j2;
for(r=firstr; r!=R; r=r->link) { for(r=g->start; r!=nil; r=r->link) {
if (isbranch(r->prog)) { if (isbranch(r->prog)) {
t1 = joinsplit(r->s1, &j1); t1 = joinsplit(r->s1, &j1);
t2 = joinsplit(r->s2, &j2); t2 = joinsplit(r->s2, &j2);
...@@ -1563,7 +1514,7 @@ predicate(void) ...@@ -1563,7 +1514,7 @@ predicate(void)
} }
} }
int static int
isdconst(Addr *a) isdconst(Addr *a)
{ {
if(a->type == D_CONST && a->reg == NREG) if(a->type == D_CONST && a->reg == NREG)
......
...@@ -38,23 +38,7 @@ ...@@ -38,23 +38,7 @@
#define REGBITS ((uint32)0xffffffff) #define REGBITS ((uint32)0xffffffff)
void addsplits(void); void addsplits(void);
static int first = 0; static int first = 1;
Reg*
rega(void)
{
Reg *r;
r = freer;
if(r == R) {
r = mal(sizeof(*r));
} else
freer = r->link;
*r = zreg;
return r;
}
int int
rcmp(const void *a1, const void *a2) rcmp(const void *a1, const void *a2)
...@@ -96,7 +80,7 @@ setoutvar(void) ...@@ -96,7 +80,7 @@ setoutvar(void)
} }
void void
excise(Reg *r) excise(Flow *r)
{ {
Prog *p; Prog *p;
...@@ -173,40 +157,19 @@ regopt(Prog *firstp) ...@@ -173,40 +157,19 @@ regopt(Prog *firstp)
{ {
Reg *r, *r1; Reg *r, *r1;
Prog *p; Prog *p;
int i, z, nr; Graph *g;
int i, z;
uint32 vreg; uint32 vreg;
Bits bit; Bits bit;
ProgInfo info, info2; ProgInfo info;
if(first == 0) { if(first) {
fmtinstall('Q', Qconv); fmtinstall('Q', Qconv);
first = 0;
} }
fixjmp(firstp); fixjmp(firstp);
first++;
if(debug['K']) {
if(first != 13)
return;
// debug['R'] = 2;
// debug['P'] = 2;
print("optimizing %S\n", curfn->nname->sym);
}
// count instructions
nr = 0;
for(p=firstp; p!=P; p=p->link)
nr++;
// if too big dont bother
if(nr >= 10000) {
// print("********** %S is too big (%d)\n", curfn->nname->sym, nr);
return;
}
firstr = R;
lastr = R;
/* /*
* control flow is more complicated in generated go code * control flow is more complicated in generated go code
* than in generated c code. define pseudo-variables for * than in generated c code. define pseudo-variables for
...@@ -238,34 +201,14 @@ regopt(Prog *firstp) ...@@ -238,34 +201,14 @@ regopt(Prog *firstp)
* allocate pcs * allocate pcs
* find use and set of variables * find use and set of variables
*/ */
nr = 0; g = flowstart(firstp, sizeof(Reg));
for(p=firstp; p != P; p = p->link) { if(g == nil)
proginfo(&info, p); return;
if(info.flags & Skip) firstr = (Reg*)g->start;
continue;
r = rega();
nr++;
if(firstr == R) {
firstr = r;
lastr = r;
} else {
lastr->link = r;
r->p1 = lastr;
lastr->s1 = r;
lastr = r;
}
r->prog = p;
p->opt = r;
r1 = r->p1; for(r = firstr; r != R; r = (Reg*)r->f.link) {
if(r1 != R) { p = r->f.prog;
proginfo(&info2, r1->prog); proginfo(&info, p);
if(info2.flags & Break) {
r->p1 = R;
r1->s1 = R;
}
}
// Avoid making variables for direct-called functions. // Avoid making variables for direct-called functions.
if(p->as == ABL && p->to.type == D_EXTERN) if(p->as == ABL && p->to.type == D_EXTERN)
...@@ -313,50 +256,19 @@ regopt(Prog *firstp) ...@@ -313,50 +256,19 @@ regopt(Prog *firstp)
} }
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
dumpit("pass1", firstr); dumpit("pass1", &firstr->f, 1);
/* /*
* pass 2 * pass 2
* turn branch references to pointers
* build back pointers
*/
for(r=firstr; r!=R; r=r->link) {
p = r->prog;
if(p->to.type == D_BRANCH) {
if(p->to.u.branch == P)
fatal("pnil %P", p);
r1 = p->to.u.branch->opt;
if(r1 == R)
fatal("rnil %P", p);
if(r1 == r) {
//fatal("ref to self %P", p);
continue;
}
r->s2 = r1;
r->p2link = r1->p2;
r1->p2 = r;
}
}
if(debug['R']) {
p = firstr->prog;
print("\n%L %D\n", p->lineno, &p->from);
print(" addr = %Q\n", addrs);
}
if(debug['R'] && debug['v'])
dumpit("pass2", firstr);
/*
* pass 2.5
* find looping structure * find looping structure
*/ */
for(r = firstr; r != R; r = r->link) for(r = firstr; r != R; r = (Reg*)r->f.link)
r->active = 0; r->f.active = 0;
change = 0; change = 0;
loopit(firstr, nr); flowrpo(g);
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
dumpit("pass2.5", firstr); dumpit("pass2", &firstr->f, 1);
/* /*
* pass 3 * pass 3
...@@ -365,17 +277,17 @@ regopt(Prog *firstp) ...@@ -365,17 +277,17 @@ regopt(Prog *firstp)
*/ */
loop1: loop1:
change = 0; change = 0;
for(r = firstr; r != R; r = r->link) for(r = firstr; r != R; r = (Reg*)r->f.link)
r->active = 0; r->f.active = 0;
for(r = firstr; r != R; r = r->link) for(r = firstr; r != R; r = (Reg*)r->f.link)
if(r->prog->as == ARET) if(r->f.prog->as == ARET)
prop(r, zbits, zbits); prop(r, zbits, zbits);
loop11: loop11:
/* pick up unreachable code */ /* pick up unreachable code */
i = 0; i = 0;
for(r = firstr; r != R; r = r1) { for(r = firstr; r != R; r = r1) {
r1 = r->link; r1 = (Reg*)r->f.link;
if(r1 && r1->active && !r->active) { if(r1 && r1->f.active && !r->f.active) {
prop(r, zbits, zbits); prop(r, zbits, zbits);
i = 1; i = 1;
} }
...@@ -386,7 +298,7 @@ loop11: ...@@ -386,7 +298,7 @@ loop11:
goto loop1; goto loop1;
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
dumpit("pass3", firstr); dumpit("pass3", &firstr->f, 1);
/* /*
...@@ -396,8 +308,8 @@ loop11: ...@@ -396,8 +308,8 @@ loop11:
*/ */
loop2: loop2:
change = 0; change = 0;
for(r = firstr; r != R; r = r->link) for(r = firstr; r != R; r = (Reg*)r->f.link)
r->active = 0; r->f.active = 0;
synch(firstr, zbits); synch(firstr, zbits);
if(change) if(change)
goto loop2; goto loop2;
...@@ -405,12 +317,12 @@ loop2: ...@@ -405,12 +317,12 @@ loop2:
addsplits(); addsplits();
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
dumpit("pass4", firstr); dumpit("pass4", &firstr->f, 1);
if(debug['R'] > 1) { if(debug['R'] > 1) {
print("\nprop structure:\n"); print("\nprop structure:\n");
for(r = firstr; r != R; r = r->link) { for(r = firstr; r != R; r = (Reg*)r->f.link) {
print("%d:%P", r->loop, r->prog); print("%d:%P", r->f.loop, r->f.prog);
for(z=0; z<BITS; z++) { for(z=0; z<BITS; z++) {
bit.b[z] = r->set.b[z] | bit.b[z] = r->set.b[z] |
r->refahead.b[z] | r->calahead.b[z] | r->refahead.b[z] | r->calahead.b[z] |
...@@ -444,7 +356,7 @@ loop2: ...@@ -444,7 +356,7 @@ loop2:
* pass 4.5 * pass 4.5
* move register pseudo-variables into regu. * move register pseudo-variables into regu.
*/ */
for(r = firstr; r != R; r = r->link) { for(r = firstr; r != R; r = (Reg*)r->f.link) {
r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
r->set.b[0] &= ~REGBITS; r->set.b[0] &= ~REGBITS;
...@@ -459,7 +371,7 @@ loop2: ...@@ -459,7 +371,7 @@ loop2:
} }
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
dumpit("pass4.5", firstr); dumpit("pass4.5", &firstr->f, 1);
/* /*
* pass 5 * pass 5
...@@ -471,27 +383,27 @@ loop2: ...@@ -471,27 +383,27 @@ loop2:
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
if(bany(&bit) & !r->refset) { if(bany(&bit) & !r->f.refset) {
// should never happen - all variables are preset // should never happen - all variables are preset
if(debug['w']) if(debug['w'])
print("%L: used and not set: %Q\n", r->prog->lineno, bit); print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
r->refset = 1; r->f.refset = 1;
} }
} }
for(r = firstr; r != R; r = r->link) for(r = firstr; r != R; r = (Reg*)r->f.link)
r->act = zbits; r->act = zbits;
rgp = region; rgp = region;
nregion = 0; nregion = 0;
for(r = firstr; r != R; r = r->link) { for(r = firstr; r != R; r = (Reg*)r->f.link) {
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
bit.b[z] = r->set.b[z] & bit.b[z] = r->set.b[z] &
~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
if(bany(&bit) && !r->refset) { if(bany(&bit) && !r->f.refset) {
if(debug['w']) if(debug['w'])
print("%L: set and not used: %Q\n", r->prog->lineno, bit); print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
r->refset = 1; r->f.refset = 1;
excise(r); excise(&r->f);
} }
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
...@@ -507,7 +419,7 @@ loop2: ...@@ -507,7 +419,7 @@ loop2:
if(change <= 0) { if(change <= 0) {
if(debug['R']) if(debug['R'])
print("%L $%d: %Q\n", print("%L $%d: %Q\n",
r->prog->lineno, change, blsh(i)); r->f.prog->lineno, change, blsh(i));
continue; continue;
} }
rgp->cost = change; rgp->cost = change;
...@@ -524,7 +436,7 @@ brk: ...@@ -524,7 +436,7 @@ brk:
qsort(region, nregion, sizeof(region[0]), rcmp); qsort(region, nregion, sizeof(region[0]), rcmp);
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
dumpit("pass5", firstr); dumpit("pass5", &firstr->f, 1);
/* /*
* pass 6 * pass 6
...@@ -539,13 +451,13 @@ brk: ...@@ -539,13 +451,13 @@ brk:
if(debug['R']) { if(debug['R']) {
if(rgp->regno >= NREG) if(rgp->regno >= NREG)
print("%L $%d F%d: %Q\n", print("%L $%d F%d: %Q\n",
rgp->enter->prog->lineno, rgp->enter->f.prog->lineno,
rgp->cost, rgp->cost,
rgp->regno-NREG, rgp->regno-NREG,
bit); bit);
else else
print("%L $%d R%d: %Q\n", print("%L $%d R%d: %Q\n",
rgp->enter->prog->lineno, rgp->enter->f.prog->lineno,
rgp->cost, rgp->cost,
rgp->regno, rgp->regno,
bit); bit);
...@@ -556,18 +468,18 @@ brk: ...@@ -556,18 +468,18 @@ brk:
} }
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
dumpit("pass6", firstr); dumpit("pass6", &firstr->f, 1);
/* /*
* pass 7 * pass 7
* peep-hole on basic block * peep-hole on basic block
*/ */
if(!debug['R'] || debug['P']) { if(!debug['R'] || debug['P']) {
peep(); peep(firstp);
} }
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
dumpit("pass7", firstr); dumpit("pass7", &firstr->f, 1);
/* /*
* last pass * last pass
...@@ -623,11 +535,8 @@ brk: ...@@ -623,11 +535,8 @@ brk:
} }
} }
} }
if(lastr != R) {
lastr->link = freer;
freer = firstr;
}
flowend(g);
} }
void void
...@@ -637,13 +546,13 @@ addsplits(void) ...@@ -637,13 +546,13 @@ addsplits(void)
int z, i; int z, i;
Bits bit; Bits bit;
for(r = firstr; r != R; r = r->link) { for(r = firstr; r != R; r = (Reg*)r->f.link) {
if(r->loop > 1) if(r->f.loop > 1)
continue; continue;
if(r->prog->as == ABL) if(r->f.prog->as == ABL)
continue; continue;
for(r1 = r->p2; r1 != R; r1 = r1->p2link) { for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) {
if(r1->loop <= 1) if(r1->f.loop <= 1)
continue; continue;
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
bit.b[z] = r1->calbehind.b[z] & bit.b[z] = r1->calbehind.b[z] &
...@@ -670,7 +579,7 @@ addmove(Reg *r, int bn, int rn, int f) ...@@ -670,7 +579,7 @@ addmove(Reg *r, int bn, int rn, int f)
p1 = mal(sizeof(*p1)); p1 = mal(sizeof(*p1));
*p1 = zprog; *p1 = zprog;
p = r->prog; p = r->f.prog;
// If there's a stack fixup coming (after BL newproc or BL deferproc), // If there's a stack fixup coming (after BL newproc or BL deferproc),
// delay the load until after the fixup. // delay the load until after the fixup.
...@@ -814,11 +723,11 @@ mkvar(Reg *r, Adr *a) ...@@ -814,11 +723,11 @@ mkvar(Reg *r, Adr *a)
case D_OREG: case D_OREG:
if(a->reg != NREG) { if(a->reg != NREG) {
if(a == &r->prog->from) if(a == &r->f.prog->from)
r->use1.b[0] |= RtoB(a->reg); r->use1.b[0] |= RtoB(a->reg);
else else
r->use2.b[0] |= RtoB(a->reg); r->use2.b[0] |= RtoB(a->reg);
if(r->prog->scond & (C_PBIT|C_WBIT)) if(r->f.prog->scond & (C_PBIT|C_WBIT))
r->set.b[0] |= RtoB(a->reg); r->set.b[0] |= RtoB(a->reg);
} }
break; break;
...@@ -921,7 +830,7 @@ prop(Reg *r, Bits ref, Bits cal) ...@@ -921,7 +830,7 @@ prop(Reg *r, Bits ref, Bits cal)
Reg *r1, *r2; Reg *r1, *r2;
int z; int z;
for(r1 = r; r1 != R; r1 = r1->p1) { for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
for(z=0; z<BITS; z++) { for(z=0; z<BITS; z++) {
ref.b[z] |= r1->refahead.b[z]; ref.b[z] |= r1->refahead.b[z];
if(ref.b[z] != r1->refahead.b[z]) { if(ref.b[z] != r1->refahead.b[z]) {
...@@ -934,9 +843,9 @@ prop(Reg *r, Bits ref, Bits cal) ...@@ -934,9 +843,9 @@ prop(Reg *r, Bits ref, Bits cal)
change++; change++;
} }
} }
switch(r1->prog->as) { switch(r1->f.prog->as) {
case ABL: case ABL:
if(noreturn(r1->prog)) if(noreturn(r1->f.prog))
break; break;
for(z=0; z<BITS; z++) { for(z=0; z<BITS; z++) {
cal.b[z] |= ref.b[z] | externs.b[z]; cal.b[z] |= ref.b[z] | externs.b[z];
...@@ -976,158 +885,22 @@ prop(Reg *r, Bits ref, Bits cal) ...@@ -976,158 +885,22 @@ prop(Reg *r, Bits ref, Bits cal)
r1->refbehind.b[z] = ref.b[z]; r1->refbehind.b[z] = ref.b[z];
r1->calbehind.b[z] = cal.b[z]; r1->calbehind.b[z] = cal.b[z];
} }
if(r1->active) if(r1->f.active)
break; break;
r1->active = 1; r1->f.active = 1;
} }
for(; r != r1; r = r->p1) for(; r != r1; r = (Reg*)r->f.p1)
for(r2 = r->p2; r2 != R; r2 = r2->p2link) for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
prop(r2, r->refbehind, r->calbehind); prop(r2, r->refbehind, r->calbehind);
} }
/*
* find looping structure
*
* 1) find reverse postordering
* 2) find approximate dominators,
* the actual dominators if the flow graph is reducible
* otherwise, dominators plus some other non-dominators.
* See Matthew S. Hecht and Jeffrey D. Ullman,
* "Analysis of a Simple Algorithm for Global Data Flow Problems",
* Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
* Oct. 1-3, 1973, pp. 207-217.
* 3) find all nodes with a predecessor dominated by the current node.
* such a node is a loop head.
* recursively, all preds with a greater rpo number are in the loop
*/
/*
 * Depth-first walk of the successor graph starting at r.
 * Each node is marked visited by setting its rpo field to 1, its
 * not-yet-visited successors (s1 first, then s2) are walked, and the
 * node itself is then stored in rpo2r at index n.
 * Returns the next free index in rpo2r.
 */
int32
postorder(Reg *r, Reg **rpo2r, int32 n)
{
	Reg *next;
	int which;

	r->rpo = 1;	// non-zero rpo doubles as the "visited" mark
	for(which = 0; which < 2; which++) {
		next = (which == 0) ? r->s1 : r->s2;
		if(next && !next->rpo)
			n = postorder(next, rpo2r, n);
	}
	rpo2r[n++] = r;
	return n;
}
/*
 * Walk two rpo numbers up the idom[] chain until they meet and
 * return the meeting point.  An rpo1 of -1 means "no candidate yet",
 * in which case rpo2 is returned unchanged.
 * Calls fatal("bad idom") if an idom entry is not strictly smaller
 * than the index it was read from.
 */
int32
rpolca(int32 *idom, int32 rpo1, int32 rpo2)
{
	int32 lo, hi, up;

	if(rpo1 == -1)
		return rpo2;

	lo = rpo1;
	hi = rpo2;
	for(;;) {
		if(lo == hi)
			return lo;
		if(lo > hi) {
			// keep lo as the smaller of the two
			up = hi;
			hi = lo;
			lo = up;
		}
		// lo < hi holds here, so at least one step up is taken
		do {
			up = idom[hi];
			if(up >= hi)
				fatal("bad idom");
			hi = up;
		} while(lo < hi);
	}
}
/*
 * Report whether following idom[] upward from s reaches r.
 * s is replaced by idom[s] for as long as it is greater than r;
 * the result is 1 if the walk stops exactly on r and 0 otherwise.
 */
int
doms(int32 *idom, int32 r, int32 s)
{
	for(; s > r; s = idom[s])
		;
	if(s == r)
		return 1;
	return 0;
}
/*
 * Return 1 if doms() reports that r's rpo number is reached from the
 * rpo number of any predecessor of r (the p1 predecessor, or any node
 * on the p2/p2link list); return 0 otherwise.
 */
int
loophead(int32 *idom, Reg *r)
{
	int32 head;
	Reg *pred;

	head = r->rpo;

	pred = r->p1;
	if(pred != R && doms(idom, head, pred->rpo))
		return 1;

	pred = r->p2;
	while(pred != R) {
		if(doms(idom, head, pred->rpo))
			return 1;
		pred = pred->p2link;
	}
	return 0;
}
/*
 * Starting at r, walk predecessors backwards and add LOOP to the loop
 * field of every node whose rpo is not below head.  The active field
 * is set to head on each node touched so it is processed only once
 * per head.
 */
void
loopmark(Reg **rpo2r, int32 head, Reg *r)
{
	Reg *pred;

	if(r->rpo >= head && r->active != head) {
		r->active = head;
		r->loop += LOOP;

		if(r->p1 != R)
			loopmark(rpo2r, head, r->p1);
		pred = r->p2;
		while(pred != R) {
			loopmark(rpo2r, head, pred);
			pred = pred->p2link;
		}
	}
}
/*
 * Compute loop nesting weights for the graph rooted at r.
 * nr is the caller's count of nodes; it is used to size the rpo2r and
 * idom work arrays and is then replaced by the number of nodes that
 * postorder actually reached.
 */
void
loopit(Reg *r, int32 nr)
{
Reg *r1;
int32 i, d, me;
// Grow the work arrays only when this function is larger than any seen so far.
if(nr > maxnr) {
rpo2r = mal(nr * sizeof(Reg*));
idom = mal(nr * sizeof(int32));
maxnr = nr;
}
// Fill rpo2r in postorder; d is the number of nodes stored.
d = postorder(r, rpo2r, 0);
if(d > nr)
fatal("too many reg nodes");
nr = d;
// Reverse the array in place so that it is in reverse postorder.
for(i = 0; i < nr / 2; i++){
r1 = rpo2r[i];
rpo2r[i] = rpo2r[nr - 1 - i];
rpo2r[nr - 1 - i] = r1;
}
// Replace the visited mark left by postorder with the node's index in rpo2r.
for(i = 0; i < nr; i++)
rpo2r[i]->rpo = i;
// NOTE(review): the i == 0 iteration of the loop below stores its own d into idom[0].
idom[0] = 0;
// For each node, fold rpolca over every predecessor with a smaller rpo number;
// d stays -1 if there is no such predecessor.
for(i = 0; i < nr; i++){
r1 = rpo2r[i];
me = r1->rpo;
d = -1;
// rpo2r[r->rpo] == r protects against considering dead code,
// which has r->rpo == 0.
if(r1->p1 != R && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me)
d = r1->p1->rpo;
// r1 is reused from here on as the cursor over the p2/p2link predecessor list.
for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
if(rpo2r[r1->rpo] == r1 && r1->rpo < me)
d = rpolca(idom, d, r1->rpo);
idom[i] = d;
}
// Every reached node gets loop incremented once; nodes that have a p2
// predecessor and satisfy loophead additionally have loopmark run from them.
for(i = 0; i < nr; i++){
r1 = rpo2r[i];
r1->loop++;
if(r1->p2 != R && loophead(idom, r1))
loopmark(rpo2r, i, r1);
}
}
void void
synch(Reg *r, Bits dif) synch(Reg *r, Bits dif)
{ {
Reg *r1; Reg *r1;
int z; int z;
for(r1 = r; r1 != R; r1 = r1->s1) { for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
for(z=0; z<BITS; z++) { for(z=0; z<BITS; z++) {
dif.b[z] = (dif.b[z] & dif.b[z] = (dif.b[z] &
~(~r1->refbehind.b[z] & r1->refahead.b[z])) | ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
...@@ -1137,13 +910,13 @@ synch(Reg *r, Bits dif) ...@@ -1137,13 +910,13 @@ synch(Reg *r, Bits dif)
change++; change++;
} }
} }
if(r1->active) if(r1->f.active)
break; break;
r1->active = 1; r1->f.active = 1;
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
if(r1->s2 != R) if(r1->f.s2 != nil)
synch(r1->s2, dif); synch((Reg*)r1->f.s2, dif);
} }
} }
...@@ -1214,7 +987,7 @@ paint1(Reg *r, int bn) ...@@ -1214,7 +987,7 @@ paint1(Reg *r, int bn)
for(;;) { for(;;) {
if(!(r->refbehind.b[z] & bb)) if(!(r->refbehind.b[z] & bb))
break; break;
r1 = r->p1; r1 = (Reg*)r->f.p1;
if(r1 == R) if(r1 == R)
break; break;
if(!(r1->refahead.b[z] & bb)) if(!(r1->refahead.b[z] & bb))
...@@ -1225,48 +998,48 @@ paint1(Reg *r, int bn) ...@@ -1225,48 +998,48 @@ paint1(Reg *r, int bn)
} }
if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) { if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
change -= CLOAD * r->loop; change -= CLOAD * r->f.loop;
if(debug['R'] > 1) if(debug['R'] > 1)
print("%d%P\td %Q $%d\n", r->loop, print("%d%P\td %Q $%d\n", r->f.loop,
r->prog, blsh(bn), change); r->f.prog, blsh(bn), change);
} }
for(;;) { for(;;) {
r->act.b[z] |= bb; r->act.b[z] |= bb;
p = r->prog; p = r->f.prog;
if(r->use1.b[z] & bb) { if(r->use1.b[z] & bb) {
change += CREF * r->loop; change += CREF * r->f.loop;
if(debug['R'] > 1) if(debug['R'] > 1)
print("%d%P\tu1 %Q $%d\n", r->loop, print("%d%P\tu1 %Q $%d\n", r->f.loop,
p, blsh(bn), change); p, blsh(bn), change);
} }
if((r->use2.b[z]|r->set.b[z]) & bb) { if((r->use2.b[z]|r->set.b[z]) & bb) {
change += CREF * r->loop; change += CREF * r->f.loop;
if(debug['R'] > 1) if(debug['R'] > 1)
print("%d%P\tu2 %Q $%d\n", r->loop, print("%d%P\tu2 %Q $%d\n", r->f.loop,
p, blsh(bn), change); p, blsh(bn), change);
} }
if(STORE(r) & r->regdiff.b[z] & bb) { if(STORE(r) & r->regdiff.b[z] & bb) {
change -= CLOAD * r->loop; change -= CLOAD * r->f.loop;
if(debug['R'] > 1) if(debug['R'] > 1)
print("%d%P\tst %Q $%d\n", r->loop, print("%d%P\tst %Q $%d\n", r->f.loop,
p, blsh(bn), change); p, blsh(bn), change);
} }
if(r->refbehind.b[z] & bb) if(r->refbehind.b[z] & bb)
for(r1 = r->p2; r1 != R; r1 = r1->p2link) for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
if(r1->refahead.b[z] & bb) if(r1->refahead.b[z] & bb)
paint1(r1, bn); paint1(r1, bn);
if(!(r->refahead.b[z] & bb)) if(!(r->refahead.b[z] & bb))
break; break;
r1 = r->s2; r1 = (Reg*)r->f.s2;
if(r1 != R) if(r1 != R)
if(r1->refbehind.b[z] & bb) if(r1->refbehind.b[z] & bb)
paint1(r1, bn); paint1(r1, bn);
r = r->s1; r = (Reg*)r->f.s1;
if(r == R) if(r == R)
break; break;
if(r->act.b[z] & bb) if(r->act.b[z] & bb)
...@@ -1291,7 +1064,7 @@ paint2(Reg *r, int bn) ...@@ -1291,7 +1064,7 @@ paint2(Reg *r, int bn)
for(;;) { for(;;) {
if(!(r->refbehind.b[z] & bb)) if(!(r->refbehind.b[z] & bb))
break; break;
r1 = r->p1; r1 = (Reg*)r->f.p1;
if(r1 == R) if(r1 == R)
break; break;
if(!(r1->refahead.b[z] & bb)) if(!(r1->refahead.b[z] & bb))
...@@ -1306,17 +1079,17 @@ paint2(Reg *r, int bn) ...@@ -1306,17 +1079,17 @@ paint2(Reg *r, int bn)
vreg |= r->regu; vreg |= r->regu;
if(r->refbehind.b[z] & bb) if(r->refbehind.b[z] & bb)
for(r1 = r->p2; r1 != R; r1 = r1->p2link) for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
if(r1->refahead.b[z] & bb) if(r1->refahead.b[z] & bb)
vreg |= paint2(r1, bn); vreg |= paint2(r1, bn);
if(!(r->refahead.b[z] & bb)) if(!(r->refahead.b[z] & bb))
break; break;
r1 = r->s2; r1 = (Reg*)r->f.s2;
if(r1 != R) if(r1 != R)
if(r1->refbehind.b[z] & bb) if(r1->refbehind.b[z] & bb)
vreg |= paint2(r1, bn); vreg |= paint2(r1, bn);
r = r->s1; r = (Reg*)r->f.s1;
if(r == R) if(r == R)
break; break;
if(!(r->act.b[z] & bb)) if(!(r->act.b[z] & bb))
...@@ -1342,7 +1115,7 @@ paint3(Reg *r, int bn, int32 rb, int rn) ...@@ -1342,7 +1115,7 @@ paint3(Reg *r, int bn, int32 rb, int rn)
for(;;) { for(;;) {
if(!(r->refbehind.b[z] & bb)) if(!(r->refbehind.b[z] & bb))
break; break;
r1 = r->p1; r1 = (Reg*)r->f.p1;
if(r1 == R) if(r1 == R)
break; break;
if(!(r1->refahead.b[z] & bb)) if(!(r1->refahead.b[z] & bb))
...@@ -1357,7 +1130,7 @@ paint3(Reg *r, int bn, int32 rb, int rn) ...@@ -1357,7 +1130,7 @@ paint3(Reg *r, int bn, int32 rb, int rn)
for(;;) { for(;;) {
r->act.b[z] |= bb; r->act.b[z] |= bb;
p = r->prog; p = r->f.prog;
if(r->use1.b[z] & bb) { if(r->use1.b[z] & bb) {
if(debug['R']) if(debug['R'])
...@@ -1379,17 +1152,17 @@ paint3(Reg *r, int bn, int32 rb, int rn) ...@@ -1379,17 +1152,17 @@ paint3(Reg *r, int bn, int32 rb, int rn)
r->regu |= rb; r->regu |= rb;
if(r->refbehind.b[z] & bb) if(r->refbehind.b[z] & bb)
for(r1 = r->p2; r1 != R; r1 = r1->p2link) for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
if(r1->refahead.b[z] & bb) if(r1->refahead.b[z] & bb)
paint3(r1, bn, rb, rn); paint3(r1, bn, rb, rn);
if(!(r->refahead.b[z] & bb)) if(!(r->refahead.b[z] & bb))
break; break;
r1 = r->s2; r1 = (Reg*)r->f.s2;
if(r1 != R) if(r1 != R)
if(r1->refbehind.b[z] & bb) if(r1->refbehind.b[z] & bb)
paint3(r1, bn, rb, rn); paint3(r1, bn, rb, rn);
r = r->s1; r = (Reg*)r->f.s1;
if(r == R) if(r == R)
break; break;
if(r->act.b[z] & bb) if(r->act.b[z] & bb)
...@@ -1464,12 +1237,15 @@ BtoF(int32 b) ...@@ -1464,12 +1237,15 @@ BtoF(int32 b)
} }
void void
dumpone(Reg *r) dumpone(Flow *f, int isreg)
{ {
int z; int z;
Bits bit; Bits bit;
Reg *r;
print("%d:%P", r->loop, r->prog); print("%d:%P", f->loop, f->prog);
if(isreg) {
r = (Reg*)f;
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
bit.b[z] = bit.b[z] =
r->set.b[z] | r->set.b[z] |
...@@ -1503,26 +1279,27 @@ dumpone(Reg *r) ...@@ -1503,26 +1279,27 @@ dumpone(Reg *r)
if(bany(&r->act)) if(bany(&r->act))
print(" a:%Q ", r->act); print(" a:%Q ", r->act);
} }
}
print("\n"); print("\n");
} }
void void
dumpit(char *str, Reg *r0) dumpit(char *str, Flow *r0, int isreg)
{ {
Reg *r, *r1; Flow *r, *r1;
print("\n%s\n", str); print("\n%s\n", str);
for(r = r0; r != R; r = r->link) { for(r = r0; r != nil; r = r->link) {
dumpone(r); dumpone(r, isreg);
r1 = r->p2; r1 = r->p2;
if(r1 != R) { if(r1 != nil) {
print(" pred:"); print(" pred:");
for(; r1 != R; r1 = r1->p2link) for(; r1 != nil; r1 = r1->p2link)
print(" %.4ud", r1->prog->loc); print(" %.4ud", r1->prog->loc);
print("\n"); print("\n");
} }
// r1 = r->s1; // r1 = r->s1;
// if(r1 != R) { // if(r1 != nil) {
// print(" succ:"); // print(" succ:");
// for(; r1 != R; r1 = r1->s1) // for(; r1 != R; r1 = r1->s1)
// print(" %.4ud", r1->prog->loc); // print(" %.4ud", r1->prog->loc);
......
...@@ -55,6 +55,7 @@ typedef struct Rgn Rgn; ...@@ -55,6 +55,7 @@ typedef struct Rgn Rgn;
// r->prog->opt points back to r. // r->prog->opt points back to r.
struct Reg struct Reg
{ {
Flow f;
Bits set; // variables written by this instruction. Bits set; // variables written by this instruction.
Bits use1; // variables read by prog->from. Bits use1; // variables read by prog->from.
...@@ -68,19 +69,6 @@ struct Reg ...@@ -68,19 +69,6 @@ struct Reg
Bits act; Bits act;
int32 regu; // register used bitmap int32 regu; // register used bitmap
int32 rpo; // reverse post ordering
int32 active;
uint16 loop; // x5 for every loop
uchar refset; // diagnostic generated
Reg* p1; // predecessors of this instruction: p1,
Reg* p2; // and then p2 linked though p2link.
Reg* p2link;
Reg* s1; // successors of this instruction (at most two: s1 and s2).
Reg* s2;
Reg* link; // next instruction in function code
Prog* prog; // actual instruction
}; };
#define R ((Reg*)0) #define R ((Reg*)0)
...@@ -96,10 +84,7 @@ struct Rgn ...@@ -96,10 +84,7 @@ struct Rgn
EXTERN int32 exregoffset; // not set EXTERN int32 exregoffset; // not set
EXTERN int32 exfregoffset; // not set EXTERN int32 exfregoffset; // not set
EXTERN Reg* firstr; EXTERN Reg* firstr;
EXTERN Reg* lastr;
EXTERN Reg zreg; EXTERN Reg zreg;
EXTERN Reg* freer;
EXTERN Reg** rpo2r;
EXTERN Rgn region[NRGN]; EXTERN Rgn region[NRGN];
EXTERN Rgn* rgp; EXTERN Rgn* rgp;
EXTERN int nregion; EXTERN int nregion;
...@@ -113,7 +98,6 @@ EXTERN Bits addrs; ...@@ -113,7 +98,6 @@ EXTERN Bits addrs;
EXTERN Bits ovar; EXTERN Bits ovar;
EXTERN int change; EXTERN int change;
EXTERN int32 maxnr; EXTERN int32 maxnr;
EXTERN int32* idom;
EXTERN struct EXTERN struct
{ {
...@@ -128,41 +112,27 @@ EXTERN struct ...@@ -128,41 +112,27 @@ EXTERN struct
/* /*
* reg.c * reg.c
*/ */
Reg* rega(void);
int rcmp(const void*, const void*); int rcmp(const void*, const void*);
void regopt(Prog*); void regopt(Prog*);
void addmove(Reg*, int, int, int); void addmove(Reg*, int, int, int);
Bits mkvar(Reg*, Adr*); Bits mkvar(Reg*, Adr*);
void prop(Reg*, Bits, Bits); void prop(Reg*, Bits, Bits);
void loopit(Reg*, int32);
void synch(Reg*, Bits); void synch(Reg*, Bits);
uint32 allreg(uint32, Rgn*); uint32 allreg(uint32, Rgn*);
void paint1(Reg*, int); void paint1(Reg*, int);
uint32 paint2(Reg*, int); uint32 paint2(Reg*, int);
void paint3(Reg*, int, int32, int); void paint3(Reg*, int, int32, int);
void addreg(Adr*, int); void addreg(Adr*, int);
void dumpone(Reg*); void dumpone(Flow*, int);
void dumpit(char*, Reg*); void dumpit(char*, Flow*, int);
/* /*
* peep.c * peep.c
*/ */
void peep(void); void peep(Prog*);
void excise(Reg*); void excise(Flow*);
Reg* uniqp(Reg*);
Reg* uniqs(Reg*);
int regtyp(Adr*);
int anyvar(Adr*);
int subprop(Reg*);
int copyprop(Reg*);
int copy1(Adr*, Adr*, Reg*, int);
int copyu(Prog*, Adr*, Adr*); int copyu(Prog*, Adr*, Adr*);
int copyas(Adr*, Adr*);
int copyau(Adr*, Adr*);
int copysub(Adr*, Adr*, Adr*, int);
int copysub1(Prog*, Adr*, Adr*, int);
int32 RtoB(int); int32 RtoB(int);
int32 FtoB(int); int32 FtoB(int);
int BtoR(int32); int BtoR(int32);
......
...@@ -33,11 +33,18 @@ ...@@ -33,11 +33,18 @@
#include "gg.h" #include "gg.h"
#include "opt.h" #include "opt.h"
static void conprop(Reg *r); static void conprop(Flow *r);
static void elimshortmov(Reg *r); static void elimshortmov(Graph *g);
static int prevl(Reg *r, int reg); static int prevl(Flow *r, int reg);
static void pushback(Reg *r); static void pushback(Flow *r);
static int regconsttyp(Adr*); static int regconsttyp(Adr*);
static int regtyp(Adr*);
static int subprop(Flow*);
static int copyprop(Graph*, Flow*);
static int copy1(Adr*, Adr*, Flow*, int);
static int copyas(Adr*, Adr*);
static int copyau(Adr*, Adr*);
static int copysub(Adr*, Adr*, Adr*, int);
// do we need the carry bit // do we need the carry bit
static int static int
...@@ -56,19 +63,19 @@ needc(Prog *p) ...@@ -56,19 +63,19 @@ needc(Prog *p)
return 0; return 0;
} }
static Reg* static Flow*
rnops(Reg *r) rnops(Flow *r)
{ {
Prog *p; Prog *p;
Reg *r1; Flow *r1;
if(r != R) if(r != nil)
for(;;) { for(;;) {
p = r->prog; p = r->prog;
if(p->as != ANOP || p->from.type != D_NONE || p->to.type != D_NONE) if(p->as != ANOP || p->from.type != D_NONE || p->to.type != D_NONE)
break; break;
r1 = uniqs(r); r1 = uniqs(r);
if(r1 == R) if(r1 == nil)
break; break;
r = r1; r = r1;
} }
...@@ -76,52 +83,25 @@ rnops(Reg *r) ...@@ -76,52 +83,25 @@ rnops(Reg *r)
} }
void void
peep(void) peep(Prog *firstp)
{ {
Reg *r, *r1, *r2; Flow *r, *r1;
Graph *g;
Prog *p, *p1; Prog *p, *p1;
int t; int t;
ProgInfo info;
/*
* complete R structure
*/
t = 0;
for(r=firstr; r!=R; r=r1) {
r1 = r->link;
if(r1 == R)
break;
p = r->prog->link;
for(p = r->prog->link; p != r1->prog; p = p->link) {
proginfo(&info, p);
if(info.flags & Skip)
continue;
r2 = rega();
r->link = r2;
r2->link = r1;
r2->prog = p;
p->opt = r2;
r2->p1 = r; g = flowstart(firstp, sizeof(Flow));
r->s1 = r2; if(g == nil)
r2->s1 = r1; return;
r1->p1 = r2;
r = r2;
t++;
}
}
// byte, word arithmetic elimination. // byte, word arithmetic elimination.
elimshortmov(r); elimshortmov(g);
// constant propagation // constant propagation
// find MOV $con,R followed by // find MOV $con,nil followed by
// another MOV $con,R without // another MOV $con,nil without
// setting R in the interim // setting nil in the interim
for(r=firstr; r!=R; r=r->link) { for(r=g->start; r!=nil; r=r->link) {
p = r->prog; p = r->prog;
switch(p->as) { switch(p->as) {
case ALEAL: case ALEAL:
...@@ -147,10 +127,10 @@ peep(void) ...@@ -147,10 +127,10 @@ peep(void)
loop1: loop1:
if(debug['P'] && debug['v']) if(debug['P'] && debug['v'])
dumpit("loop1", firstr); dumpit("loop1", g->start, 0);
t = 0; t = 0;
for(r=firstr; r!=R; r=r->link) { for(r=g->start; r!=nil; r=r->link) {
p = r->prog; p = r->prog;
switch(p->as) { switch(p->as) {
case AMOVL: case AMOVL:
...@@ -159,11 +139,11 @@ loop1: ...@@ -159,11 +139,11 @@ loop1:
case AMOVSD: case AMOVSD:
if(regtyp(&p->to)) if(regtyp(&p->to))
if(regtyp(&p->from)) { if(regtyp(&p->from)) {
if(copyprop(r)) { if(copyprop(g, r)) {
excise(r); excise(r);
t++; t++;
} else } else
if(subprop(r) && copyprop(r)) { if(subprop(r) && copyprop(g, r)) {
excise(r); excise(r);
t++; t++;
} }
...@@ -176,7 +156,7 @@ loop1: ...@@ -176,7 +156,7 @@ loop1:
case AMOVWLSX: case AMOVWLSX:
if(regtyp(&p->to)) { if(regtyp(&p->to)) {
r1 = rnops(uniqs(r)); r1 = rnops(uniqs(r));
if(r1 != R) { if(r1 != nil) {
p1 = r1->prog; p1 = r1->prog;
if(p->as == p1->as && p->to.type == p1->from.type){ if(p->as == p1->as && p->to.type == p1->from.type){
p1->as = AMOVL; p1->as = AMOVL;
...@@ -195,7 +175,7 @@ loop1: ...@@ -195,7 +175,7 @@ loop1:
case AMOVQL: case AMOVQL:
if(regtyp(&p->to)) { if(regtyp(&p->to)) {
r1 = rnops(uniqs(r)); r1 = rnops(uniqs(r));
if(r1 != R) { if(r1 != nil) {
p1 = r1->prog; p1 = r1->prog;
if(p->as == p1->as && p->to.type == p1->from.type){ if(p->as == p1->as && p->to.type == p1->from.type){
p1->as = AMOVQ; p1->as = AMOVQ;
...@@ -278,7 +258,7 @@ loop1: ...@@ -278,7 +258,7 @@ loop1:
// can be replaced by MOVAPD, which moves the pair of float64s // can be replaced by MOVAPD, which moves the pair of float64s
// instead of just the lower one. We only use the lower one, but // instead of just the lower one. We only use the lower one, but
// the processor can do better if we do moves using both. // the processor can do better if we do moves using both.
for(r=firstr; r!=R; r=r->link) { for(r=g->start; r!=nil; r=r->link) {
p = r->prog; p = r->prog;
if(p->as == AMOVLQZX) if(p->as == AMOVLQZX)
if(regtyp(&p->from)) if(regtyp(&p->from))
...@@ -295,7 +275,7 @@ loop1: ...@@ -295,7 +275,7 @@ loop1:
// load pipelining // load pipelining
// push any load from memory as early as possible // push any load from memory as early as possible
// to give it time to complete before use. // to give it time to complete before use.
for(r=firstr; r!=R; r=r->link) { for(r=g->start; r!=nil; r=r->link) {
p = r->prog; p = r->prog;
switch(p->as) { switch(p->as) {
case AMOVB: case AMOVB:
...@@ -307,17 +287,19 @@ loop1: ...@@ -307,17 +287,19 @@ loop1:
pushback(r); pushback(r);
} }
} }
flowend(g);
} }
static void static void
pushback(Reg *r0) pushback(Flow *r0)
{ {
Reg *r, *b; Flow *r, *b;
Prog *p0, *p, t; Prog *p0, *p, t;
b = R; b = nil;
p0 = r0->prog; p0 = r0->prog;
for(r=uniqp(r0); r!=R && uniqs(r)!=R; r=uniqp(r)) { for(r=uniqp(r0); r!=nil && uniqs(r)!=nil; r=uniqp(r)) {
p = r->prog; p = r->prog;
if(p->as != ANOP) { if(p->as != ANOP) {
if(!regconsttyp(&p->from) || !regtyp(&p->to)) if(!regconsttyp(&p->from) || !regtyp(&p->to))
...@@ -330,11 +312,11 @@ pushback(Reg *r0) ...@@ -330,11 +312,11 @@ pushback(Reg *r0)
b = r; b = r;
} }
if(b == R) { if(b == nil) {
if(debug['v']) { if(debug['v']) {
print("no pushback: %P\n", r0->prog); print("no pushback: %P\n", r0->prog);
if(r) if(r)
print("\t%P [%d]\n", r->prog, uniqs(r)!=R); print("\t%P [%d]\n", r->prog, uniqs(r)!=nil);
} }
return; return;
} }
...@@ -377,7 +359,7 @@ pushback(Reg *r0) ...@@ -377,7 +359,7 @@ pushback(Reg *r0)
} }
void void
excise(Reg *r) excise(Flow *r)
{ {
Prog *p; Prog *p;
...@@ -392,39 +374,7 @@ excise(Reg *r) ...@@ -392,39 +374,7 @@ excise(Reg *r)
ostats.ndelmov++; ostats.ndelmov++;
} }
Reg* static int
uniqp(Reg *r)
{
// Return the single predecessor of r, or R when r has no predecessor
// or more than one.
Reg *r1;
r1 = r->p1;
if(r1 == R) {
// No p1 predecessor: accept the p2 list only if it holds exactly one node.
r1 = r->p2;
if(r1 == R || r1->p2link != R)
return R;
} else
// p1 is present: any p2 predecessor as well means r1 is not unique.
if(r->p2 != R)
return R;
return r1;
}
/*
 * Return the single successor of r: s1 if only s1 is set, s2 if only
 * s2 is set, and R when r has no successor or has both.
 */
Reg*
uniqs(Reg *r)
{
	if(r->s1 != R && r->s2 != R)
		return R;	// two successors: not unique
	if(r->s1 != R)
		return r->s1;
	return r->s2;	// may itself be R
}
int
regtyp(Adr *a) regtyp(Adr *a)
{ {
int t; int t;
...@@ -448,12 +398,12 @@ regtyp(Adr *a) ...@@ -448,12 +398,12 @@ regtyp(Adr *a)
// TODO: Using the Q forms here instead of the L forms // TODO: Using the Q forms here instead of the L forms
// seems unnecessary, and it makes the instructions longer. // seems unnecessary, and it makes the instructions longer.
static void static void
elimshortmov(Reg *r) elimshortmov(Graph *g)
{ {
Prog *p; Prog *p;
Flow *r;
USED(r); for(r=g->start; r!=nil; r=r->link) {
for(r=firstr; r!=R; r=r->link) {
p = r->prog; p = r->prog;
if(regtyp(&p->to)) { if(regtyp(&p->to)) {
switch(p->as) { switch(p->as) {
...@@ -554,13 +504,13 @@ regconsttyp(Adr *a) ...@@ -554,13 +504,13 @@ regconsttyp(Adr *a)
// is reg guaranteed to be truncated by a previous L instruction? // is reg guaranteed to be truncated by a previous L instruction?
static int static int
prevl(Reg *r0, int reg) prevl(Flow *r0, int reg)
{ {
Prog *p; Prog *p;
Reg *r; Flow *r;
ProgInfo info; ProgInfo info;
for(r=uniqp(r0); r!=R; r=uniqp(r)) { for(r=uniqp(r0); r!=nil; r=uniqp(r)) {
p = r->prog; p = r->prog;
if(p->to.type == reg) { if(p->to.type == reg) {
proginfo(&info, p); proginfo(&info, p);
...@@ -588,13 +538,13 @@ prevl(Reg *r0, int reg) ...@@ -588,13 +538,13 @@ prevl(Reg *r0, int reg)
* hopefully, then the former or latter MOV * hopefully, then the former or latter MOV
* will be eliminated by copy propagation. * will be eliminated by copy propagation.
*/ */
int static int
subprop(Reg *r0) subprop(Flow *r0)
{ {
Prog *p; Prog *p;
ProgInfo info; ProgInfo info;
Adr *v1, *v2; Adr *v1, *v2;
Reg *r; Flow *r;
int t; int t;
if(debug['P'] && debug['v']) if(debug['P'] && debug['v'])
...@@ -612,10 +562,10 @@ subprop(Reg *r0) ...@@ -612,10 +562,10 @@ subprop(Reg *r0)
print("\tnot regtype %D; return 0\n", v2); print("\tnot regtype %D; return 0\n", v2);
return 0; return 0;
} }
for(r=uniqp(r0); r!=R; r=uniqp(r)) { for(r=uniqp(r0); r!=nil; r=uniqp(r)) {
if(debug['P'] && debug['v']) if(debug['P'] && debug['v'])
print("\t? %P\n", r->prog); print("\t? %P\n", r->prog);
if(uniqs(r) == R) { if(uniqs(r) == nil) {
if(debug['P'] && debug['v']) if(debug['P'] && debug['v'])
print("\tno unique successor\n"); print("\tno unique successor\n");
break; break;
...@@ -689,12 +639,12 @@ gotit: ...@@ -689,12 +639,12 @@ gotit:
* set v1 F=1 * set v1 F=1
* set v2 return success * set v2 return success
*/ */
int static int
copyprop(Reg *r0) copyprop(Graph *g, Flow *r0)
{ {
Prog *p; Prog *p;
Adr *v1, *v2; Adr *v1, *v2;
Reg *r; Flow *r;
if(debug['P'] && debug['v']) if(debug['P'] && debug['v'])
print("copyprop %P\n", r0->prog); print("copyprop %P\n", r0->prog);
...@@ -703,13 +653,13 @@ copyprop(Reg *r0) ...@@ -703,13 +653,13 @@ copyprop(Reg *r0)
v2 = &p->to; v2 = &p->to;
if(copyas(v1, v2)) if(copyas(v1, v2))
return 1; return 1;
for(r=firstr; r!=R; r=r->link) for(r=g->start; r!=nil; r=r->link)
r->active = 0; r->active = 0;
return copy1(v1, v2, r0->s1, 0); return copy1(v1, v2, r0->s1, 0);
} }
int static int
copy1(Adr *v1, Adr *v2, Reg *r, int f) copy1(Adr *v1, Adr *v2, Flow *r, int f)
{ {
int t; int t;
Prog *p; Prog *p;
...@@ -722,11 +672,11 @@ copy1(Adr *v1, Adr *v2, Reg *r, int f) ...@@ -722,11 +672,11 @@ copy1(Adr *v1, Adr *v2, Reg *r, int f)
r->active = 1; r->active = 1;
if(debug['P']) if(debug['P'])
print("copy %D->%D f=%d\n", v1, v2, f); print("copy %D->%D f=%d\n", v1, v2, f);
for(; r != R; r = r->s1) { for(; r != nil; r = r->s1) {
p = r->prog; p = r->prog;
if(debug['P']) if(debug['P'])
print("%P", p); print("%P", p);
if(!f && uniqp(r) == R) { if(!f && uniqp(r) == nil) {
f = 1; f = 1;
if(debug['P']) if(debug['P'])
print("; merge; f=%d", f); print("; merge; f=%d", f);
...@@ -880,7 +830,7 @@ copyu(Prog *p, Adr *v, Adr *s) ...@@ -880,7 +830,7 @@ copyu(Prog *p, Adr *v, Adr *s)
* could be set/use depending on * could be set/use depending on
* semantics * semantics
*/ */
int static int
copyas(Adr *a, Adr *v) copyas(Adr *a, Adr *v)
{ {
if(a->type != v->type) if(a->type != v->type)
...@@ -896,7 +846,7 @@ copyas(Adr *a, Adr *v) ...@@ -896,7 +846,7 @@ copyas(Adr *a, Adr *v)
/* /*
* either direct or indirect * either direct or indirect
*/ */
int static int
copyau(Adr *a, Adr *v) copyau(Adr *a, Adr *v)
{ {
...@@ -924,7 +874,7 @@ copyau(Adr *a, Adr *v) ...@@ -924,7 +874,7 @@ copyau(Adr *a, Adr *v)
* substitute s for v in a * substitute s for v in a
* return failure to substitute * return failure to substitute
*/ */
int static int
copysub(Adr *a, Adr *v, Adr *s, int f) copysub(Adr *a, Adr *v, Adr *s, int f)
{ {
int t; int t;
...@@ -957,9 +907,9 @@ copysub(Adr *a, Adr *v, Adr *s, int f) ...@@ -957,9 +907,9 @@ copysub(Adr *a, Adr *v, Adr *s, int f)
} }
static void static void
conprop(Reg *r0) conprop(Flow *r0)
{ {
Reg *r; Flow *r;
Prog *p, *p0; Prog *p, *p0;
int t; int t;
Adr *v0; Adr *v0;
...@@ -970,9 +920,9 @@ conprop(Reg *r0) ...@@ -970,9 +920,9 @@ conprop(Reg *r0)
loop: loop:
r = uniqs(r); r = uniqs(r);
if(r == R || r == r0) if(r == nil || r == r0)
return; return;
if(uniqp(r) == R) if(uniqp(r) == nil)
return; return;
p = r->prog; p = r->prog;
......
...@@ -38,21 +38,6 @@ ...@@ -38,21 +38,6 @@
static int first = 1; static int first = 1;
/*
 * Hand out a Reg initialized from zreg.  The head of the freer list is
 * reused when the list is non-empty; otherwise a new Reg is obtained
 * from mal.
 */
Reg*
rega(void)
{
	Reg *node;

	if(freer != R) {
		// pop the head of the free list
		node = freer;
		freer = node->link;
	} else
		node = mal(sizeof(*node));
	*node = zreg;
	return node;
}
int int
rcmp(const void *a1, const void *a2) rcmp(const void *a1, const void *a2)
{ {
...@@ -157,8 +142,9 @@ regopt(Prog *firstp) ...@@ -157,8 +142,9 @@ regopt(Prog *firstp)
{ {
Reg *r, *r1; Reg *r, *r1;
Prog *p; Prog *p;
ProgInfo info, info2; Graph *g;
int i, z, nr; ProgInfo info;
int i, z;
uint32 vreg; uint32 vreg;
Bits bit; Bits bit;
...@@ -170,19 +156,6 @@ regopt(Prog *firstp) ...@@ -170,19 +156,6 @@ regopt(Prog *firstp)
fixjmp(firstp); fixjmp(firstp);
// count instructions
nr = 0;
for(p=firstp; p!=P; p=p->link)
nr++;
// if too big dont bother
if(nr >= 10000) {
// print("********** %S is too big (%d)\n", curfn->nname->sym, nr);
return;
}
firstr = R;
lastr = R;
/* /*
* control flow is more complicated in generated go code * control flow is more complicated in generated go code
* than in generated c code. define pseudo-variables for * than in generated c code. define pseudo-variables for
...@@ -214,33 +187,14 @@ regopt(Prog *firstp) ...@@ -214,33 +187,14 @@ regopt(Prog *firstp)
* allocate pcs * allocate pcs
* find use and set of variables * find use and set of variables
*/ */
nr = 0; g = flowstart(firstp, sizeof(Reg));
for(p=firstp; p!=P; p=p->link) { if(g == nil)
proginfo(&info, p); return;
if(info.flags & Skip) firstr = (Reg*)g->start;
continue;
r = rega();
nr++;
if(firstr == R) {
firstr = r;
lastr = r;
} else {
lastr->link = r;
r->p1 = lastr;
lastr->s1 = r;
lastr = r;
}
r->prog = p;
p->opt = r;
r1 = r->p1; for(r = firstr; r != R; r = (Reg*)r->f.link) {
if(r1 != R) { p = r->f.prog;
proginfo(&info2, r1->prog); proginfo(&info, p);
if(info2.flags & Break) {
r->p1 = R;
r1->s1 = R;
}
}
// Avoid making variables for direct-called functions. // Avoid making variables for direct-called functions.
if(p->as == ACALL && p->to.type == D_EXTERN) if(p->as == ACALL && p->to.type == D_EXTERN)
...@@ -273,8 +227,6 @@ regopt(Prog *firstp) ...@@ -273,8 +227,6 @@ regopt(Prog *firstp)
r->set.b[z] |= bit.b[z]; r->set.b[z] |= bit.b[z];
} }
} }
if(firstr == R)
return;
for(i=0; i<nvar; i++) { for(i=0; i<nvar; i++) {
Var *v = var+i; Var *v = var+i;
...@@ -290,45 +242,19 @@ regopt(Prog *firstp) ...@@ -290,45 +242,19 @@ regopt(Prog *firstp)
} }
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
dumpit("pass1", firstr); dumpit("pass1", &firstr->f, 1);
/* /*
* pass 2 * pass 2
* turn branch references to pointers
* build back pointers
*/
for(r=firstr; r!=R; r=r->link) {
p = r->prog;
if(p->to.type == D_BRANCH) {
if(p->to.u.branch == P)
fatal("pnil %P", p);
r1 = p->to.u.branch->opt;
if(r1 == R)
fatal("rnil %P", p);
if(r1 == r) {
//fatal("ref to self %P", p);
continue;
}
r->s2 = r1;
r->p2link = r1->p2;
r1->p2 = r;
}
}
if(debug['R'] && debug['v'])
dumpit("pass2", firstr);
/*
* pass 2.5
* find looping structure * find looping structure
*/ */
for(r = firstr; r != R; r = r->link) for(r = firstr; r != R; r = (Reg*)r->f.link)
r->active = 0; r->f.active = 0;
change = 0; change = 0;
loopit(firstr, nr); flowrpo(g);
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
dumpit("pass2.5", firstr); dumpit("pass2", &firstr->f, 1);
/* /*
* pass 3 * pass 3
...@@ -337,17 +263,17 @@ regopt(Prog *firstp) ...@@ -337,17 +263,17 @@ regopt(Prog *firstp)
*/ */
loop1: loop1:
change = 0; change = 0;
for(r = firstr; r != R; r = r->link) for(r = firstr; r != R; r = (Reg*)r->f.link)
r->active = 0; r->f.active = 0;
for(r = firstr; r != R; r = r->link) for(r = firstr; r != R; r = (Reg*)r->f.link)
if(r->prog->as == ARET) if(r->f.prog->as == ARET)
prop(r, zbits, zbits); prop(r, zbits, zbits);
loop11: loop11:
/* pick up unreachable code */ /* pick up unreachable code */
i = 0; i = 0;
for(r = firstr; r != R; r = r1) { for(r = firstr; r != R; r = r1) {
r1 = r->link; r1 = (Reg*)r->f.link;
if(r1 && r1->active && !r->active) { if(r1 && r1->f.active && !r->f.active) {
prop(r, zbits, zbits); prop(r, zbits, zbits);
i = 1; i = 1;
} }
...@@ -358,7 +284,7 @@ loop11: ...@@ -358,7 +284,7 @@ loop11:
goto loop1; goto loop1;
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
dumpit("pass3", firstr); dumpit("pass3", &firstr->f, 1);
/* /*
* pass 4 * pass 4
...@@ -367,20 +293,20 @@ loop11: ...@@ -367,20 +293,20 @@ loop11:
*/ */
loop2: loop2:
change = 0; change = 0;
for(r = firstr; r != R; r = r->link) for(r = firstr; r != R; r = (Reg*)r->f.link)
r->active = 0; r->f.active = 0;
synch(firstr, zbits); synch(firstr, zbits);
if(change) if(change)
goto loop2; goto loop2;
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
dumpit("pass4", firstr); dumpit("pass4", &firstr->f, 1);
/* /*
* pass 4.5 * pass 4.5
* move register pseudo-variables into regu. * move register pseudo-variables into regu.
*/ */
for(r = firstr; r != R; r = r->link) { for(r = firstr; r != R; r = (Reg*)r->f.link) {
r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
r->set.b[0] &= ~REGBITS; r->set.b[0] &= ~REGBITS;
...@@ -404,26 +330,26 @@ loop2: ...@@ -404,26 +330,26 @@ loop2:
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
if(bany(&bit) && !r->refset) { if(bany(&bit) && !r->f.refset) {
// should never happen - all variables are preset // should never happen - all variables are preset
if(debug['w']) if(debug['w'])
print("%L: used and not set: %Q\n", r->prog->lineno, bit); print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
r->refset = 1; r->f.refset = 1;
} }
} }
for(r = firstr; r != R; r = r->link) for(r = firstr; r != R; r = (Reg*)r->f.link)
r->act = zbits; r->act = zbits;
rgp = region; rgp = region;
nregion = 0; nregion = 0;
for(r = firstr; r != R; r = r->link) { for(r = firstr; r != R; r = (Reg*)r->f.link) {
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
bit.b[z] = r->set.b[z] & bit.b[z] = r->set.b[z] &
~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
if(bany(&bit) && !r->refset) { if(bany(&bit) && !r->f.refset) {
if(debug['w']) if(debug['w'])
print("%L: set and not used: %Q\n", r->prog->lineno, bit); print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
r->refset = 1; r->f.refset = 1;
excise(r); excise(&r->f);
} }
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
...@@ -450,7 +376,7 @@ brk: ...@@ -450,7 +376,7 @@ brk:
qsort(region, nregion, sizeof(region[0]), rcmp); qsort(region, nregion, sizeof(region[0]), rcmp);
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
dumpit("pass5", firstr); dumpit("pass5", &firstr->f, 1);
/* /*
* pass 6 * pass 6
...@@ -476,19 +402,23 @@ brk: ...@@ -476,19 +402,23 @@ brk:
} }
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
dumpit("pass6", firstr); dumpit("pass6", &firstr->f, 1);
/*
* free aux structures. peep allocates new ones.
*/
flowend(g);
firstr = R;
/* /*
* pass 7 * pass 7
* peep-hole on basic block * peep-hole on basic block
*/ */
if(!debug['R'] || debug['P']) { if(!debug['R'] || debug['P'])
peep(); peep(firstp);
}
/* /*
* eliminate nops * eliminate nops
* free aux structures
*/ */
for(p=firstp; p!=P; p=p->link) { for(p=firstp; p!=P; p=p->link) {
while(p->link != P && p->link->as == ANOP) while(p->link != P && p->link->as == ANOP)
...@@ -498,11 +428,6 @@ brk: ...@@ -498,11 +428,6 @@ brk:
p->to.u.branch = p->to.u.branch->link; p->to.u.branch = p->to.u.branch->link;
} }
if(lastr != R) {
lastr->link = freer;
freer = firstr;
}
if(debug['R']) { if(debug['R']) {
if(ostats.ncvtreg || if(ostats.ncvtreg ||
ostats.nspill || ostats.nspill ||
...@@ -545,7 +470,7 @@ addmove(Reg *r, int bn, int rn, int f) ...@@ -545,7 +470,7 @@ addmove(Reg *r, int bn, int rn, int f)
clearp(p1); clearp(p1);
p1->loc = 9999; p1->loc = 9999;
p = r->prog; p = r->f.prog;
p1->link = p->link; p1->link = p->link;
p->link = p1; p->link = p1;
p1->lineno = p->lineno; p1->lineno = p->lineno;
...@@ -769,7 +694,7 @@ prop(Reg *r, Bits ref, Bits cal) ...@@ -769,7 +694,7 @@ prop(Reg *r, Bits ref, Bits cal)
Reg *r1, *r2; Reg *r1, *r2;
int z; int z;
for(r1 = r; r1 != R; r1 = r1->p1) { for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
for(z=0; z<BITS; z++) { for(z=0; z<BITS; z++) {
ref.b[z] |= r1->refahead.b[z]; ref.b[z] |= r1->refahead.b[z];
if(ref.b[z] != r1->refahead.b[z]) { if(ref.b[z] != r1->refahead.b[z]) {
...@@ -782,9 +707,9 @@ prop(Reg *r, Bits ref, Bits cal) ...@@ -782,9 +707,9 @@ prop(Reg *r, Bits ref, Bits cal)
change++; change++;
} }
} }
switch(r1->prog->as) { switch(r1->f.prog->as) {
case ACALL: case ACALL:
if(noreturn(r1->prog)) if(noreturn(r1->f.prog))
break; break;
for(z=0; z<BITS; z++) { for(z=0; z<BITS; z++) {
cal.b[z] |= ref.b[z] | externs.b[z]; cal.b[z] |= ref.b[z] | externs.b[z];
...@@ -824,159 +749,22 @@ prop(Reg *r, Bits ref, Bits cal) ...@@ -824,159 +749,22 @@ prop(Reg *r, Bits ref, Bits cal)
r1->refbehind.b[z] = ref.b[z]; r1->refbehind.b[z] = ref.b[z];
r1->calbehind.b[z] = cal.b[z]; r1->calbehind.b[z] = cal.b[z];
} }
if(r1->active) if(r1->f.active)
break; break;
r1->active = 1; r1->f.active = 1;
} }
for(; r != r1; r = r->p1) for(; r != r1; r = (Reg*)r->f.p1)
for(r2 = r->p2; r2 != R; r2 = r2->p2link) for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
prop(r2, r->refbehind, r->calbehind); prop(r2, r->refbehind, r->calbehind);
} }
/*
* find looping structure
*
* 1) find reverse postordering
* 2) find approximate dominators,
* the actual dominators if the flow graph is reducible
* otherwise, dominators plus some other non-dominators.
* See Matthew S. Hecht and Jeffrey D. Ullman,
* "Analysis of a Simple Algorithm for Global Data Flow Problems",
* Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
* Oct. 1-3, 1973, pp. 207-217.
* 3) find all nodes with a predecessor dominated by the current node.
* such a node is a loop head.
* recursively, all preds with a greater rpo number are in the loop
*/
int32
postorder(Reg *r, Reg **rpo2r, int32 n)
{
Reg *r1;
r->rpo = 1;
r1 = r->s1;
if(r1 && !r1->rpo)
n = postorder(r1, rpo2r, n);
r1 = r->s2;
if(r1 && !r1->rpo)
n = postorder(r1, rpo2r, n);
rpo2r[n] = r;
n++;
return n;
}
int32
rpolca(int32 *idom, int32 rpo1, int32 rpo2)
{
int32 t;
if(rpo1 == -1)
return rpo2;
while(rpo1 != rpo2){
if(rpo1 > rpo2){
t = rpo2;
rpo2 = rpo1;
rpo1 = t;
}
while(rpo1 < rpo2){
t = idom[rpo2];
if(t >= rpo2)
fatal("bad idom");
rpo2 = t;
}
}
return rpo1;
}
int
doms(int32 *idom, int32 r, int32 s)
{
while(s > r)
s = idom[s];
return s == r;
}
int
loophead(int32 *idom, Reg *r)
{
int32 src;
src = r->rpo;
if(r->p1 != R && doms(idom, src, r->p1->rpo))
return 1;
for(r = r->p2; r != R; r = r->p2link)
if(doms(idom, src, r->rpo))
return 1;
return 0;
}
void
loopmark(Reg **rpo2r, int32 head, Reg *r)
{
if(r->rpo < head || r->active == head)
return;
r->active = head;
r->loop += LOOP;
if(r->p1 != R)
loopmark(rpo2r, head, r->p1);
for(r = r->p2; r != R; r = r->p2link)
loopmark(rpo2r, head, r);
}
void
loopit(Reg *r, int32 nr)
{
Reg *r1;
int32 i, d, me;
if(nr > maxnr) {
rpo2r = mal(nr * sizeof(Reg*));
idom = mal(nr * sizeof(int32));
maxnr = nr;
}
d = postorder(r, rpo2r, 0);
if(d > nr)
fatal("too many reg nodes %d %d", d, nr);
nr = d;
for(i = 0; i < nr / 2; i++) {
r1 = rpo2r[i];
rpo2r[i] = rpo2r[nr - 1 - i];
rpo2r[nr - 1 - i] = r1;
}
for(i = 0; i < nr; i++)
rpo2r[i]->rpo = i;
idom[0] = 0;
for(i = 0; i < nr; i++) {
r1 = rpo2r[i];
me = r1->rpo;
d = -1;
// rpo2r[r->rpo] == r protects against considering dead code,
// which has r->rpo == 0.
if(r1->p1 != R && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me)
d = r1->p1->rpo;
for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
if(rpo2r[r1->rpo] == r1 && r1->rpo < me)
d = rpolca(idom, d, r1->rpo);
idom[i] = d;
}
for(i = 0; i < nr; i++) {
r1 = rpo2r[i];
r1->loop++;
if(r1->p2 != R && loophead(idom, r1))
loopmark(rpo2r, i, r1);
}
}
void void
synch(Reg *r, Bits dif) synch(Reg *r, Bits dif)
{ {
Reg *r1; Reg *r1;
int z; int z;
for(r1 = r; r1 != R; r1 = r1->s1) { for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
for(z=0; z<BITS; z++) { for(z=0; z<BITS; z++) {
dif.b[z] = (dif.b[z] & dif.b[z] = (dif.b[z] &
~(~r1->refbehind.b[z] & r1->refahead.b[z])) | ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
...@@ -986,13 +774,13 @@ synch(Reg *r, Bits dif) ...@@ -986,13 +774,13 @@ synch(Reg *r, Bits dif)
change++; change++;
} }
} }
if(r1->active) if(r1->f.active)
break; break;
r1->active = 1; r1->f.active = 1;
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
if(r1->s2 != R) if(r1->f.s2 != nil)
synch(r1->s2, dif); synch((Reg*)r1->f.s2, dif);
} }
} }
...@@ -1057,7 +845,7 @@ paint1(Reg *r, int bn) ...@@ -1057,7 +845,7 @@ paint1(Reg *r, int bn)
for(;;) { for(;;) {
if(!(r->refbehind.b[z] & bb)) if(!(r->refbehind.b[z] & bb))
break; break;
r1 = r->p1; r1 = (Reg*)r->f.p1;
if(r1 == R) if(r1 == R)
break; break;
if(!(r1->refahead.b[z] & bb)) if(!(r1->refahead.b[z] & bb))
...@@ -1068,35 +856,35 @@ paint1(Reg *r, int bn) ...@@ -1068,35 +856,35 @@ paint1(Reg *r, int bn)
} }
if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
change -= CLOAD * r->loop; change -= CLOAD * r->f.loop;
} }
for(;;) { for(;;) {
r->act.b[z] |= bb; r->act.b[z] |= bb;
if(r->use1.b[z] & bb) { if(r->use1.b[z] & bb) {
change += CREF * r->loop; change += CREF * r->f.loop;
} }
if((r->use2.b[z]|r->set.b[z]) & bb) { if((r->use2.b[z]|r->set.b[z]) & bb) {
change += CREF * r->loop; change += CREF * r->f.loop;
} }
if(STORE(r) & r->regdiff.b[z] & bb) { if(STORE(r) & r->regdiff.b[z] & bb) {
change -= CLOAD * r->loop; change -= CLOAD * r->f.loop;
} }
if(r->refbehind.b[z] & bb) if(r->refbehind.b[z] & bb)
for(r1 = r->p2; r1 != R; r1 = r1->p2link) for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
if(r1->refahead.b[z] & bb) if(r1->refahead.b[z] & bb)
paint1(r1, bn); paint1(r1, bn);
if(!(r->refahead.b[z] & bb)) if(!(r->refahead.b[z] & bb))
break; break;
r1 = r->s2; r1 = (Reg*)r->f.s2;
if(r1 != R) if(r1 != R)
if(r1->refbehind.b[z] & bb) if(r1->refbehind.b[z] & bb)
paint1(r1, bn); paint1(r1, bn);
r = r->s1; r = (Reg*)r->f.s1;
if(r == R) if(r == R)
break; break;
if(r->act.b[z] & bb) if(r->act.b[z] & bb)
...@@ -1119,7 +907,7 @@ regset(Reg *r, uint32 bb) ...@@ -1119,7 +907,7 @@ regset(Reg *r, uint32 bb)
v.type = b & 0xFFFF? BtoR(b): BtoF(b); v.type = b & 0xFFFF? BtoR(b): BtoF(b);
if(v.type == 0) if(v.type == 0)
fatal("zero v.type for %#ux", b); fatal("zero v.type for %#ux", b);
c = copyu(r->prog, &v, A); c = copyu(r->f.prog, &v, A);
if(c == 3) if(c == 3)
set |= b; set |= b;
bb &= ~b; bb &= ~b;
...@@ -1138,7 +926,7 @@ reguse(Reg *r, uint32 bb) ...@@ -1138,7 +926,7 @@ reguse(Reg *r, uint32 bb)
v = zprog.from; v = zprog.from;
while(b = bb & ~(bb-1)) { while(b = bb & ~(bb-1)) {
v.type = b & 0xFFFF? BtoR(b): BtoF(b); v.type = b & 0xFFFF? BtoR(b): BtoF(b);
c = copyu(r->prog, &v, A); c = copyu(r->f.prog, &v, A);
if(c == 1 || c == 2 || c == 4) if(c == 1 || c == 2 || c == 4)
set |= b; set |= b;
bb &= ~b; bb &= ~b;
...@@ -1161,7 +949,7 @@ paint2(Reg *r, int bn) ...@@ -1161,7 +949,7 @@ paint2(Reg *r, int bn)
for(;;) { for(;;) {
if(!(r->refbehind.b[z] & bb)) if(!(r->refbehind.b[z] & bb))
break; break;
r1 = r->p1; r1 = (Reg*)r->f.p1;
if(r1 == R) if(r1 == R)
break; break;
if(!(r1->refahead.b[z] & bb)) if(!(r1->refahead.b[z] & bb))
...@@ -1176,17 +964,17 @@ paint2(Reg *r, int bn) ...@@ -1176,17 +964,17 @@ paint2(Reg *r, int bn)
vreg |= r->regu; vreg |= r->regu;
if(r->refbehind.b[z] & bb) if(r->refbehind.b[z] & bb)
for(r1 = r->p2; r1 != R; r1 = r1->p2link) for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
if(r1->refahead.b[z] & bb) if(r1->refahead.b[z] & bb)
vreg |= paint2(r1, bn); vreg |= paint2(r1, bn);
if(!(r->refahead.b[z] & bb)) if(!(r->refahead.b[z] & bb))
break; break;
r1 = r->s2; r1 = (Reg*)r->f.s2;
if(r1 != R) if(r1 != R)
if(r1->refbehind.b[z] & bb) if(r1->refbehind.b[z] & bb)
vreg |= paint2(r1, bn); vreg |= paint2(r1, bn);
r = r->s1; r = (Reg*)r->f.s1;
if(r == R) if(r == R)
break; break;
if(!(r->act.b[z] & bb)) if(!(r->act.b[z] & bb))
...@@ -1196,7 +984,7 @@ paint2(Reg *r, int bn) ...@@ -1196,7 +984,7 @@ paint2(Reg *r, int bn)
} }
bb = vreg; bb = vreg;
for(; r; r=r->s1) { for(; r; r=(Reg*)r->f.s1) {
x = r->regu & ~bb; x = r->regu & ~bb;
if(x) { if(x) {
vreg |= reguse(r, x); vreg |= reguse(r, x);
...@@ -1221,7 +1009,7 @@ paint3(Reg *r, int bn, int32 rb, int rn) ...@@ -1221,7 +1009,7 @@ paint3(Reg *r, int bn, int32 rb, int rn)
for(;;) { for(;;) {
if(!(r->refbehind.b[z] & bb)) if(!(r->refbehind.b[z] & bb))
break; break;
r1 = r->p1; r1 = (Reg*)r->f.p1;
if(r1 == R) if(r1 == R)
break; break;
if(!(r1->refahead.b[z] & bb)) if(!(r1->refahead.b[z] & bb))
...@@ -1235,7 +1023,7 @@ paint3(Reg *r, int bn, int32 rb, int rn) ...@@ -1235,7 +1023,7 @@ paint3(Reg *r, int bn, int32 rb, int rn)
addmove(r, bn, rn, 0); addmove(r, bn, rn, 0);
for(;;) { for(;;) {
r->act.b[z] |= bb; r->act.b[z] |= bb;
p = r->prog; p = r->f.prog;
if(r->use1.b[z] & bb) { if(r->use1.b[z] & bb) {
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
...@@ -1257,17 +1045,17 @@ paint3(Reg *r, int bn, int32 rb, int rn) ...@@ -1257,17 +1045,17 @@ paint3(Reg *r, int bn, int32 rb, int rn)
r->regu |= rb; r->regu |= rb;
if(r->refbehind.b[z] & bb) if(r->refbehind.b[z] & bb)
for(r1 = r->p2; r1 != R; r1 = r1->p2link) for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
if(r1->refahead.b[z] & bb) if(r1->refahead.b[z] & bb)
paint3(r1, bn, rb, rn); paint3(r1, bn, rb, rn);
if(!(r->refahead.b[z] & bb)) if(!(r->refahead.b[z] & bb))
break; break;
r1 = r->s2; r1 = (Reg*)r->f.s2;
if(r1 != R) if(r1 != R)
if(r1->refbehind.b[z] & bb) if(r1->refbehind.b[z] & bb)
paint3(r1, bn, rb, rn); paint3(r1, bn, rb, rn);
r = r->s1; r = (Reg*)r->f.s1;
if(r == R) if(r == R)
break; break;
if(r->act.b[z] & bb) if(r->act.b[z] & bb)
...@@ -1331,12 +1119,15 @@ BtoF(int32 b) ...@@ -1331,12 +1119,15 @@ BtoF(int32 b)
} }
void void
dumpone(Reg *r) dumpone(Flow *f, int isreg)
{ {
int z; int z;
Bits bit; Bits bit;
Reg *r;
print("%d:%P", r->loop, r->prog); print("%d:%P", f->loop, f->prog);
if(isreg) {
r = (Reg*)f;
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
bit.b[z] = bit.b[z] =
r->set.b[z] | r->set.b[z] |
...@@ -1370,21 +1161,22 @@ dumpone(Reg *r) ...@@ -1370,21 +1161,22 @@ dumpone(Reg *r)
if(bany(&r->act)) if(bany(&r->act))
print(" a:%Q ", r->act); print(" a:%Q ", r->act);
} }
}
print("\n"); print("\n");
} }
void void
dumpit(char *str, Reg *r0) dumpit(char *str, Flow *r0, int isreg)
{ {
Reg *r, *r1; Flow *r, *r1;
print("\n%s\n", str); print("\n%s\n", str);
for(r = r0; r != R; r = r->link) { for(r = r0; r != nil; r = r->link) {
dumpone(r); dumpone(r, isreg);
r1 = r->p2; r1 = r->p2;
if(r1 != R) { if(r1 != nil) {
print(" pred:"); print(" pred:");
for(; r1 != R; r1 = r1->p2link) for(; r1 != nil; r1 = r1->p2link)
print(" %.4ud", r1->prog->loc); print(" %.4ud", r1->prog->loc);
print("\n"); print("\n");
} }
......
...@@ -55,6 +55,7 @@ typedef struct Rgn Rgn; ...@@ -55,6 +55,7 @@ typedef struct Rgn Rgn;
// r->prog->opt points back to r. // r->prog->opt points back to r.
struct Reg struct Reg
{ {
Flow f;
Bits set; // variables written by this instruction. Bits set; // variables written by this instruction.
Bits use1; // variables read by prog->from. Bits use1; // variables read by prog->from.
...@@ -96,7 +97,6 @@ struct Rgn ...@@ -96,7 +97,6 @@ struct Rgn
EXTERN int32 exregoffset; // not set EXTERN int32 exregoffset; // not set
EXTERN int32 exfregoffset; // not set EXTERN int32 exfregoffset; // not set
EXTERN Reg* firstr; EXTERN Reg* firstr;
EXTERN Reg* lastr;
EXTERN Reg zreg; EXTERN Reg zreg;
EXTERN Reg* freer; EXTERN Reg* freer;
EXTERN Reg** rpo2r; EXTERN Reg** rpo2r;
...@@ -141,28 +141,16 @@ void paint1(Reg*, int); ...@@ -141,28 +141,16 @@ void paint1(Reg*, int);
uint32 paint2(Reg*, int); uint32 paint2(Reg*, int);
void paint3(Reg*, int, int32, int); void paint3(Reg*, int, int32, int);
void addreg(Adr*, int); void addreg(Adr*, int);
void dumpone(Reg*); void dumpone(Flow*, int);
void dumpit(char*, Reg*); void dumpit(char*, Flow*, int);
/* /*
* peep.c * peep.c
*/ */
void peep(void); void peep(Prog*);
void excise(Reg*); void excise(Flow*);
Reg* uniqp(Reg*);
Reg* uniqs(Reg*);
int regtyp(Adr*);
int anyvar(Adr*);
int subprop(Reg*);
int copyprop(Reg*);
int copy1(Adr*, Adr*, Reg*, int);
int copyu(Prog*, Adr*, Adr*); int copyu(Prog*, Adr*, Adr*);
int copyas(Adr*, Adr*);
int copyau(Adr*, Adr*);
int copysub(Adr*, Adr*, Adr*, int);
int copysub1(Prog*, Adr*, Adr*, int);
int32 RtoB(int); int32 RtoB(int);
int32 FtoB(int); int32 FtoB(int);
int BtoR(int32); int BtoR(int32);
......
...@@ -35,8 +35,15 @@ ...@@ -35,8 +35,15 @@
#define REGEXT 0 #define REGEXT 0
static void conprop(Reg *r); static void conprop(Flow *r);
static void elimshortmov(Reg *r); static void elimshortmov(Graph*);
static int regtyp(Adr*);
static int subprop(Flow*);
static int copyprop(Graph*, Flow*);
static int copy1(Adr*, Adr*, Flow*, int);
static int copyas(Adr*, Adr*);
static int copyau(Adr*, Adr*);
static int copysub(Adr*, Adr*, Adr*, int);
// do we need the carry bit // do we need the carry bit
static int static int
...@@ -55,19 +62,19 @@ needc(Prog *p) ...@@ -55,19 +62,19 @@ needc(Prog *p)
return 0; return 0;
} }
static Reg* static Flow*
rnops(Reg *r) rnops(Flow *r)
{ {
Prog *p; Prog *p;
Reg *r1; Flow *r1;
if(r != R) if(r != nil)
for(;;) { for(;;) {
p = r->prog; p = r->prog;
if(p->as != ANOP || p->from.type != D_NONE || p->to.type != D_NONE) if(p->as != ANOP || p->from.type != D_NONE || p->to.type != D_NONE)
break; break;
r1 = uniqs(r); r1 = uniqs(r);
if(r1 == R) if(r1 == nil)
break; break;
r = r1; r = r1;
} }
...@@ -75,49 +82,25 @@ rnops(Reg *r) ...@@ -75,49 +82,25 @@ rnops(Reg *r)
} }
void void
peep(void) peep(Prog *firstp)
{ {
Reg *r, *r1, *r2; Flow *r, *r1;
Graph *g;
Prog *p, *p1; Prog *p, *p1;
int t; int t;
ProgInfo info;
/*
* complete R structure
*/
for(r=firstr; r!=R; r=r1) {
r1 = r->link;
if(r1 == R)
break;
for(p = r->prog->link; p != r1->prog; p = p->link) {
proginfo(&info, p);
if(info.flags & Skip)
continue;
r2 = rega(); g = flowstart(firstp, sizeof(Flow));
r->link = r2; if(g == nil)
r2->link = r1; return;
r2->prog = p;
p->opt = r2;
r2->p1 = r;
r->s1 = r2;
r2->s1 = r1;
r1->p1 = r2;
r = r2;
}
}
// byte, word arithmetic elimination. // byte, word arithmetic elimination.
elimshortmov(r); elimshortmov(g);
// constant propagation // constant propagation
// find MOV $con,R followed by // find MOV $con,nil followed by
// another MOV $con,R without // another MOV $con,nil without
// setting R in the interim // setting nil in the interim
for(r=firstr; r!=R; r=r->link) { for(r=g->start; r!=nil; r=r->link) {
p = r->prog; p = r->prog;
switch(p->as) { switch(p->as) {
case ALEAL: case ALEAL:
...@@ -141,10 +124,10 @@ peep(void) ...@@ -141,10 +124,10 @@ peep(void)
loop1: loop1:
if(debug['P'] && debug['v']) if(debug['P'] && debug['v'])
dumpit("loop1", firstr); dumpit("loop1", g->start, 0);
t = 0; t = 0;
for(r=firstr; r!=R; r=r->link) { for(r=g->start; r!=nil; r=r->link) {
p = r->prog; p = r->prog;
switch(p->as) { switch(p->as) {
case AMOVL: case AMOVL:
...@@ -152,11 +135,11 @@ loop1: ...@@ -152,11 +135,11 @@ loop1:
case AMOVSD: case AMOVSD:
if(regtyp(&p->to)) if(regtyp(&p->to))
if(regtyp(&p->from)) { if(regtyp(&p->from)) {
if(copyprop(r)) { if(copyprop(g, r)) {
excise(r); excise(r);
t++; t++;
} else } else
if(subprop(r) && copyprop(r)) { if(subprop(r) && copyprop(g, r)) {
excise(r); excise(r);
t++; t++;
} }
...@@ -169,7 +152,7 @@ loop1: ...@@ -169,7 +152,7 @@ loop1:
case AMOVWLSX: case AMOVWLSX:
if(regtyp(&p->to)) { if(regtyp(&p->to)) {
r1 = rnops(uniqs(r)); r1 = rnops(uniqs(r));
if(r1 != R) { if(r1 != nil) {
p1 = r1->prog; p1 = r1->prog;
if(p->as == p1->as && p->to.type == p1->from.type){ if(p->as == p1->as && p->to.type == p1->from.type){
p1->as = AMOVL; p1->as = AMOVL;
...@@ -232,7 +215,7 @@ loop1: ...@@ -232,7 +215,7 @@ loop1:
// can be replaced by MOVAPD, which moves the pair of float64s // can be replaced by MOVAPD, which moves the pair of float64s
// instead of just the lower one. We only use the lower one, but // instead of just the lower one. We only use the lower one, but
// the processor can do better if we do moves using both. // the processor can do better if we do moves using both.
for(r=firstr; r!=R; r=r->link) { for(r=g->start; r!=nil; r=r->link) {
p = r->prog; p = r->prog;
if(p->as == AMOVSD) if(p->as == AMOVSD)
if(regtyp(&p->from)) if(regtyp(&p->from))
...@@ -242,7 +225,7 @@ loop1: ...@@ -242,7 +225,7 @@ loop1:
} }
void void
excise(Reg *r) excise(Flow *r)
{ {
Prog *p; Prog *p;
...@@ -257,39 +240,7 @@ excise(Reg *r) ...@@ -257,39 +240,7 @@ excise(Reg *r)
ostats.ndelmov++; ostats.ndelmov++;
} }
Reg* static int
uniqp(Reg *r)
{
Reg *r1;
r1 = r->p1;
if(r1 == R) {
r1 = r->p2;
if(r1 == R || r1->p2link != R)
return R;
} else
if(r->p2 != R)
return R;
return r1;
}
Reg*
uniqs(Reg *r)
{
Reg *r1;
r1 = r->s1;
if(r1 == R) {
r1 = r->s2;
if(r1 == R)
return R;
} else
if(r->s2 != R)
return R;
return r1;
}
int
regtyp(Adr *a) regtyp(Adr *a)
{ {
int t; int t;
...@@ -310,11 +261,12 @@ regtyp(Adr *a) ...@@ -310,11 +261,12 @@ regtyp(Adr *a)
// can smash the entire 64-bit register without // can smash the entire 64-bit register without
// causing any trouble. // causing any trouble.
static void static void
elimshortmov(Reg *r) elimshortmov(Graph *g)
{ {
Prog *p; Prog *p;
Flow *r;
for(r=firstr; r!=R; r=r->link) { for(r=g->start; r!=nil; r=r->link) {
p = r->prog; p = r->prog;
if(regtyp(&p->to)) { if(regtyp(&p->to)) {
switch(p->as) { switch(p->as) {
...@@ -409,12 +361,12 @@ elimshortmov(Reg *r) ...@@ -409,12 +361,12 @@ elimshortmov(Reg *r)
* hopefully, then the former or latter MOV * hopefully, then the former or latter MOV
* will be eliminated by copy propagation. * will be eliminated by copy propagation.
*/ */
int static int
subprop(Reg *r0) subprop(Flow *r0)
{ {
Prog *p; Prog *p;
Adr *v1, *v2; Adr *v1, *v2;
Reg *r; Flow *r;
int t; int t;
ProgInfo info; ProgInfo info;
...@@ -425,10 +377,10 @@ subprop(Reg *r0) ...@@ -425,10 +377,10 @@ subprop(Reg *r0)
v2 = &p->to; v2 = &p->to;
if(!regtyp(v2)) if(!regtyp(v2))
return 0; return 0;
for(r=uniqp(r0); r!=R; r=uniqp(r)) { for(r=uniqp(r0); r!=nil; r=uniqp(r)) {
if(debug['P'] && debug['v']) if(debug['P'] && debug['v'])
print("\t? %P\n", r->prog); print("\t? %P\n", r->prog);
if(uniqs(r) == R) if(uniqs(r) == nil)
break; break;
p = r->prog; p = r->prog;
proginfo(&info, p); proginfo(&info, p);
...@@ -483,25 +435,25 @@ gotit: ...@@ -483,25 +435,25 @@ gotit:
* set v1 F=1 * set v1 F=1
* set v2 return success * set v2 return success
*/ */
int static int
copyprop(Reg *r0) copyprop(Graph *g, Flow *r0)
{ {
Prog *p; Prog *p;
Adr *v1, *v2; Adr *v1, *v2;
Reg *r; Flow *r;
p = r0->prog; p = r0->prog;
v1 = &p->from; v1 = &p->from;
v2 = &p->to; v2 = &p->to;
if(copyas(v1, v2)) if(copyas(v1, v2))
return 1; return 1;
for(r=firstr; r!=R; r=r->link) for(r=g->start; r!=nil; r=r->link)
r->active = 0; r->active = 0;
return copy1(v1, v2, r0->s1, 0); return copy1(v1, v2, r0->s1, 0);
} }
int static int
copy1(Adr *v1, Adr *v2, Reg *r, int f) copy1(Adr *v1, Adr *v2, Flow *r, int f)
{ {
int t; int t;
Prog *p; Prog *p;
...@@ -514,11 +466,11 @@ copy1(Adr *v1, Adr *v2, Reg *r, int f) ...@@ -514,11 +466,11 @@ copy1(Adr *v1, Adr *v2, Reg *r, int f)
r->active = 1; r->active = 1;
if(debug['P']) if(debug['P'])
print("copy %D->%D f=%d\n", v1, v2, f); print("copy %D->%D f=%d\n", v1, v2, f);
for(; r != R; r = r->s1) { for(; r != nil; r = r->s1) {
p = r->prog; p = r->prog;
if(debug['P']) if(debug['P'])
print("%P", p); print("%P", p);
if(!f && uniqp(r) == R) { if(!f && uniqp(r) == nil) {
f = 1; f = 1;
if(debug['P']) if(debug['P'])
print("; merge; f=%d", f); print("; merge; f=%d", f);
...@@ -672,7 +624,7 @@ copyu(Prog *p, Adr *v, Adr *s) ...@@ -672,7 +624,7 @@ copyu(Prog *p, Adr *v, Adr *s)
* could be set/use depending on * could be set/use depending on
* semantics * semantics
*/ */
int static int
copyas(Adr *a, Adr *v) copyas(Adr *a, Adr *v)
{ {
if(a->type != v->type) if(a->type != v->type)
...@@ -688,7 +640,7 @@ copyas(Adr *a, Adr *v) ...@@ -688,7 +640,7 @@ copyas(Adr *a, Adr *v)
/* /*
* either direct or indirect * either direct or indirect
*/ */
int static int
copyau(Adr *a, Adr *v) copyau(Adr *a, Adr *v)
{ {
...@@ -707,7 +659,7 @@ copyau(Adr *a, Adr *v) ...@@ -707,7 +659,7 @@ copyau(Adr *a, Adr *v)
* substitute s for v in a * substitute s for v in a
* return failure to substitute * return failure to substitute
*/ */
int static int
copysub(Adr *a, Adr *v, Adr *s, int f) copysub(Adr *a, Adr *v, Adr *s, int f)
{ {
int t; int t;
...@@ -740,9 +692,9 @@ copysub(Adr *a, Adr *v, Adr *s, int f) ...@@ -740,9 +692,9 @@ copysub(Adr *a, Adr *v, Adr *s, int f)
} }
static void static void
conprop(Reg *r0) conprop(Flow *r0)
{ {
Reg *r; Flow *r;
Prog *p, *p0; Prog *p, *p0;
int t; int t;
Adr *v0; Adr *v0;
...@@ -753,9 +705,9 @@ conprop(Reg *r0) ...@@ -753,9 +705,9 @@ conprop(Reg *r0)
loop: loop:
r = uniqs(r); r = uniqs(r);
if(r == R || r == r0) if(r == nil || r == r0)
return; return;
if(uniqp(r) == R) if(uniqp(r) == nil)
return; return;
p = r->prog; p = r->prog;
......
...@@ -40,21 +40,6 @@ static int first = 1; ...@@ -40,21 +40,6 @@ static int first = 1;
static void fixtemp(Prog*); static void fixtemp(Prog*);
Reg*
rega(void)
{
Reg *r;
r = freer;
if(r == R) {
r = mal(sizeof(*r));
} else
freer = r->link;
*r = zreg;
return r;
}
int int
rcmp(const void *a1, const void *a2) rcmp(const void *a1, const void *a2)
{ {
...@@ -129,8 +114,9 @@ regopt(Prog *firstp) ...@@ -129,8 +114,9 @@ regopt(Prog *firstp)
{ {
Reg *r, *r1; Reg *r, *r1;
Prog *p; Prog *p;
ProgInfo info, info2; Graph *g;
int i, z, nr; ProgInfo info;
int i, z;
uint32 vreg; uint32 vreg;
Bits bit; Bits bit;
...@@ -143,19 +129,6 @@ regopt(Prog *firstp) ...@@ -143,19 +129,6 @@ regopt(Prog *firstp)
fixtemp(firstp); fixtemp(firstp);
fixjmp(firstp); fixjmp(firstp);
// count instructions
nr = 0;
for(p=firstp; p!=P; p=p->link)
nr++;
// if too big dont bother
if(nr >= 10000) {
// print("********** %S is too big (%d)\n", curfn->nname->sym, nr);
return;
}
firstr = R;
lastr = R;
/* /*
* control flow is more complicated in generated go code * control flow is more complicated in generated go code
* than in generated c code. define pseudo-variables for * than in generated c code. define pseudo-variables for
...@@ -187,33 +160,14 @@ regopt(Prog *firstp) ...@@ -187,33 +160,14 @@ regopt(Prog *firstp)
* allocate pcs * allocate pcs
* find use and set of variables * find use and set of variables
*/ */
nr = 0; g = flowstart(firstp, sizeof(Reg));
for(p=firstp; p!=P; p=p->link) { if(g == nil)
proginfo(&info, p); return;
if(info.flags & Skip) firstr = (Reg*)g->start;
continue;
r = rega();
nr++;
if(firstr == R) {
firstr = r;
lastr = r;
} else {
lastr->link = r;
r->p1 = lastr;
lastr->s1 = r;
lastr = r;
}
r->prog = p;
p->opt = r;
r1 = r->p1; for(r = firstr; r != R; r = (Reg*)r->f.link) {
if(r1 != R) { p = r->f.prog;
proginfo(&info2, r1->prog); proginfo(&info, p);
if(info2.flags & Break) {
r->p1 = R;
r1->s1 = R;
}
}
// Avoid making variables for direct-called functions. // Avoid making variables for direct-called functions.
if(p->as == ACALL && p->to.type == D_EXTERN) if(p->as == ACALL && p->to.type == D_EXTERN)
...@@ -263,45 +217,19 @@ regopt(Prog *firstp) ...@@ -263,45 +217,19 @@ regopt(Prog *firstp)
} }
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
dumpit("pass1", firstr); dumpit("pass1", &firstr->f, 1);
/* /*
* pass 2 * pass 2
* turn branch references to pointers
* build back pointers
*/
for(r=firstr; r!=R; r=r->link) {
p = r->prog;
if(p->to.type == D_BRANCH) {
if(p->to.u.branch == P)
fatal("pnil %P", p);
r1 = p->to.u.branch->opt;
if(r1 == R)
fatal("rnil %P", p);
if(r1 == r) {
//fatal("ref to self %P", p);
continue;
}
r->s2 = r1;
r->p2link = r1->p2;
r1->p2 = r;
}
}
if(debug['R'] && debug['v'])
dumpit("pass2", firstr);
/*
* pass 2.5
* find looping structure * find looping structure
*/ */
for(r = firstr; r != R; r = r->link) for(r = firstr; r != R; r = (Reg*)r->f.link)
r->active = 0; r->f.active = 0;
change = 0; change = 0;
loopit(firstr, nr); flowrpo(g);
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
dumpit("pass2.5", firstr); dumpit("pass2", &firstr->f, 1);
/* /*
* pass 3 * pass 3
...@@ -310,17 +238,17 @@ regopt(Prog *firstp) ...@@ -310,17 +238,17 @@ regopt(Prog *firstp)
*/ */
loop1: loop1:
change = 0; change = 0;
for(r = firstr; r != R; r = r->link) for(r = firstr; r != R; r = (Reg*)r->f.link)
r->active = 0; r->f.active = 0;
for(r = firstr; r != R; r = r->link) for(r = firstr; r != R; r = (Reg*)r->f.link)
if(r->prog->as == ARET) if(r->f.prog->as == ARET)
prop(r, zbits, zbits); prop(r, zbits, zbits);
loop11: loop11:
/* pick up unreachable code */ /* pick up unreachable code */
i = 0; i = 0;
for(r = firstr; r != R; r = r1) { for(r = firstr; r != R; r = r1) {
r1 = r->link; r1 = (Reg*)r->f.link;
if(r1 && r1->active && !r->active) { if(r1 && r1->f.active && !r->f.active) {
prop(r, zbits, zbits); prop(r, zbits, zbits);
i = 1; i = 1;
} }
...@@ -331,7 +259,7 @@ loop11: ...@@ -331,7 +259,7 @@ loop11:
goto loop1; goto loop1;
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
dumpit("pass3", firstr); dumpit("pass3", &firstr->f, 1);
/* /*
* pass 4 * pass 4
...@@ -340,20 +268,20 @@ loop11: ...@@ -340,20 +268,20 @@ loop11:
*/ */
loop2: loop2:
change = 0; change = 0;
for(r = firstr; r != R; r = r->link) for(r = firstr; r != R; r = (Reg*)r->f.link)
r->active = 0; r->f.active = 0;
synch(firstr, zbits); synch(firstr, zbits);
if(change) if(change)
goto loop2; goto loop2;
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
dumpit("pass4", firstr); dumpit("pass4", &firstr->f, 1);
/* /*
* pass 4.5 * pass 4.5
* move register pseudo-variables into regu. * move register pseudo-variables into regu.
*/ */
for(r = firstr; r != R; r = r->link) { for(r = firstr; r != R; r = (Reg*)r->f.link) {
r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
r->set.b[0] &= ~REGBITS; r->set.b[0] &= ~REGBITS;
...@@ -377,26 +305,26 @@ loop2: ...@@ -377,26 +305,26 @@ loop2:
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
if(bany(&bit) && !r->refset) { if(bany(&bit) && !r->f.refset) {
// should never happen - all variables are preset // should never happen - all variables are preset
if(debug['w']) if(debug['w'])
print("%L: used and not set: %Q\n", r->prog->lineno, bit); print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
r->refset = 1; r->f.refset = 1;
} }
} }
for(r = firstr; r != R; r = r->link) for(r = firstr; r != R; r = (Reg*)r->f.link)
r->act = zbits; r->act = zbits;
rgp = region; rgp = region;
nregion = 0; nregion = 0;
for(r = firstr; r != R; r = r->link) { for(r = firstr; r != R; r = (Reg*)r->f.link) {
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
bit.b[z] = r->set.b[z] & bit.b[z] = r->set.b[z] &
~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
if(bany(&bit) && !r->refset) { if(bany(&bit) && !r->f.refset) {
if(debug['w']) if(debug['w'])
print("%L: set and not used: %Q\n", r->prog->lineno, bit); print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
r->refset = 1; r->f.refset = 1;
excise(r); excise(&r->f);
} }
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
...@@ -438,19 +366,23 @@ brk: ...@@ -438,19 +366,23 @@ brk:
} }
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
dumpit("pass6", firstr); dumpit("pass6", &firstr->f, 1);
/*
* free aux structures. peep allocates new ones.
*/
flowend(g);
firstr = R;
/* /*
* pass 7 * pass 7
* peep-hole on basic block * peep-hole on basic block
*/ */
if(!debug['R'] || debug['P']) { if(!debug['R'] || debug['P'])
peep(); peep(firstp);
}
/* /*
* eliminate nops * eliminate nops
* free aux structures
*/ */
for(p=firstp; p!=P; p=p->link) { for(p=firstp; p!=P; p=p->link) {
while(p->link != P && p->link->as == ANOP) while(p->link != P && p->link->as == ANOP)
...@@ -468,11 +400,6 @@ brk: ...@@ -468,11 +400,6 @@ brk:
fatal("invalid use of %R with GO386=387: %P", p->to.type, p); fatal("invalid use of %R with GO386=387: %P", p->to.type, p);
} }
if(lastr != R) {
lastr->link = freer;
freer = firstr;
}
if(debug['R']) { if(debug['R']) {
if(ostats.ncvtreg || if(ostats.ncvtreg ||
ostats.nspill || ostats.nspill ||
...@@ -515,7 +442,7 @@ addmove(Reg *r, int bn, int rn, int f) ...@@ -515,7 +442,7 @@ addmove(Reg *r, int bn, int rn, int f)
clearp(p1); clearp(p1);
p1->loc = 9999; p1->loc = 9999;
p = r->prog; p = r->f.prog;
p1->link = p->link; p1->link = p->link;
p->link = p1; p->link = p1;
p1->lineno = p->lineno; p1->lineno = p->lineno;
...@@ -732,7 +659,7 @@ prop(Reg *r, Bits ref, Bits cal) ...@@ -732,7 +659,7 @@ prop(Reg *r, Bits ref, Bits cal)
Reg *r1, *r2; Reg *r1, *r2;
int z; int z;
for(r1 = r; r1 != R; r1 = r1->p1) { for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
for(z=0; z<BITS; z++) { for(z=0; z<BITS; z++) {
ref.b[z] |= r1->refahead.b[z]; ref.b[z] |= r1->refahead.b[z];
if(ref.b[z] != r1->refahead.b[z]) { if(ref.b[z] != r1->refahead.b[z]) {
...@@ -745,9 +672,9 @@ prop(Reg *r, Bits ref, Bits cal) ...@@ -745,9 +672,9 @@ prop(Reg *r, Bits ref, Bits cal)
change++; change++;
} }
} }
switch(r1->prog->as) { switch(r1->f.prog->as) {
case ACALL: case ACALL:
if(noreturn(r1->prog)) if(noreturn(r1->f.prog))
break; break;
for(z=0; z<BITS; z++) { for(z=0; z<BITS; z++) {
cal.b[z] |= ref.b[z] | externs.b[z]; cal.b[z] |= ref.b[z] | externs.b[z];
...@@ -787,159 +714,22 @@ prop(Reg *r, Bits ref, Bits cal) ...@@ -787,159 +714,22 @@ prop(Reg *r, Bits ref, Bits cal)
r1->refbehind.b[z] = ref.b[z]; r1->refbehind.b[z] = ref.b[z];
r1->calbehind.b[z] = cal.b[z]; r1->calbehind.b[z] = cal.b[z];
} }
if(r1->active) if(r1->f.active)
break; break;
r1->active = 1; r1->f.active = 1;
} }
for(; r != r1; r = r->p1) for(; r != r1; r = (Reg*)r->f.p1)
for(r2 = r->p2; r2 != R; r2 = r2->p2link) for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
prop(r2, r->refbehind, r->calbehind); prop(r2, r->refbehind, r->calbehind);
} }
/*
* find looping structure
*
* 1) find reverse postordering
* 2) find approximate dominators,
* the actual dominators if the flow graph is reducible
* otherwise, dominators plus some other non-dominators.
* See Matthew S. Hecht and Jeffrey D. Ullman,
* "Analysis of a Simple Algorithm for Global Data Flow Problems",
* Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
* Oct. 1-3, 1973, pp. 207-217.
* 3) find all nodes with a predecessor dominated by the current node.
* such a node is a loop head.
* recursively, all preds with a greater rpo number are in the loop
*/
// postorder walks the flow graph depth-first starting at r, visiting s1
// before s2, and stores each node in rpo2r after all of the successors
// reached through it have been stored.  n is the next free index in rpo2r;
// the index following the last node stored is returned.
// r->rpo is set to 1 as a "visited" mark: a node whose rpo is still 0 has
// not been reached.
int32
postorder(Reg *r, Reg **rpo2r, int32 n)
{
	Reg *succ;

	r->rpo = 1;

	succ = r->s1;
	if(succ != R && succ->rpo == 0)
		n = postorder(succ, rpo2r, n);

	succ = r->s2;
	if(succ != R && succ->rpo == 0)
		n = postorder(succ, rpo2r, n);

	rpo2r[n] = r;
	return n + 1;
}
// rpolca returns the lowest common ancestor of rpo1 and rpo2 in the tree
// described by idom (indexed by reverse-postorder number).  The larger of
// the two numbers is repeatedly replaced by its idom entry until both are
// equal.  rpo1 == -1 means "no first node" and yields rpo2 unchanged.
// An idom entry that does not decrease is reported with fatal("bad idom").
int32
rpolca(int32 *idom, int32 rpo1, int32 rpo2)
{
	int32 lo, hi, up;

	if(rpo1 == -1)
		return rpo2;

	lo = rpo1;
	hi = rpo2;
	while(lo != hi) {
		// Keep lo as the smaller number.
		if(lo > hi) {
			up = hi;
			hi = lo;
			lo = up;
		}
		// lo < hi holds here, so at least one step up is needed.
		do {
			up = idom[hi];
			if(up >= hi)
				fatal("bad idom");
			hi = up;
		} while(lo < hi);
	}
	return lo;
}
// doms reports whether node r appears on the idom chain of node s
// (both given as reverse-postorder numbers): s is moved up through idom
// while it is greater than r, and the result is whether it lands on r.
int
doms(int32 *idom, int32 r, int32 s)
{
	for(; s > r; s = idom[s])
		;
	return s == r;
}
// loophead reports whether r is a loop head: it returns 1 if any
// predecessor of r (p1, or any node on the p2/p2link list) is dominated
// by r according to idom, and 0 otherwise.
int
loophead(int32 *idom, Reg *r)
{
	int32 head;
	Reg *pred;

	head = r->rpo;

	pred = r->p1;
	if(pred != R && doms(idom, head, pred->rpo))
		return 1;

	for(pred = r->p2; pred != R; pred = pred->p2link) {
		if(doms(idom, head, pred->rpo))
			return 1;
	}
	return 0;
}
// loopmark adds LOOP to r->loop and then does the same, recursively, for
// every predecessor of r.  The walk stops at nodes whose rpo number is
// below head and at nodes already marked for this head; r->active records
// the head number a node was last marked for.
// rpo2r is only passed along to the recursive calls.
void
loopmark(Reg **rpo2r, int32 head, Reg *r)
{
	Reg *pred;

	if(r->rpo < head)
		return;
	if(r->active == head)
		return;

	r->active = head;
	r->loop += LOOP;

	pred = r->p1;
	if(pred != R)
		loopmark(rpo2r, head, pred);
	for(pred = r->p2; pred != R; pred = pred->p2link)
		loopmark(rpo2r, head, pred);
}
// loopit computes the loop structure of the flow graph rooted at r, which
// has at most nr nodes:
//   - numbers the reachable nodes in reverse postorder (r->rpo, rpo2r),
//   - computes (approximate) immediate dominators into idom,
//   - increments loop by 1 on every reachable node and by LOOP for every
//     loop the node is found to be part of.
// The global rpo2r and idom arrays are (re)allocated with mal whenever nr
// exceeds the largest size seen so far (maxnr).
void
loopit(Reg *r, int32 nr)
{
	Reg *node, *pred;
	int32 i, j, cnt, dom, self;

	if(nr > maxnr) {
		rpo2r = mal(nr * sizeof(Reg*));
		idom = mal(nr * sizeof(int32));
		maxnr = nr;
	}

	cnt = postorder(r, rpo2r, 0);
	if(cnt > nr)
		fatal("too many reg nodes %d %d", cnt, nr);
	nr = cnt;

	// postorder filled rpo2r in postorder; flip it to get reverse postorder.
	for(i = 0, j = nr - 1; i < j; i++, j--) {
		node = rpo2r[i];
		rpo2r[i] = rpo2r[j];
		rpo2r[j] = node;
	}

	for(i = 0; i < nr; i++)
		rpo2r[i]->rpo = i;

	idom[0] = 0;
	for(i = 0; i < nr; i++) {
		node = rpo2r[i];
		self = node->rpo;
		dom = -1;
		// rpo2r[x->rpo] == x protects against considering dead code,
		// which has x->rpo == 0.
		pred = node->p1;
		if(pred != R && rpo2r[pred->rpo] == pred && pred->rpo < self)
			dom = pred->rpo;
		for(pred = node->p2; pred != nil; pred = pred->p2link) {
			if(rpo2r[pred->rpo] == pred && pred->rpo < self)
				dom = rpolca(idom, dom, pred->rpo);
		}
		idom[i] = dom;
	}

	for(i = 0; i < nr; i++) {
		node = rpo2r[i];
		node->loop++;
		if(node->p2 != R && loophead(idom, node))
			loopmark(rpo2r, i, node);
	}
}
void void
synch(Reg *r, Bits dif) synch(Reg *r, Bits dif)
{ {
Reg *r1; Reg *r1;
int z; int z;
for(r1 = r; r1 != R; r1 = r1->s1) { for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
for(z=0; z<BITS; z++) { for(z=0; z<BITS; z++) {
dif.b[z] = (dif.b[z] & dif.b[z] = (dif.b[z] &
~(~r1->refbehind.b[z] & r1->refahead.b[z])) | ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
...@@ -949,13 +739,13 @@ synch(Reg *r, Bits dif) ...@@ -949,13 +739,13 @@ synch(Reg *r, Bits dif)
change++; change++;
} }
} }
if(r1->active) if(r1->f.active)
break; break;
r1->active = 1; r1->f.active = 1;
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
if(r1->s2 != R) if((Reg*)r1->f.s2 != R)
synch(r1->s2, dif); synch((Reg*)r1->f.s2, dif);
} }
} }
...@@ -1021,7 +811,7 @@ paint1(Reg *r, int bn) ...@@ -1021,7 +811,7 @@ paint1(Reg *r, int bn)
for(;;) { for(;;) {
if(!(r->refbehind.b[z] & bb)) if(!(r->refbehind.b[z] & bb))
break; break;
r1 = r->p1; r1 = (Reg*)r->f.p1;
if(r1 == R) if(r1 == R)
break; break;
if(!(r1->refahead.b[z] & bb)) if(!(r1->refahead.b[z] & bb))
...@@ -1032,45 +822,45 @@ paint1(Reg *r, int bn) ...@@ -1032,45 +822,45 @@ paint1(Reg *r, int bn)
} }
if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
change -= CLOAD * r->loop; change -= CLOAD * r->f.loop;
} }
for(;;) { for(;;) {
r->act.b[z] |= bb; r->act.b[z] |= bb;
p = r->prog; p = r->f.prog;
if(r->use1.b[z] & bb) { if(r->use1.b[z] & bb) {
change += CREF * r->loop; change += CREF * r->f.loop;
if(p->as == AFMOVL || p->as == AFMOVW) if(p->as == AFMOVL || p->as == AFMOVW)
if(BtoR(bb) != D_F0) if(BtoR(bb) != D_F0)
change = -CINF; change = -CINF;
} }
if((r->use2.b[z]|r->set.b[z]) & bb) { if((r->use2.b[z]|r->set.b[z]) & bb) {
change += CREF * r->loop; change += CREF * r->f.loop;
if(p->as == AFMOVL || p->as == AFMOVW) if(p->as == AFMOVL || p->as == AFMOVW)
if(BtoR(bb) != D_F0) if(BtoR(bb) != D_F0)
change = -CINF; change = -CINF;
} }
if(STORE(r) & r->regdiff.b[z] & bb) { if(STORE(r) & r->regdiff.b[z] & bb) {
change -= CLOAD * r->loop; change -= CLOAD * r->f.loop;
if(p->as == AFMOVL || p->as == AFMOVW) if(p->as == AFMOVL || p->as == AFMOVW)
if(BtoR(bb) != D_F0) if(BtoR(bb) != D_F0)
change = -CINF; change = -CINF;
} }
if(r->refbehind.b[z] & bb) if(r->refbehind.b[z] & bb)
for(r1 = r->p2; r1 != R; r1 = r1->p2link) for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
if(r1->refahead.b[z] & bb) if(r1->refahead.b[z] & bb)
paint1(r1, bn); paint1(r1, bn);
if(!(r->refahead.b[z] & bb)) if(!(r->refahead.b[z] & bb))
break; break;
r1 = r->s2; r1 = (Reg*)r->f.s2;
if(r1 != R) if(r1 != R)
if(r1->refbehind.b[z] & bb) if(r1->refbehind.b[z] & bb)
paint1(r1, bn); paint1(r1, bn);
r = r->s1; r = (Reg*)r->f.s1;
if(r == R) if(r == R)
break; break;
if(r->act.b[z] & bb) if(r->act.b[z] & bb)
...@@ -1091,7 +881,7 @@ regset(Reg *r, uint32 bb) ...@@ -1091,7 +881,7 @@ regset(Reg *r, uint32 bb)
v = zprog.from; v = zprog.from;
while(b = bb & ~(bb-1)) { while(b = bb & ~(bb-1)) {
v.type = b & 0xFF ? BtoR(b): BtoF(b); v.type = b & 0xFF ? BtoR(b): BtoF(b);
c = copyu(r->prog, &v, A); c = copyu(r->f.prog, &v, A);
if(c == 3) if(c == 3)
set |= b; set |= b;
bb &= ~b; bb &= ~b;
...@@ -1110,7 +900,7 @@ reguse(Reg *r, uint32 bb) ...@@ -1110,7 +900,7 @@ reguse(Reg *r, uint32 bb)
v = zprog.from; v = zprog.from;
while(b = bb & ~(bb-1)) { while(b = bb & ~(bb-1)) {
v.type = b & 0xFF ? BtoR(b): BtoF(b); v.type = b & 0xFF ? BtoR(b): BtoF(b);
c = copyu(r->prog, &v, A); c = copyu(r->f.prog, &v, A);
if(c == 1 || c == 2 || c == 4) if(c == 1 || c == 2 || c == 4)
set |= b; set |= b;
bb &= ~b; bb &= ~b;
...@@ -1133,7 +923,7 @@ paint2(Reg *r, int bn) ...@@ -1133,7 +923,7 @@ paint2(Reg *r, int bn)
for(;;) { for(;;) {
if(!(r->refbehind.b[z] & bb)) if(!(r->refbehind.b[z] & bb))
break; break;
r1 = r->p1; r1 = (Reg*)r->f.p1;
if(r1 == R) if(r1 == R)
break; break;
if(!(r1->refahead.b[z] & bb)) if(!(r1->refahead.b[z] & bb))
...@@ -1148,17 +938,17 @@ paint2(Reg *r, int bn) ...@@ -1148,17 +938,17 @@ paint2(Reg *r, int bn)
vreg |= r->regu; vreg |= r->regu;
if(r->refbehind.b[z] & bb) if(r->refbehind.b[z] & bb)
for(r1 = r->p2; r1 != R; r1 = r1->p2link) for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
if(r1->refahead.b[z] & bb) if(r1->refahead.b[z] & bb)
vreg |= paint2(r1, bn); vreg |= paint2(r1, bn);
if(!(r->refahead.b[z] & bb)) if(!(r->refahead.b[z] & bb))
break; break;
r1 = r->s2; r1 = (Reg*)r->f.s2;
if(r1 != R) if(r1 != R)
if(r1->refbehind.b[z] & bb) if(r1->refbehind.b[z] & bb)
vreg |= paint2(r1, bn); vreg |= paint2(r1, bn);
r = r->s1; r = (Reg*)r->f.s1;
if(r == R) if(r == R)
break; break;
if(!(r->act.b[z] & bb)) if(!(r->act.b[z] & bb))
...@@ -1168,7 +958,7 @@ paint2(Reg *r, int bn) ...@@ -1168,7 +958,7 @@ paint2(Reg *r, int bn)
} }
bb = vreg; bb = vreg;
for(; r; r=r->s1) { for(; r; r=(Reg*)r->f.s1) {
x = r->regu & ~bb; x = r->regu & ~bb;
if(x) { if(x) {
vreg |= reguse(r, x); vreg |= reguse(r, x);
...@@ -1193,7 +983,7 @@ paint3(Reg *r, int bn, int32 rb, int rn) ...@@ -1193,7 +983,7 @@ paint3(Reg *r, int bn, int32 rb, int rn)
for(;;) { for(;;) {
if(!(r->refbehind.b[z] & bb)) if(!(r->refbehind.b[z] & bb))
break; break;
r1 = r->p1; r1 = (Reg*)r->f.p1;
if(r1 == R) if(r1 == R)
break; break;
if(!(r1->refahead.b[z] & bb)) if(!(r1->refahead.b[z] & bb))
...@@ -1207,7 +997,7 @@ paint3(Reg *r, int bn, int32 rb, int rn) ...@@ -1207,7 +997,7 @@ paint3(Reg *r, int bn, int32 rb, int rn)
addmove(r, bn, rn, 0); addmove(r, bn, rn, 0);
for(;;) { for(;;) {
r->act.b[z] |= bb; r->act.b[z] |= bb;
p = r->prog; p = r->f.prog;
if(r->use1.b[z] & bb) { if(r->use1.b[z] & bb) {
if(debug['R'] && debug['v']) if(debug['R'] && debug['v'])
...@@ -1229,17 +1019,17 @@ paint3(Reg *r, int bn, int32 rb, int rn) ...@@ -1229,17 +1019,17 @@ paint3(Reg *r, int bn, int32 rb, int rn)
r->regu |= rb; r->regu |= rb;
if(r->refbehind.b[z] & bb) if(r->refbehind.b[z] & bb)
for(r1 = r->p2; r1 != R; r1 = r1->p2link) for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
if(r1->refahead.b[z] & bb) if(r1->refahead.b[z] & bb)
paint3(r1, bn, rb, rn); paint3(r1, bn, rb, rn);
if(!(r->refahead.b[z] & bb)) if(!(r->refahead.b[z] & bb))
break; break;
r1 = r->s2; r1 = (Reg*)r->f.s2;
if(r1 != R) if(r1 != R)
if(r1->refbehind.b[z] & bb) if(r1->refbehind.b[z] & bb)
paint3(r1, bn, rb, rn); paint3(r1, bn, rb, rn);
r = r->s1; r = (Reg*)r->f.s1;
if(r == R) if(r == R)
break; break;
if(r->act.b[z] & bb) if(r->act.b[z] & bb)
...@@ -1297,12 +1087,15 @@ BtoF(int32 b) ...@@ -1297,12 +1087,15 @@ BtoF(int32 b)
} }
void void
dumpone(Reg *r) dumpone(Flow *f, int isreg)
{ {
int z; int z;
Bits bit; Bits bit;
Reg *r;
print("%d:%P", r->loop, r->prog); print("%d:%P", f->loop, f->prog);
if(isreg) {
r = (Reg*)f;
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
bit.b[z] = bit.b[z] =
r->set.b[z] | r->set.b[z] |
...@@ -1336,26 +1129,27 @@ dumpone(Reg *r) ...@@ -1336,26 +1129,27 @@ dumpone(Reg *r)
if(bany(&r->act)) if(bany(&r->act))
print(" a:%Q ", r->act); print(" a:%Q ", r->act);
} }
}
print("\n"); print("\n");
} }
void void
dumpit(char *str, Reg *r0) dumpit(char *str, Flow *r0, int isreg)
{ {
Reg *r, *r1; Flow *r, *r1;
print("\n%s\n", str); print("\n%s\n", str);
for(r = r0; r != R; r = r->link) { for(r = r0; r != nil; r = r->link) {
dumpone(r); dumpone(r, isreg);
r1 = r->p2; r1 = r->p2;
if(r1 != R) { if(r1 != nil) {
print(" pred:"); print(" pred:");
for(; r1 != R; r1 = r1->p2link) for(; r1 != nil; r1 = r1->p2link)
print(" %.4ud", r1->prog->loc); print(" %.4ud", r1->prog->loc);
print("\n"); print("\n");
} }
// r1 = r->s1; // r1 = r->s1;
// if(r1 != R) { // if(r1 != nil) {
// print(" succ:"); // print(" succ:");
// for(; r1 != R; r1 = r1->s1) // for(; r1 != R; r1 = r1->s1)
// print(" %.4ud", r1->prog->loc); // print(" %.4ud", r1->prog->loc);
......
...@@ -181,3 +181,284 @@ fixjmp(Prog *firstp) ...@@ -181,3 +181,284 @@ fixjmp(Prog *firstp)
print("\n"); print("\n");
} }
} }
// Control flow analysis. The Flow structures hold predecessor and successor
// information as well as basic loop analysis.
//
//	graph = flowstart(firstp, sizeof(Flow));
//	... use flow graph ...
//	flowend(graph); // free graph
//
// Typical uses of the flow graph are to iterate over all the flow-relevant instructions:
//
//	for(f = graph->start; f != nil; f = f->link)
//
// or, given an instruction f, to iterate over all the predecessors, which is
// f->p1 and this list:
//
//	for(f2 = f->p2; f2 != nil; f2 = f2->p2link)
//
// Often the Flow struct is embedded as the first field inside a larger struct S.
// In that case casts are needed to convert Flow* to S* in many places but the
// idea is the same. Pass sizeof(S) instead of sizeof(Flow) to flowstart.
//
// flowstart builds the graph for the instruction list starting at firstp.
// size is the number of bytes to reserve per annotated instruction.
// Instructions that proginfo flags as Skip get no node.  Every other
// instruction p gets a zeroed node, reachable as p->opt, with node->prog == p.
//
// It returns nil, leaving every p->opt nil, when there is nothing to
// annotate or when the function has 20000 or more annotated instructions.
// Otherwise it returns a graph that must be released with flowend.
Graph*
flowstart(Prog *firstp, int size)
{
	int nf;
	Flow *f, *f1, *start, *last;
	Graph *graph;
	Prog *p;
	ProgInfo info;

	// Count and mark instructions to annotate.
	nf = 0;
	for(p = firstp; p != P; p = p->link) {
		p->opt = nil; // should be already, but just in case
		proginfo(&info, p);
		if(info.flags & Skip)
			continue;
		p->opt = (void*)1;
		nf++;
	}

	if(nf == 0)
		return nil;

	if(nf >= 20000) {
		// fatal("%S is too big (%d instructions)", curfn->nname->sym, nf);
		// Remove the (void*)1 marks again: no graph is returned, so nothing
		// else would ever clear them, and they are not valid pointers.
		for(p = firstp; p != P; p = p->link)
			p->opt = nil;
		return nil;
	}

	// Allocate annotations and assign to instructions.
	// The nodes live in the same allocation, directly after the Graph header.
	graph = calloc(sizeof *graph + size*nf, 1);
	if(graph == nil)
		fatal("out of memory");
	start = (Flow*)(graph+1);
	last = nil;
	f = start;
	for(p = firstp; p != P; p = p->link) {
		if(p->opt == nil)
			continue;
		p->opt = f;
		f->prog = p;
		if(last)
			last->link = f;
		last = f;
		f = (Flow*)((uchar*)f + size);
	}

	// Fill in pred/succ information.
	for(f = start; f != nil; f = f->link) {
		p = f->prog;
		proginfo(&info, p);
		if(!(info.flags & Break)) {
			// Fall through to the next annotated instruction, if any.
			// The last node has no link; it simply gets no s1 successor.
			f1 = f->link;
			if(f1 != nil) {
				f->s1 = f1;
				f1->p1 = f;
			}
		}
		if(p->to.type == D_BRANCH) {
			if(p->to.u.branch == P)
				fatal("pnil %P", p);
			f1 = p->to.u.branch->opt;
			if(f1 == nil)
				fatal("fnil %P / %P", p, p->to.u.branch);
			if(f1 == f) {
				// A branch to itself is not recorded as an s2/p2 edge.
				//fatal("self loop %P", p);
				continue;
			}
			// Push f on the front of the target's p2 predecessor list.
			f->s2 = f1;
			f->p2link = f1->p2;
			f1->p2 = f;
		}
	}

	graph->start = start;
	graph->num = nf;
	return graph;
}
// flowend releases a graph built by flowstart.  It resets the opt pointer
// of every annotated instruction to nil and frees the graph, including the
// reverse-postorder table that flowrpo stores in graph->rpo (if any).
// A nil graph, as returned by flowstart when it declines to build one,
// is ignored.
void
flowend(Graph *graph)
{
	Flow *f;

	if(graph == nil)
		return;
	for(f = graph->start; f != nil; f = f->link)
		f->prog->opt = nil;
	// graph->rpo is a separate allocation; free(nil) is a no-op.
	free(graph->rpo);
	free(graph);
}
/*
* find looping structure
*
* 1) find reverse postordering
* 2) find approximate dominators,
* the actual dominators if the flow graph is reducible
* otherwise, dominators plus some other non-dominators.
* See Matthew S. Hecht and Jeffrey D. Ullman,
* "Analysis of a Simple Algorithm for Global Data Flow Problems",
* Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
* Oct. 1-3, 1973, pp. 207-217.
* 3) find all nodes with a predecessor dominated by the current node.
* such a node is a loop head.
* recursively, all preds with a greater rpo number are in the loop
*/
// postorder walks the flow graph depth-first starting at r, visiting s1
// before s2, and stores each node in rpo2r after all of the successors
// reached through it have been stored.  n is the next free index in rpo2r;
// the index following the last node stored is returned.
// r->rpo is set to 1 as a "visited" mark: a node whose rpo is still 0 has
// not been reached.
static int32
postorder(Flow *r, Flow **rpo2r, int32 n)
{
	Flow *succ;

	r->rpo = 1;

	succ = r->s1;
	if(succ != nil && succ->rpo == 0)
		n = postorder(succ, rpo2r, n);

	succ = r->s2;
	if(succ != nil && succ->rpo == 0)
		n = postorder(succ, rpo2r, n);

	rpo2r[n] = r;
	return n + 1;
}
// rpolca returns the lowest common ancestor of rpo1 and rpo2 in the tree
// described by idom (indexed by reverse-postorder number).  The larger of
// the two numbers is repeatedly replaced by its idom entry until both are
// equal.  rpo1 == -1 means "no first node" and yields rpo2 unchanged.
// An idom entry that does not decrease is reported with fatal("bad idom").
static int32
rpolca(int32 *idom, int32 rpo1, int32 rpo2)
{
	int32 lo, hi, up;

	if(rpo1 == -1)
		return rpo2;

	lo = rpo1;
	hi = rpo2;
	while(lo != hi) {
		// Keep lo as the smaller number.
		if(lo > hi) {
			up = hi;
			hi = lo;
			lo = up;
		}
		// lo < hi holds here, so at least one step up is needed.
		do {
			up = idom[hi];
			if(up >= hi)
				fatal("bad idom");
			hi = up;
		} while(lo < hi);
	}
	return lo;
}
// doms reports whether node r appears on the idom chain of node s
// (both given as reverse-postorder numbers): s is moved up through idom
// while it is greater than r, and the result is whether it lands on r.
static int
doms(int32 *idom, int32 r, int32 s)
{
	for(; s > r; s = idom[s])
		;
	return s == r;
}
// loophead reports whether r is a loop head: it returns 1 if any
// predecessor of r (p1, or any node on the p2/p2link list) is dominated
// by r according to idom, and 0 otherwise.
static int
loophead(int32 *idom, Flow *r)
{
	int32 head;
	Flow *pred;

	head = r->rpo;

	pred = r->p1;
	if(pred != nil && doms(idom, head, pred->rpo))
		return 1;

	for(pred = r->p2; pred != nil; pred = pred->p2link) {
		if(doms(idom, head, pred->rpo))
			return 1;
	}
	return 0;
}
// loopmark adds LOOP to r->loop and then does the same, recursively, for
// every predecessor of r.  The walk stops at nodes whose rpo number is
// below head and at nodes already marked for this head; r->active records
// the head number a node was last marked for.
// rpo2r is only passed along to the recursive calls.
static void
loopmark(Flow **rpo2r, int32 head, Flow *r)
{
	Flow *pred;

	if(r->rpo < head)
		return;
	if(r->active == head)
		return;

	r->active = head;
	r->loop += LOOP;

	pred = r->p1;
	if(pred != nil)
		loopmark(rpo2r, head, pred);
	for(pred = r->p2; pred != nil; pred = pred->p2link)
		loopmark(rpo2r, head, pred);
}
// flowrpo runs the loop analysis described above on graph g:
//   - numbers the nodes reachable from g->start in reverse postorder,
//     storing the number in f->rpo and the node in g->rpo[f->rpo];
//   - computes (approximate) immediate dominators in a temporary table;
//   - increments f->loop by 1 for every reachable node and by LOOP for
//     every loop the node is found to be part of.
// Any previous g->rpo table is freed and replaced.
// The active field of every node is overwritten: it is cleared first and
// then used by loopmark as a visited mark.
void
flowrpo(Graph *g)
{
	Flow *r1;
	int32 i, d, me, nr, *idom;
	Flow **rpo2r;

	free(g->rpo);
	g->rpo = calloc(g->num*sizeof g->rpo[0], 1);
	idom = calloc(g->num*sizeof idom[0], 1);
	if(g->rpo == nil || idom == nil)
		fatal("out of memory");

	// loopmark treats active == head as "already marked for this loop".
	// active is also available to clients, so discard whatever value a
	// client left there; otherwise a stale value equal to a head number
	// would make loopmark skip nodes that belong to the loop.
	for(r1 = g->start; r1 != nil; r1 = r1->link)
		r1->active = 0;

	rpo2r = g->rpo;
	d = postorder(g->start, rpo2r, 0);
	nr = g->num;
	if(d > nr)
		fatal("too many reg nodes %d %d", d, nr);
	nr = d;

	// postorder filled rpo2r in postorder; flip it to get reverse postorder.
	for(i = 0; i < nr / 2; i++) {
		r1 = rpo2r[i];
		rpo2r[i] = rpo2r[nr - 1 - i];
		rpo2r[nr - 1 - i] = r1;
	}

	for(i = 0; i < nr; i++)
		rpo2r[i]->rpo = i;

	idom[0] = 0;
	for(i = 0; i < nr; i++) {
		r1 = rpo2r[i];
		me = r1->rpo;
		d = -1;
		// rpo2r[r->rpo] == r protects against considering dead code,
		// which has r->rpo == 0.
		if(r1->p1 != nil && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me)
			d = r1->p1->rpo;
		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
			if(rpo2r[r1->rpo] == r1 && r1->rpo < me)
				d = rpolca(idom, d, r1->rpo);
		idom[i] = d;
	}

	for(i = 0; i < nr; i++) {
		r1 = rpo2r[i];
		r1->loop++;
		if(r1->p2 != nil && loophead(idom, r1))
			loopmark(rpo2r, i, r1);
	}
	free(idom);
}
// uniqp returns the predecessor of r if r has exactly one (either p1 with
// an empty p2 list, or a single-element p2 list with no p1).
// It returns nil if r has no predecessor or more than one.
Flow*
uniqp(Flow *r)
{
	Flow *only;

	if(r->p1 != nil) {
		// p1 is unique only when there are no p2 predecessors as well.
		if(r->p2 != nil)
			return nil;
		return r->p1;
	}

	only = r->p2;
	if(only == nil || only->p2link != nil)
		return nil;
	return only;
}
// uniqs returns the successor of r if r has exactly one (s1 or s2, but
// not both).  It returns nil if r has no successor or has two.
Flow*
uniqs(Flow *r)
{
	if(r->s1 == nil) {
		// Only s2 can be a successor; it is nil when there is none.
		return r->s2;
	}
	if(r->s2 != nil)
		return nil;
	return r->s1;
}
...@@ -2,5 +2,39 @@ ...@@ -2,5 +2,39 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
typedef struct Flow Flow;
typedef struct Graph Graph;
// Flow is the control flow graph node attached to one instruction.
// flowstart allocates one zeroed Flow per annotated instruction and makes
// the instruction's opt field point at it.  A client may embed Flow as the
// first field of a larger struct and cast between the two pointer types.
struct Flow {
Prog* prog; // actual instruction
Flow* p1; // predecessors of this instruction: p1,
Flow* p2; // and then p2 linked through p2link.
Flow* p2link; // next entry in the p2 list this node is on (the list of its s2's predecessors)
Flow* s1; // successors of this instruction (at most two: s1 and s2).
Flow* s2; // s1 is the fall-through successor, s2 the branch target
Flow* link; // next instruction in function code
int32 active; // usable by client
int32 rpo; // reverse post ordering
uint16 loop; // x5 for every loop
uchar refset; // diagnostic generated
};
// Graph is the handle for a flow graph built by flowstart and released by
// flowend.  The Flow nodes themselves are allocated together with the Graph.
struct Graph
{
Flow* start; // first node; follow link to visit all nodes in instruction order
int num; // number of nodes allocated for the graph
// After calling flowrpo, rpo lists the flow nodes in reverse postorder,
// and each non-dead Flow node f has g->rpo[f->rpo] == f.
Flow** rpo;
};
void fixjmp(Prog*); void fixjmp(Prog*);
Graph* flowstart(Prog*, int);
void flowrpo(Graph*);
void flowend(Graph*);
int noreturn(Prog*); int noreturn(Prog*);
Flow* uniqp(Flow*);
Flow* uniqs(Flow*);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment