Commit dbf96add authored by Russ Cox

cmd/gc: move flow graph into portable opt

Now there's only one copy of the flow graph construction
and dominator computation, and different optimizations
can attach different annotations to the instructions.

R=ken2
CC=golang-dev
https://golang.org/cl/12797045
parent 954d1474
......@@ -55,6 +55,7 @@ typedef struct Rgn Rgn;
// r->prog->opt points back to r.
struct Reg
{
Flow f;
Bits set; // variables written by this instruction.
Bits use1; // variables read by prog->from.
......@@ -68,19 +69,6 @@ struct Reg
Bits act;
int32 regu; // register used bitmap
int32 rpo; // reverse post ordering
int32 active;
uint16 loop; // x5 for every loop
uchar refset; // diagnostic generated
Reg* p1; // predecessors of this instruction: p1,
Reg* p2; // and then p2 linked though p2link.
Reg* p2link;
Reg* s1; // successors of this instruction (at most two: s1 and s2).
Reg* s2;
Reg* link; // next instruction in function code
Prog* prog; // actual instruction
};
#define R ((Reg*)0)
......@@ -96,7 +84,6 @@ struct Rgn
EXTERN int32 exregoffset; // not set
EXTERN int32 exfregoffset; // not set
EXTERN Reg* firstr;
EXTERN Reg* lastr;
EXTERN Reg zreg;
EXTERN Reg* freer;
EXTERN Reg** rpo2r;
......@@ -134,34 +121,21 @@ void regopt(Prog*);
void addmove(Reg*, int, int, int);
Bits mkvar(Reg *r, Adr *a);
void prop(Reg*, Bits, Bits);
void loopit(Reg*, int32);
void synch(Reg*, Bits);
uint32 allreg(uint32, Rgn*);
void paint1(Reg*, int);
uint32 paint2(Reg*, int);
void paint3(Reg*, int, int32, int);
void addreg(Adr*, int);
void dumpit(char *str, Reg *r0);
void dumpit(char *str, Flow *r0, int);
/*
* peep.c
*/
void peep(void);
void excise(Reg*);
Reg* uniqp(Reg*);
Reg* uniqs(Reg*);
int regtyp(Adr*);
int anyvar(Adr*);
int subprop(Reg*);
int copyprop(Reg*);
int copy1(Adr*, Adr*, Reg*, int);
void peep(Prog*);
void excise(Flow*);
int copyu(Prog*, Adr*, Adr*);
int copyas(Adr*, Adr*);
int copyau(Adr*, Adr*);
int copysub(Adr*, Adr*, Adr*, int);
int copysub1(Prog*, Adr*, Adr*, int);
int32 RtoB(int);
int32 FtoB(int);
int BtoR(int32);
......
This diff is collapsed.
This diff is collapsed.
......@@ -55,6 +55,7 @@ typedef struct Rgn Rgn;
// r->prog->opt points back to r.
struct Reg
{
Flow f;
Bits set; // variables written by this instruction.
Bits use1; // variables read by prog->from.
......@@ -68,19 +69,6 @@ struct Reg
Bits act;
int32 regu; // register used bitmap
int32 rpo; // reverse post ordering
int32 active;
uint16 loop; // x5 for every loop
uchar refset; // diagnostic generated
Reg* p1; // predecessors of this instruction: p1,
Reg* p2; // and then p2 linked though p2link.
Reg* p2link;
Reg* s1; // successors of this instruction (at most two: s1 and s2).
Reg* s2;
Reg* link; // next instruction in function code
Prog* prog; // actual instruction
};
#define R ((Reg*)0)
......@@ -96,10 +84,7 @@ struct Rgn
EXTERN int32 exregoffset; // not set
EXTERN int32 exfregoffset; // not set
EXTERN Reg* firstr;
EXTERN Reg* lastr;
EXTERN Reg zreg;
EXTERN Reg* freer;
EXTERN Reg** rpo2r;
EXTERN Rgn region[NRGN];
EXTERN Rgn* rgp;
EXTERN int nregion;
......@@ -113,7 +98,6 @@ EXTERN Bits addrs;
EXTERN Bits ovar;
EXTERN int change;
EXTERN int32 maxnr;
EXTERN int32* idom;
EXTERN struct
{
......@@ -128,41 +112,27 @@ EXTERN struct
/*
* reg.c
*/
Reg* rega(void);
int rcmp(const void*, const void*);
void regopt(Prog*);
void addmove(Reg*, int, int, int);
Bits mkvar(Reg*, Adr*);
void prop(Reg*, Bits, Bits);
void loopit(Reg*, int32);
void synch(Reg*, Bits);
uint32 allreg(uint32, Rgn*);
void paint1(Reg*, int);
uint32 paint2(Reg*, int);
void paint3(Reg*, int, int32, int);
void addreg(Adr*, int);
void dumpone(Reg*);
void dumpit(char*, Reg*);
void dumpone(Flow*, int);
void dumpit(char*, Flow*, int);
/*
* peep.c
*/
void peep(void);
void excise(Reg*);
Reg* uniqp(Reg*);
Reg* uniqs(Reg*);
int regtyp(Adr*);
int anyvar(Adr*);
int subprop(Reg*);
int copyprop(Reg*);
int copy1(Adr*, Adr*, Reg*, int);
void peep(Prog*);
void excise(Flow*);
int copyu(Prog*, Adr*, Adr*);
int copyas(Adr*, Adr*);
int copyau(Adr*, Adr*);
int copysub(Adr*, Adr*, Adr*, int);
int copysub1(Prog*, Adr*, Adr*, int);
int32 RtoB(int);
int32 FtoB(int);
int BtoR(int32);
......
......@@ -33,11 +33,18 @@
#include "gg.h"
#include "opt.h"
static void conprop(Reg *r);
static void elimshortmov(Reg *r);
static int prevl(Reg *r, int reg);
static void pushback(Reg *r);
static void conprop(Flow *r);
static void elimshortmov(Graph *g);
static int prevl(Flow *r, int reg);
static void pushback(Flow *r);
static int regconsttyp(Adr*);
static int regtyp(Adr*);
static int subprop(Flow*);
static int copyprop(Graph*, Flow*);
static int copy1(Adr*, Adr*, Flow*, int);
static int copyas(Adr*, Adr*);
static int copyau(Adr*, Adr*);
static int copysub(Adr*, Adr*, Adr*, int);
// do we need the carry bit
static int
......@@ -56,19 +63,19 @@ needc(Prog *p)
return 0;
}
static Reg*
rnops(Reg *r)
static Flow*
rnops(Flow *r)
{
Prog *p;
Reg *r1;
Flow *r1;
if(r != R)
if(r != nil)
for(;;) {
p = r->prog;
if(p->as != ANOP || p->from.type != D_NONE || p->to.type != D_NONE)
break;
r1 = uniqs(r);
if(r1 == R)
if(r1 == nil)
break;
r = r1;
}
......@@ -76,52 +83,25 @@ rnops(Reg *r)
}
void
peep(void)
peep(Prog *firstp)
{
Reg *r, *r1, *r2;
Flow *r, *r1;
Graph *g;
Prog *p, *p1;
int t;
ProgInfo info;
/*
* complete R structure
*/
t = 0;
for(r=firstr; r!=R; r=r1) {
r1 = r->link;
if(r1 == R)
break;
p = r->prog->link;
for(p = r->prog->link; p != r1->prog; p = p->link) {
proginfo(&info, p);
if(info.flags & Skip)
continue;
r2 = rega();
r->link = r2;
r2->link = r1;
r2->prog = p;
p->opt = r2;
r2->p1 = r;
r->s1 = r2;
r2->s1 = r1;
r1->p1 = r2;
r = r2;
t++;
}
}
g = flowstart(firstp, sizeof(Flow));
if(g == nil)
return;
// byte, word arithmetic elimination.
elimshortmov(r);
elimshortmov(g);
// constant propagation
// find MOV $con,R followed by
// another MOV $con,R without
// setting R in the interim
for(r=firstr; r!=R; r=r->link) {
// find MOV $con,nil followed by
// another MOV $con,nil without
// setting nil in the interim
for(r=g->start; r!=nil; r=r->link) {
p = r->prog;
switch(p->as) {
case ALEAL:
......@@ -147,10 +127,10 @@ peep(void)
loop1:
if(debug['P'] && debug['v'])
dumpit("loop1", firstr);
dumpit("loop1", g->start, 0);
t = 0;
for(r=firstr; r!=R; r=r->link) {
for(r=g->start; r!=nil; r=r->link) {
p = r->prog;
switch(p->as) {
case AMOVL:
......@@ -159,11 +139,11 @@ loop1:
case AMOVSD:
if(regtyp(&p->to))
if(regtyp(&p->from)) {
if(copyprop(r)) {
if(copyprop(g, r)) {
excise(r);
t++;
} else
if(subprop(r) && copyprop(r)) {
if(subprop(r) && copyprop(g, r)) {
excise(r);
t++;
}
......@@ -176,7 +156,7 @@ loop1:
case AMOVWLSX:
if(regtyp(&p->to)) {
r1 = rnops(uniqs(r));
if(r1 != R) {
if(r1 != nil) {
p1 = r1->prog;
if(p->as == p1->as && p->to.type == p1->from.type){
p1->as = AMOVL;
......@@ -195,7 +175,7 @@ loop1:
case AMOVQL:
if(regtyp(&p->to)) {
r1 = rnops(uniqs(r));
if(r1 != R) {
if(r1 != nil) {
p1 = r1->prog;
if(p->as == p1->as && p->to.type == p1->from.type){
p1->as = AMOVQ;
......@@ -278,7 +258,7 @@ loop1:
// can be replaced by MOVAPD, which moves the pair of float64s
// instead of just the lower one. We only use the lower one, but
// the processor can do better if we do moves using both.
for(r=firstr; r!=R; r=r->link) {
for(r=g->start; r!=nil; r=r->link) {
p = r->prog;
if(p->as == AMOVLQZX)
if(regtyp(&p->from))
......@@ -295,7 +275,7 @@ loop1:
// load pipelining
// push any load from memory as early as possible
// to give it time to complete before use.
for(r=firstr; r!=R; r=r->link) {
for(r=g->start; r!=nil; r=r->link) {
p = r->prog;
switch(p->as) {
case AMOVB:
......@@ -307,17 +287,19 @@ loop1:
pushback(r);
}
}
flowend(g);
}
static void
pushback(Reg *r0)
pushback(Flow *r0)
{
Reg *r, *b;
Flow *r, *b;
Prog *p0, *p, t;
b = R;
b = nil;
p0 = r0->prog;
for(r=uniqp(r0); r!=R && uniqs(r)!=R; r=uniqp(r)) {
for(r=uniqp(r0); r!=nil && uniqs(r)!=nil; r=uniqp(r)) {
p = r->prog;
if(p->as != ANOP) {
if(!regconsttyp(&p->from) || !regtyp(&p->to))
......@@ -330,11 +312,11 @@ pushback(Reg *r0)
b = r;
}
if(b == R) {
if(b == nil) {
if(debug['v']) {
print("no pushback: %P\n", r0->prog);
if(r)
print("\t%P [%d]\n", r->prog, uniqs(r)!=R);
print("\t%P [%d]\n", r->prog, uniqs(r)!=nil);
}
return;
}
......@@ -377,7 +359,7 @@ pushback(Reg *r0)
}
void
excise(Reg *r)
excise(Flow *r)
{
Prog *p;
......@@ -392,39 +374,7 @@ excise(Reg *r)
ostats.ndelmov++;
}
Reg*
uniqp(Reg *r)
{
Reg *r1;
r1 = r->p1;
if(r1 == R) {
r1 = r->p2;
if(r1 == R || r1->p2link != R)
return R;
} else
if(r->p2 != R)
return R;
return r1;
}
Reg*
uniqs(Reg *r)
{
Reg *r1;
r1 = r->s1;
if(r1 == R) {
r1 = r->s2;
if(r1 == R)
return R;
} else
if(r->s2 != R)
return R;
return r1;
}
int
static int
regtyp(Adr *a)
{
int t;
......@@ -448,12 +398,12 @@ regtyp(Adr *a)
// TODO: Using the Q forms here instead of the L forms
// seems unnecessary, and it makes the instructions longer.
static void
elimshortmov(Reg *r)
elimshortmov(Graph *g)
{
Prog *p;
Flow *r;
USED(r);
for(r=firstr; r!=R; r=r->link) {
for(r=g->start; r!=nil; r=r->link) {
p = r->prog;
if(regtyp(&p->to)) {
switch(p->as) {
......@@ -554,13 +504,13 @@ regconsttyp(Adr *a)
// is reg guaranteed to be truncated by a previous L instruction?
static int
prevl(Reg *r0, int reg)
prevl(Flow *r0, int reg)
{
Prog *p;
Reg *r;
Flow *r;
ProgInfo info;
for(r=uniqp(r0); r!=R; r=uniqp(r)) {
for(r=uniqp(r0); r!=nil; r=uniqp(r)) {
p = r->prog;
if(p->to.type == reg) {
proginfo(&info, p);
......@@ -588,13 +538,13 @@ prevl(Reg *r0, int reg)
* hopefully, then the former or latter MOV
* will be eliminated by copy propagation.
*/
int
subprop(Reg *r0)
static int
subprop(Flow *r0)
{
Prog *p;
ProgInfo info;
Adr *v1, *v2;
Reg *r;
Flow *r;
int t;
if(debug['P'] && debug['v'])
......@@ -612,10 +562,10 @@ subprop(Reg *r0)
print("\tnot regtype %D; return 0\n", v2);
return 0;
}
for(r=uniqp(r0); r!=R; r=uniqp(r)) {
for(r=uniqp(r0); r!=nil; r=uniqp(r)) {
if(debug['P'] && debug['v'])
print("\t? %P\n", r->prog);
if(uniqs(r) == R) {
if(uniqs(r) == nil) {
if(debug['P'] && debug['v'])
print("\tno unique successor\n");
break;
......@@ -689,12 +639,12 @@ gotit:
* set v1 F=1
* set v2 return success
*/
int
copyprop(Reg *r0)
static int
copyprop(Graph *g, Flow *r0)
{
Prog *p;
Adr *v1, *v2;
Reg *r;
Flow *r;
if(debug['P'] && debug['v'])
print("copyprop %P\n", r0->prog);
......@@ -703,13 +653,13 @@ copyprop(Reg *r0)
v2 = &p->to;
if(copyas(v1, v2))
return 1;
for(r=firstr; r!=R; r=r->link)
for(r=g->start; r!=nil; r=r->link)
r->active = 0;
return copy1(v1, v2, r0->s1, 0);
}
int
copy1(Adr *v1, Adr *v2, Reg *r, int f)
static int
copy1(Adr *v1, Adr *v2, Flow *r, int f)
{
int t;
Prog *p;
......@@ -722,11 +672,11 @@ copy1(Adr *v1, Adr *v2, Reg *r, int f)
r->active = 1;
if(debug['P'])
print("copy %D->%D f=%d\n", v1, v2, f);
for(; r != R; r = r->s1) {
for(; r != nil; r = r->s1) {
p = r->prog;
if(debug['P'])
print("%P", p);
if(!f && uniqp(r) == R) {
if(!f && uniqp(r) == nil) {
f = 1;
if(debug['P'])
print("; merge; f=%d", f);
......@@ -880,7 +830,7 @@ copyu(Prog *p, Adr *v, Adr *s)
* could be set/use depending on
* semantics
*/
int
static int
copyas(Adr *a, Adr *v)
{
if(a->type != v->type)
......@@ -896,7 +846,7 @@ copyas(Adr *a, Adr *v)
/*
* either direct or indirect
*/
int
static int
copyau(Adr *a, Adr *v)
{
......@@ -924,7 +874,7 @@ copyau(Adr *a, Adr *v)
* substitute s for v in a
* return failure to substitute
*/
int
static int
copysub(Adr *a, Adr *v, Adr *s, int f)
{
int t;
......@@ -957,9 +907,9 @@ copysub(Adr *a, Adr *v, Adr *s, int f)
}
static void
conprop(Reg *r0)
conprop(Flow *r0)
{
Reg *r;
Flow *r;
Prog *p, *p0;
int t;
Adr *v0;
......@@ -970,9 +920,9 @@ conprop(Reg *r0)
loop:
r = uniqs(r);
if(r == R || r == r0)
if(r == nil || r == r0)
return;
if(uniqp(r) == R)
if(uniqp(r) == nil)
return;
p = r->prog;
......
This diff is collapsed.
......@@ -55,6 +55,7 @@ typedef struct Rgn Rgn;
// r->prog->opt points back to r.
struct Reg
{
Flow f;
Bits set; // variables written by this instruction.
Bits use1; // variables read by prog->from.
......@@ -96,7 +97,6 @@ struct Rgn
EXTERN int32 exregoffset; // not set
EXTERN int32 exfregoffset; // not set
EXTERN Reg* firstr;
EXTERN Reg* lastr;
EXTERN Reg zreg;
EXTERN Reg* freer;
EXTERN Reg** rpo2r;
......@@ -141,28 +141,16 @@ void paint1(Reg*, int);
uint32 paint2(Reg*, int);
void paint3(Reg*, int, int32, int);
void addreg(Adr*, int);
void dumpone(Reg*);
void dumpit(char*, Reg*);
void dumpone(Flow*, int);
void dumpit(char*, Flow*, int);
/*
* peep.c
*/
void peep(void);
void excise(Reg*);
Reg* uniqp(Reg*);
Reg* uniqs(Reg*);
int regtyp(Adr*);
int anyvar(Adr*);
int subprop(Reg*);
int copyprop(Reg*);
int copy1(Adr*, Adr*, Reg*, int);
void peep(Prog*);
void excise(Flow*);
int copyu(Prog*, Adr*, Adr*);
int copyas(Adr*, Adr*);
int copyau(Adr*, Adr*);
int copysub(Adr*, Adr*, Adr*, int);
int copysub1(Prog*, Adr*, Adr*, int);
int32 RtoB(int);
int32 FtoB(int);
int BtoR(int32);
......
......@@ -35,8 +35,15 @@
#define REGEXT 0
static void conprop(Reg *r);
static void elimshortmov(Reg *r);
static void conprop(Flow *r);
static void elimshortmov(Graph*);
static int regtyp(Adr*);
static int subprop(Flow*);
static int copyprop(Graph*, Flow*);
static int copy1(Adr*, Adr*, Flow*, int);
static int copyas(Adr*, Adr*);
static int copyau(Adr*, Adr*);
static int copysub(Adr*, Adr*, Adr*, int);
// do we need the carry bit
static int
......@@ -55,19 +62,19 @@ needc(Prog *p)
return 0;
}
static Reg*
rnops(Reg *r)
static Flow*
rnops(Flow *r)
{
Prog *p;
Reg *r1;
Flow *r1;
if(r != R)
if(r != nil)
for(;;) {
p = r->prog;
if(p->as != ANOP || p->from.type != D_NONE || p->to.type != D_NONE)
break;
r1 = uniqs(r);
if(r1 == R)
if(r1 == nil)
break;
r = r1;
}
......@@ -75,49 +82,25 @@ rnops(Reg *r)
}
void
peep(void)
peep(Prog *firstp)
{
Reg *r, *r1, *r2;
Flow *r, *r1;
Graph *g;
Prog *p, *p1;
int t;
ProgInfo info;
/*
* complete R structure
*/
for(r=firstr; r!=R; r=r1) {
r1 = r->link;
if(r1 == R)
break;
for(p = r->prog->link; p != r1->prog; p = p->link) {
proginfo(&info, p);
if(info.flags & Skip)
continue;
r2 = rega();
r->link = r2;
r2->link = r1;
r2->prog = p;
p->opt = r2;
r2->p1 = r;
r->s1 = r2;
r2->s1 = r1;
r1->p1 = r2;
r = r2;
}
}
g = flowstart(firstp, sizeof(Flow));
if(g == nil)
return;
// byte, word arithmetic elimination.
elimshortmov(r);
elimshortmov(g);
// constant propagation
// find MOV $con,R followed by
// another MOV $con,R without
// setting R in the interim
for(r=firstr; r!=R; r=r->link) {
// find MOV $con,nil followed by
// another MOV $con,nil without
// setting nil in the interim
for(r=g->start; r!=nil; r=r->link) {
p = r->prog;
switch(p->as) {
case ALEAL:
......@@ -141,10 +124,10 @@ peep(void)
loop1:
if(debug['P'] && debug['v'])
dumpit("loop1", firstr);
dumpit("loop1", g->start, 0);
t = 0;
for(r=firstr; r!=R; r=r->link) {
for(r=g->start; r!=nil; r=r->link) {
p = r->prog;
switch(p->as) {
case AMOVL:
......@@ -152,11 +135,11 @@ loop1:
case AMOVSD:
if(regtyp(&p->to))
if(regtyp(&p->from)) {
if(copyprop(r)) {
if(copyprop(g, r)) {
excise(r);
t++;
} else
if(subprop(r) && copyprop(r)) {
if(subprop(r) && copyprop(g, r)) {
excise(r);
t++;
}
......@@ -169,7 +152,7 @@ loop1:
case AMOVWLSX:
if(regtyp(&p->to)) {
r1 = rnops(uniqs(r));
if(r1 != R) {
if(r1 != nil) {
p1 = r1->prog;
if(p->as == p1->as && p->to.type == p1->from.type){
p1->as = AMOVL;
......@@ -232,7 +215,7 @@ loop1:
// can be replaced by MOVAPD, which moves the pair of float64s
// instead of just the lower one. We only use the lower one, but
// the processor can do better if we do moves using both.
for(r=firstr; r!=R; r=r->link) {
for(r=g->start; r!=nil; r=r->link) {
p = r->prog;
if(p->as == AMOVSD)
if(regtyp(&p->from))
......@@ -242,7 +225,7 @@ loop1:
}
void
excise(Reg *r)
excise(Flow *r)
{
Prog *p;
......@@ -257,39 +240,7 @@ excise(Reg *r)
ostats.ndelmov++;
}
Reg*
uniqp(Reg *r)
{
Reg *r1;
r1 = r->p1;
if(r1 == R) {
r1 = r->p2;
if(r1 == R || r1->p2link != R)
return R;
} else
if(r->p2 != R)
return R;
return r1;
}
Reg*
uniqs(Reg *r)
{
Reg *r1;
r1 = r->s1;
if(r1 == R) {
r1 = r->s2;
if(r1 == R)
return R;
} else
if(r->s2 != R)
return R;
return r1;
}
int
static int
regtyp(Adr *a)
{
int t;
......@@ -310,11 +261,12 @@ regtyp(Adr *a)
// can smash the entire 64-bit register without
// causing any trouble.
static void
elimshortmov(Reg *r)
elimshortmov(Graph *g)
{
Prog *p;
Flow *r;
for(r=firstr; r!=R; r=r->link) {
for(r=g->start; r!=nil; r=r->link) {
p = r->prog;
if(regtyp(&p->to)) {
switch(p->as) {
......@@ -409,12 +361,12 @@ elimshortmov(Reg *r)
* hopefully, then the former or latter MOV
* will be eliminated by copy propagation.
*/
int
subprop(Reg *r0)
static int
subprop(Flow *r0)
{
Prog *p;
Adr *v1, *v2;
Reg *r;
Flow *r;
int t;
ProgInfo info;
......@@ -425,10 +377,10 @@ subprop(Reg *r0)
v2 = &p->to;
if(!regtyp(v2))
return 0;
for(r=uniqp(r0); r!=R; r=uniqp(r)) {
for(r=uniqp(r0); r!=nil; r=uniqp(r)) {
if(debug['P'] && debug['v'])
print("\t? %P\n", r->prog);
if(uniqs(r) == R)
if(uniqs(r) == nil)
break;
p = r->prog;
proginfo(&info, p);
......@@ -483,25 +435,25 @@ gotit:
* set v1 F=1
* set v2 return success
*/
int
copyprop(Reg *r0)
static int
copyprop(Graph *g, Flow *r0)
{
Prog *p;
Adr *v1, *v2;
Reg *r;
Flow *r;
p = r0->prog;
v1 = &p->from;
v2 = &p->to;
if(copyas(v1, v2))
return 1;
for(r=firstr; r!=R; r=r->link)
for(r=g->start; r!=nil; r=r->link)
r->active = 0;
return copy1(v1, v2, r0->s1, 0);
}
int
copy1(Adr *v1, Adr *v2, Reg *r, int f)
static int
copy1(Adr *v1, Adr *v2, Flow *r, int f)
{
int t;
Prog *p;
......@@ -514,11 +466,11 @@ copy1(Adr *v1, Adr *v2, Reg *r, int f)
r->active = 1;
if(debug['P'])
print("copy %D->%D f=%d\n", v1, v2, f);
for(; r != R; r = r->s1) {
for(; r != nil; r = r->s1) {
p = r->prog;
if(debug['P'])
print("%P", p);
if(!f && uniqp(r) == R) {
if(!f && uniqp(r) == nil) {
f = 1;
if(debug['P'])
print("; merge; f=%d", f);
......@@ -672,7 +624,7 @@ copyu(Prog *p, Adr *v, Adr *s)
* could be set/use depending on
* semantics
*/
int
static int
copyas(Adr *a, Adr *v)
{
if(a->type != v->type)
......@@ -688,7 +640,7 @@ copyas(Adr *a, Adr *v)
/*
* either direct or indirect
*/
int
static int
copyau(Adr *a, Adr *v)
{
......@@ -707,7 +659,7 @@ copyau(Adr *a, Adr *v)
* substitute s for v in a
* return failure to substitute
*/
int
static int
copysub(Adr *a, Adr *v, Adr *s, int f)
{
int t;
......@@ -740,9 +692,9 @@ copysub(Adr *a, Adr *v, Adr *s, int f)
}
static void
conprop(Reg *r0)
conprop(Flow *r0)
{
Reg *r;
Flow *r;
Prog *p, *p0;
int t;
Adr *v0;
......@@ -753,9 +705,9 @@ conprop(Reg *r0)
loop:
r = uniqs(r);
if(r == R || r == r0)
if(r == nil || r == r0)
return;
if(uniqp(r) == R)
if(uniqp(r) == nil)
return;
p = r->prog;
......
This diff is collapsed.
......@@ -181,3 +181,284 @@ fixjmp(Prog *firstp)
print("\n");
}
}
// Control flow analysis. The Flow structures hold predecessor and successor
// information as well as basic loop analysis.
//
//	graph = flowstart(firstp, sizeof(Flow));
//	... use flow graph ...
//	flowend(graph); // free graph
//
// Typical uses of the flow graph are to iterate over all the flow-relevant instructions:
//
//	for(f = graph->start; f != nil; f = f->link)
//
// or, given an instruction f, to iterate over all the predecessors, which is
// f->p1 and this list:
//
//	for(f2 = f->p2; f2 != nil; f2 = f2->p2link)
//
// Often the Flow struct is embedded as the first field inside a larger struct S.
// In that case casts are needed to convert Flow* to S* in many places but the
// idea is the same. Pass sizeof(S) instead of sizeof(Flow) to flowstart.
//
// flowstart returns nil when no instruction needs an annotation or when the
// function has 20000 or more annotated instructions; callers must check for
// nil. On a nil return every p->opt in the list has been reset to nil.
// On success each annotated instruction's p->opt points at its Flow node.
Graph*
flowstart(Prog *firstp, int size)
{
	int nf;
	Flow *f, *f1, *start, *last;
	Graph *graph;
	Prog *p;
	ProgInfo info;

	// Count and mark instructions to annotate.
	// (void*)1 is only a temporary "needs a Flow" marker.
	nf = 0;
	for(p = firstp; p != P; p = p->link) {
		p->opt = nil; // should be already, but just in case
		proginfo(&info, p);
		if(info.flags & Skip)
			continue;
		p->opt = (void*)1;
		nf++;
	}

	if(nf == 0)
		return nil;

	if(nf >= 20000) {
		// fatal("%S is too big (%d instructions)", curfn->nname->sym, nf);
		// Giving up: erase the temporary markers so that no instruction
		// is left carrying a bogus non-nil opt pointer.
		for(p = firstp; p != P; p = p->link)
			p->opt = nil;
		return nil;
	}

	// Allocate annotations and assign to instructions.
	// The nodes live in the same allocation, directly after the Graph
	// header, each occupying size bytes.
	graph = calloc(sizeof *graph + size*nf, 1);
	if(graph == nil)
		fatal("out of memory");
	start = (Flow*)(graph+1);
	last = nil;
	f = start;
	for(p = firstp; p != P; p = p->link) {
		if(p->opt == nil)
			continue;
		p->opt = f;
		f->prog = p;
		if(last)
			last->link = f;
		last = f;
		f = (Flow*)((uchar*)f + size);
	}

	// Fill in pred/succ information.
	for(f = start; f != nil; f = f->link) {
		p = f->prog;
		proginfo(&info, p);
		if(!(info.flags & Break)) {
			// Falls through to the next annotated instruction.
			f1 = f->link;
			f->s1 = f1;
			// The final node has no follower; do not dereference nil
			// if the last instruction is not marked Break.
			if(f1 != nil)
				f1->p1 = f;
		}
		if(p->to.type == D_BRANCH) {
			if(p->to.u.branch == P)
				fatal("pnil %P", p);
			f1 = p->to.u.branch->opt;
			if(f1 == nil)
				fatal("fnil %P / %P", p, p->to.u.branch);
			if(f1 == f) {
				//fatal("self loop %P", p);
				continue;
			}
			// Record the branch edge and push f onto the target's
			// p2 predecessor list (chained through p2link).
			f->s2 = f1;
			f->p2link = f1->p2;
			f1->p2 = f;
		}
	}

	graph->start = start;
	graph->num = nf;
	return graph;
}
// flowend releases a graph created by flowstart: it clears the opt
// back-pointer of every annotated instruction, then frees the reverse
// postorder array (allocated by flowrpo, nil if flowrpo was never called)
// and the graph itself. A nil graph is ignored, since flowstart can
// return nil.
void
flowend(Graph *graph)
{
	Flow *f;

	if(graph == nil)
		return;
	for(f = graph->start; f != nil; f = f->link)
		f->prog->opt = nil;
	// graph->rpo is a separate allocation; free(nil) is a no-op.
	free(graph->rpo);
	free(graph);
}
/*
* find looping structure
*
* 1) find reverse postordering
* 2) find approximate dominators,
* the actual dominators if the flow graph is reducible
* otherwise, dominators plus some other non-dominators.
* See Matthew S. Hecht and Jeffrey D. Ullman,
* "Analysis of a Simple Algorithm for Global Data Flow Problems",
* Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
* Oct. 1-3, 1973, pp. 207-217.
* 3) find all nodes with a predecessor dominated by the current node.
* such a node is a loop head.
* recursively, all preds with a greater rpo number are in the loop
*/
// postorder walks the graph depth-first from r along s1 and then s2,
// appending each node to rpo2r after its successors have been handled.
// n is the next free slot in rpo2r; the updated slot index is returned.
// A nonzero rpo field is used as the "already visited" mark.
static int32
postorder(Flow *r, Flow **rpo2r, int32 n)
{
	Flow *next;
	int which;

	r->rpo = 1;
	for(which = 0; which < 2; which++) {
		// Look at s2 only after s1's subtree is done: that walk
		// may already have visited s2.
		next = (which == 0) ? r->s1 : r->s2;
		if(next != nil && next->rpo == 0)
			n = postorder(next, rpo2r, n);
	}
	rpo2r[n++] = r;
	return n;
}
// rpolca follows idom links from the larger of the two rpo numbers until
// both numbers meet, and returns the meeting point. An rpo1 of -1 means
// "no candidate yet", in which case rpo2 is returned unchanged.
// It calls fatal if an idom entry does not point to a smaller number.
static int32
rpolca(int32 *idom, int32 rpo1, int32 rpo2)
{
	int32 lo, hi, up;

	if(rpo1 == -1)
		return rpo2;

	lo = rpo1;
	hi = rpo2;
	for(;;) {
		if(lo == hi)
			break;
		if(lo > hi) {
			// keep lo as the smaller number
			up = hi;
			hi = lo;
			lo = up;
		}
		while(lo < hi) {
			up = idom[hi];
			if(up >= hi)
				fatal("bad idom");
			hi = up;
		}
	}
	return lo;
}
// doms reports whether following idom links upward from s reaches r:
// s is replaced by idom[s] while it is greater than r, and the result
// is nonzero when the walk stops exactly on r.
static int
doms(int32 *idom, int32 r, int32 s)
{
	for(; s > r; s = idom[s])
		;
	return s == r;
}
// loophead reports whether r has a predecessor (p1 or any node on the
// p2/p2link list) for which doms(idom, r->rpo, pred->rpo) holds.
static int
loophead(int32 *idom, Flow *r)
{
	int32 head;
	Flow *pred;

	head = r->rpo;

	pred = r->p1;
	if(pred != nil && doms(idom, head, pred->rpo))
		return 1;

	pred = r->p2;
	while(pred != nil) {
		if(doms(idom, head, pred->rpo))
			return 1;
		pred = pred->p2link;
	}
	return 0;
}
// loopmark adds LOOP to the loop field of r and, recursively, of every
// predecessor whose rpo is not below head. The active field is set to
// head so that a node is marked at most once per head.
static void
loopmark(Flow **rpo2r, int32 head, Flow *r)
{
	Flow *pred;

	if(r->rpo >= head && r->active != head) {
		r->active = head;
		r->loop += LOOP;

		pred = r->p1;
		if(pred != nil)
			loopmark(rpo2r, head, pred);

		pred = r->p2;
		while(pred != nil) {
			loopmark(rpo2r, head, pred);
			pred = pred->p2link;
		}
	}
}
// flowrpo numbers the nodes reachable from g->start in reverse postorder,
// stores that ordering in g->rpo, computes an idom array over the rpo
// numbers, and then updates each ordered node's loop field: it is
// incremented once, and loopmark is run from every node that loophead
// accepts. The idom array is temporary and freed before returning.
//
// NOTE(review): postorder treats a nonzero rpo field as "already visited",
// so this appears to rely on every node's rpo being 0 on entry (true for
// a graph fresh from flowstart's calloc) - confirm before calling flowrpo
// a second time on the same graph.
void
flowrpo(Graph *g)
{
Flow *r1;
int32 i, d, me, nr, *idom;
Flow **rpo2r;
// Drop any ordering left by an earlier call and allocate zeroed arrays
// with one entry per node.
free(g->rpo);
g->rpo = calloc(g->num*sizeof g->rpo[0], 1);
idom = calloc(g->num*sizeof idom[0], 1);
if(g->rpo == nil || idom == nil)
fatal("out of memory");
// postorder fills rpo2r in postorder and returns how many nodes it visited.
rpo2r = g->rpo;
d = postorder(g->start, rpo2r, 0);
nr = g->num;
if(d > nr)
fatal("too many reg nodes %d %d", d, nr);
// Only the d visited nodes are ordered; reverse them in place to turn
// the postorder into a reverse postorder.
nr = d;
for(i = 0; i < nr / 2; i++) {
r1 = rpo2r[i];
rpo2r[i] = rpo2r[nr - 1 - i];
rpo2r[nr - 1 - i] = r1;
}
// Record each node's index in the ordering as its rpo number.
for(i = 0; i < nr; i++)
rpo2r[i]->rpo = i;
// For each node, combine (via rpolca) the rpo numbers of all predecessors
// that come earlier in the ordering; d stays -1 if there are none.
idom[0] = 0;
for(i = 0; i < nr; i++) {
r1 = rpo2r[i];
me = r1->rpo;
d = -1;
// rpo2r[r->rpo] == r protects against considering dead code,
// which has r->rpo == 0.
if(r1->p1 != nil && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me)
d = r1->p1->rpo;
// r1 is reused here as the cursor over the p2 predecessor list.
for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
if(rpo2r[r1->rpo] == r1 && r1->rpo < me)
d = rpolca(idom, d, r1->rpo);
idom[i] = d;
}
// Every ordered node gets loop incremented once; nodes with a p2
// predecessor that loophead accepts additionally run loopmark.
for(i = 0; i < nr; i++) {
r1 = rpo2r[i];
r1->loop++;
if(r1->p2 != nil && loophead(idom, r1))
loopmark(rpo2r, i, r1);
}
free(idom);
}
// uniqp returns the sole predecessor of r, or nil when r has no
// predecessor or more than one. Predecessors are p1 plus the list
// starting at p2 and chained through p2link.
Flow*
uniqp(Flow *r)
{
	Flow *only;

	if(r->p1 != nil) {
		// p1 is unique only if the p2 list is empty.
		if(r->p2 != nil)
			return nil;
		return r->p1;
	}

	// No p1: the p2 list must hold exactly one entry.
	only = r->p2;
	if(only == nil || only->p2link != nil)
		return nil;
	return only;
}
// uniqs returns the sole successor of r, or nil when r has no successor
// or has both s1 and s2.
Flow*
uniqs(Flow *r)
{
	// Without s1 the answer is whatever s2 holds, nil included.
	if(r->s1 == nil)
		return r->s2;
	// s1 is present: it is unique only when s2 is absent.
	if(r->s2 != nil)
		return nil;
	return r->s1;
}
......@@ -2,5 +2,39 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
typedef struct Flow Flow;
typedef struct Graph Graph;
// Flow is the flow-graph annotation attached to one instruction.
// flowstart allocates one per annotated instruction and points
// prog->opt at it. It may be embedded as the first field of a larger
// per-pass struct, in which case that struct's size is passed to flowstart.
struct Flow {
Prog* prog; // actual instruction
Flow* p1; // predecessors of this instruction: p1,
Flow* p2; // and then p2 linked through p2link.
Flow* p2link; // next entry on the branch target's p2 predecessor list
Flow* s1; // successors of this instruction (at most two: s1 and s2).
Flow* s2; // s1 is the fall-through successor, s2 the branch target
Flow* link; // next instruction in function code
int32 active; // usable by client
int32 rpo; // reverse post ordering
uint16 loop; // x5 for every loop
uchar refset; // diagnostic generated
};
// Graph is the handle returned by flowstart for one function's flow graph.
struct Graph
{
Flow* start; // first Flow node; follow link to visit the rest
int num; // number of Flow nodes allocated by flowstart
// After calling flowrpo, rpo lists the flow nodes in reverse postorder,
// and each non-dead Flow node f has g->rpo[f->rpo] == f.
// The array is allocated by flowrpo and is nil until then.
Flow** rpo;
};
// Portable optimizer entry points.
void fixjmp(Prog*);
// flowstart builds the flow graph for an instruction list; the int is the
// per-node size in bytes (sizeof(Flow) or a struct embedding Flow first).
// It may return nil.
Graph* flowstart(Prog*, int);
// flowrpo fills in reverse-postorder numbering and loop information.
void flowrpo(Graph*);
// flowend clears the instructions' opt pointers and frees the graph.
void flowend(Graph*);
int noreturn(Prog*);
// uniqp/uniqs return the single predecessor/successor of a node, or nil
// if there is none or more than one.
Flow* uniqp(Flow*);
Flow* uniqs(Flow*);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment