Commit b612ab3a authored by Josh Bleecher Snyder's avatar Josh Bleecher Snyder

cmd/compile: make liveness more efficient

When the number of variables in a function is very large,
liveness analysis gets less efficient, since every bit vector
is O(number of variables).

Improve the situation by returning a sparse representation
from progeffects. In all scenarios, progeffects either
returns a slice that is shared function-wide, 
and which is usually small, or a slice that is guaranteed
to have at most three values.

Reduces compilation time for the code in #8225 Comment 1 by ~10%.
Minor effects on regular packages (below).

Passes toolstash -cmp.

Updates #8225

name       old time/op      new time/op      delta
Template        215ms ± 2%       212ms ± 4%  -1.31%  (p=0.001 n=30+30)
Unicode        98.3ms ± 3%      98.4ms ± 5%    ~     (p=0.971 n=30+30)
GoTypes         657ms ± 3%       651ms ± 2%  -0.98%  (p=0.001 n=30+27)
Compiler        2.78s ± 2%       2.77s ± 2%  -0.60%  (p=0.006 n=30+30)
Flate           130ms ± 4%       130ms ± 4%    ~     (p=0.712 n=29+30)
GoParser        159ms ± 5%       158ms ± 3%    ~     (p=0.331 n=29+30)
Reflect         406ms ± 3%       404ms ± 3%  -0.69%  (p=0.041 n=29+30)
Tar             117ms ± 4%       117ms ± 3%    ~     (p=0.886 n=30+29)
XML             219ms ± 2%       217ms ± 2%    ~     (p=0.091 n=29+24)

name       old user-ns/op   new user-ns/op   delta
Template   272user-ms ± 3%  270user-ms ± 3%  -1.03%  (p=0.004 n=30+30)
Unicode    138user-ms ± 2%  138user-ms ± 3%    ~     (p=0.902 n=29+29)
GoTypes    891user-ms ± 2%  883user-ms ± 2%  -0.95%  (p=0.000 n=29+29)
Compiler   3.85user-s ± 2%  3.84user-s ± 2%    ~     (p=0.236 n=30+30)
Flate      167user-ms ± 2%  166user-ms ± 4%    ~     (p=0.511 n=28+30)
GoParser   211user-ms ± 4%  210user-ms ± 3%    ~     (p=0.287 n=29+30)
Reflect    539user-ms ± 3%  536user-ms ± 2%  -0.59%  (p=0.034 n=29+30)
Tar        154user-ms ± 3%  155user-ms ± 4%    ~     (p=0.786 n=30+30)
XML        289user-ms ± 3%  288user-ms ± 4%    ~     (p=0.249 n=30+26)

name       old alloc/op     new alloc/op     delta
Template       40.7MB ± 0%      40.8MB ± 0%  +0.09%  (p=0.001 n=30+30)
Unicode        30.8MB ± 0%      30.8MB ± 0%    ~     (p=0.112 n=30+30)
GoTypes         123MB ± 0%       124MB ± 0%  +0.09%  (p=0.000 n=30+30)
Compiler        473MB ± 0%       473MB ± 0%  +0.05%  (p=0.000 n=30+30)
Flate          26.5MB ± 0%      26.5MB ± 0%    ~     (p=0.186 n=29+30)
GoParser       32.3MB ± 0%      32.4MB ± 0%  +0.07%  (p=0.021 n=28+30)
Reflect        84.4MB ± 0%      84.6MB ± 0%  +0.21%  (p=0.000 n=30+30)
Tar            27.3MB ± 0%      27.3MB ± 0%  +0.09%  (p=0.010 n=30+28)
XML            44.7MB ± 0%      44.7MB ± 0%  +0.07%  (p=0.002 n=30+30)

name       old allocs/op    new allocs/op    delta
Template         401k ± 1%        400k ± 1%    ~     (p=0.321 n=30+30)
Unicode          331k ± 1%        331k ± 1%    ~     (p=0.357 n=30+28)
GoTypes         1.24M ± 0%       1.24M ± 1%  -0.19%  (p=0.001 n=30+30)
Compiler        4.27M ± 0%       4.27M ± 0%  -0.13%  (p=0.000 n=30+30)
Flate            252k ± 1%        251k ± 1%  -0.30%  (p=0.005 n=30+30)
GoParser         325k ± 1%        325k ± 1%    ~     (p=0.224 n=28+30)
Reflect         1.06M ± 0%       1.05M ± 0%  -0.34%  (p=0.000 n=30+30)
Tar              266k ± 1%        266k ± 1%    ~     (p=0.333 n=30+30)
XML              416k ± 1%        415k ± 1%    ~     (p=0.144 n=30+29)


Change-Id: I6ba67a9203516373062a2618122306da73333d98
Reviewed-on: https://go-review.googlesource.com/36211
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarMatthew Dempsky <mdempsky@google.com>
parent ef259390
......@@ -55,9 +55,7 @@ func (bv1 bvec) Eq(bv2 bvec) bool {
}
func (dst bvec) Copy(src bvec) {
for i, x := range src.b {
dst.b[i] = x
}
copy(dst.b, src.b)
}
func (bv bvec) Get(i int32) bool {
......@@ -76,6 +74,14 @@ func (bv bvec) Set(i int32) {
bv.b[i/WORDBITS] |= mask
}
func (bv bvec) Unset(i int32) {
if i < 0 || i >= bv.n {
Fatalf("bvunset: index %d is out of bounds with length %d\n", i, bv.n)
}
mask := uint32(1 << uint(i%WORDBITS))
bv.b[i/WORDBITS] &^= mask
}
// bvnext returns the smallest index >= i for which bvget(bv, i) == 1.
// If there is no such index, bvnext returns -1.
func (bv bvec) Next(i int32) int32 {
......
......@@ -94,6 +94,19 @@ type Liveness struct {
// in the arguments and locals area, indexed by bb.rpo.
argslivepointers []bvec
livepointers []bvec
cache progeffectscache
}
type progeffectscache struct {
tailuevar []int32
retuevar []int32
textvarkill []int32
textavarinit []int32
uevar [3]int32
varkill [3]int32
avarinit [3]int32
initialized bool
}
// ProgInfo holds information about the instruction for use
......@@ -539,6 +552,44 @@ func isfunny(n *Node) bool {
return n.Sym != nil && (n.Sym.Name == ".fp" || n.Sym.Name == ".args")
}
func (lv *Liveness) initcache() {
if lv.cache.initialized {
Fatalf("liveness cache initialized twice")
return
}
lv.cache.initialized = true
for i, node := range lv.vars {
switch node.Class {
case PPARAM:
// A return instruction with a p.to is a tail return, which brings
// the stack pointer back up (if it ever went down) and then jumps
// to a new function entirely. That form of instruction must read
// all the parameters for correctness, and similarly it must not
// read the out arguments - they won't be set until the new
// function runs.
lv.cache.tailuevar = append(lv.cache.tailuevar, int32(i))
if node.Addrtaken {
lv.cache.textavarinit = append(lv.cache.textavarinit, int32(i))
}
lv.cache.textvarkill = append(lv.cache.textvarkill, int32(i))
case PPARAMOUT:
// If the result had its address taken, it is being tracked
// by the avarinit code, which does not use uevar.
// If we added it to uevar too, we'd not see any kill
// and decide that the variable was live entry, which it is not.
// So only use uevar in the non-addrtaken case.
// The p.to.type == obj.TYPE_NONE limits the bvset to
// non-tail-call return instructions; see note below for details.
if !node.Addrtaken {
lv.cache.retuevar = append(lv.cache.retuevar, int32(i))
}
}
}
}
// Computes the effects of an instruction on a set of
// variables. The vars argument is a slice of *Nodes.
//
......@@ -553,10 +604,11 @@ func isfunny(n *Node) bool {
// The avarinit output serves as a signal that the data has been
// initialized, because any use of a variable must come after its
// initialization.
func progeffects(prog *obj.Prog, vars []*Node, uevar bvec, varkill bvec, avarinit bvec) {
uevar.Clear()
varkill.Clear()
avarinit.Clear()
func (lv *Liveness) progeffects(prog *obj.Prog) (uevar, varkill, avarinit []int32) {
if !lv.cache.initialized {
Fatalf("liveness progeffects cache not initialized")
return
}
// A return instruction with a p.to is a tail return, which brings
// the stack pointer back up (if it ever went down) and then jumps
......@@ -567,67 +619,41 @@ func progeffects(prog *obj.Prog, vars []*Node, uevar bvec, varkill bvec, avarini
if (prog.As == obj.AJMP || prog.As == obj.ARET) && prog.To.Type == obj.TYPE_MEM && prog.To.Name == obj.NAME_EXTERN {
// This is a tail call. Ensure the arguments are still alive.
// See issue 16016.
for i, node := range vars {
if node.Class == PPARAM {
uevar.Set(int32(i))
}
}
return lv.cache.tailuevar, nil, nil
}
if prog.As == obj.ARET {
// Return instructions read all of the out arguments.
for i, node := range vars {
switch node.Class {
// If the result had its address taken, it is being tracked
// by the avarinit code, which does not use uevar.
// If we added it to uevar too, we'd not see any kill
// and decide that the variable was live entry, which it is not.
// So only use uevar in the non-addrtaken case.
// The p.to.type == obj.TYPE_NONE limits the bvset to
// non-tail-call return instructions; see note below for details.
case PPARAMOUT:
if !node.Addrtaken && prog.To.Type == obj.TYPE_NONE {
uevar.Set(int32(i))
}
}
if prog.To.Type == obj.TYPE_NONE {
return lv.cache.retuevar, nil, nil
}
return
return nil, nil, nil
}
if prog.As == obj.ATEXT {
// A text instruction marks the entry point to a function and
// the definition point of all in arguments.
for i, node := range vars {
switch node.Class {
case PPARAM:
if node.Addrtaken {
avarinit.Set(int32(i))
}
varkill.Set(int32(i))
}
}
return
return nil, lv.cache.textvarkill, lv.cache.textavarinit
}
uevar = lv.cache.uevar[:0]
varkill = lv.cache.varkill[:0]
avarinit = lv.cache.avarinit[:0]
info := Thearch.Proginfo(prog)
if info.Flags&(LeftRead|LeftWrite|LeftAddr) != 0 {
from := &prog.From
if from.Node != nil && from.Sym != nil {
n := from.Node.(*Node)
if pos := liveIndex(n, vars); pos >= 0 {
if pos := liveIndex(n, lv.vars); pos >= 0 {
if n.Addrtaken {
avarinit.Set(pos)
avarinit = append(avarinit, pos)
} else {
if info.Flags&(LeftRead|LeftAddr) != 0 {
uevar.Set(pos)
uevar = append(uevar, pos)
}
if info.Flags&LeftWrite != 0 {
if !isfat(n.Type) {
varkill.Set(pos)
}
if info.Flags&LeftWrite != 0 && !isfat(n.Type) {
varkill = append(varkill, pos)
}
}
}
......@@ -638,11 +664,11 @@ func progeffects(prog *obj.Prog, vars []*Node, uevar bvec, varkill bvec, avarini
from := prog.From3
if from.Node != nil && from.Sym != nil {
n := from.Node.(*Node)
if pos := liveIndex(n, vars); pos >= 0 {
if pos := liveIndex(n, lv.vars); pos >= 0 {
if n.Addrtaken {
avarinit.Set(pos)
avarinit = append(avarinit, pos)
} else {
uevar.Set(pos)
uevar = append(uevar, pos)
}
}
}
......@@ -652,13 +678,13 @@ func progeffects(prog *obj.Prog, vars []*Node, uevar bvec, varkill bvec, avarini
to := &prog.To
if to.Node != nil && to.Sym != nil {
n := to.Node.(*Node)
if pos := liveIndex(n, vars); pos >= 0 {
if pos := liveIndex(n, lv.vars); pos >= 0 {
if n.Addrtaken {
if prog.As != obj.AVARKILL {
avarinit.Set(pos)
avarinit = append(avarinit, pos)
}
if prog.As == obj.AVARDEF || prog.As == obj.AVARKILL {
varkill.Set(pos)
varkill = append(varkill, pos)
}
} else {
// RightRead is a read, obviously.
......@@ -670,17 +696,19 @@ func progeffects(prog *obj.Prog, vars []*Node, uevar bvec, varkill bvec, avarini
// having the RightAddr bit set keeps the registerizer from
// trying to substitute a register for the memory location.
if (info.Flags&RightRead != 0) || info.Flags&(RightAddr|RightWrite) == RightAddr {
uevar.Set(pos)
uevar = append(uevar, pos)
}
if info.Flags&RightWrite != 0 {
if !isfat(n.Type) || prog.As == obj.AVARDEF {
varkill.Set(pos)
varkill = append(varkill, pos)
}
}
}
}
}
}
return uevar, varkill, avarinit
}
// liveIndex returns the index of n in the set of tracked vars.
......@@ -727,11 +755,11 @@ func newliveness(fn *Node, ptxt *obj.Prog, cfg []*BasicBlock, vars []*Node) *Liv
return &result
}
func printeffects(p *obj.Prog, uevar bvec, varkill bvec, avarinit bvec) {
func (lv *Liveness) printeffects(p *obj.Prog, uevar, varkill, avarinit []int32) {
fmt.Printf("effects of %v\n", p)
fmt.Println("uevar:", uevar)
fmt.Println("varkill:", varkill)
fmt.Println("avarinit:", avarinit)
fmt.Println("uevar:", lv.slice2bvec(uevar))
fmt.Println("varkill:", lv.slice2bvec(varkill))
fmt.Println("avarinit:", lv.slice2bvec(avarinit))
}
// Pretty print a variable node. Uses Pascal like conventions for pointers and
......@@ -760,6 +788,14 @@ func printvars(name string, bv bvec, vars []*Node) {
fmt.Printf("\n")
}
func (lv *Liveness) slice2bvec(vars []int32) bvec {
bv := bvalloc(int32(len(lv.vars)))
for _, id := range vars {
bv.Set(id)
}
return bv
}
// Prints a basic block annotated with the information computed by liveness
// analysis.
func livenessprintblock(lv *Liveness, bb *BasicBlock) {
......@@ -1032,34 +1068,38 @@ func issafepoint(prog *obj.Prog) bool {
// instructions in each basic block to summarizes the information at each basic
// block
func livenessprologue(lv *Liveness) {
nvars := int32(len(lv.vars))
uevar := bvalloc(nvars)
varkill := bvalloc(nvars)
avarinit := bvalloc(nvars)
lv.initcache()
for _, bb := range lv.cfg {
// Walk the block instructions backward and update the block
// effects with the each prog effects.
for p := bb.last; p != nil; p = p.Opt.(*obj.Prog) {
progeffects(p, lv.vars, uevar, varkill, avarinit)
uevar, varkill, _ := lv.progeffects(p)
if debuglive >= 3 {
printeffects(p, uevar, varkill, avarinit)
lv.printeffects(p, uevar, varkill, nil)
}
for _, pos := range varkill {
bb.varkill.Set(pos)
bb.uevar.Unset(pos)
}
for _, pos := range uevar {
bb.uevar.Set(pos)
}
bb.varkill.Or(bb.varkill, varkill)
bb.uevar.AndNot(bb.uevar, varkill)
bb.uevar.Or(bb.uevar, uevar)
}
// Walk the block instructions forward to update avarinit bits.
// avarinit describes the effect at the end of the block, not the beginning.
varkill.Clear()
for p := bb.first; ; p = p.Link {
progeffects(p, lv.vars, uevar, varkill, avarinit)
_, varkill, avarinit := lv.progeffects(p)
if debuglive >= 3 {
printeffects(p, uevar, varkill, avarinit)
lv.printeffects(p, nil, varkill, avarinit)
}
for _, pos := range varkill {
bb.avarinit.Unset(pos)
}
for _, pos := range avarinit {
bb.avarinit.Set(pos)
}
bb.avarinit.AndNot(bb.avarinit, varkill)
bb.avarinit.Or(bb.avarinit, avarinit)
if p == bb.last {
break
}
......@@ -1187,9 +1227,6 @@ func livenessepilogue(lv *Liveness) {
nvars := int32(len(lv.vars))
livein := bvalloc(nvars)
liveout := bvalloc(nvars)
uevar := bvalloc(nvars)
varkill := bvalloc(nvars)
avarinit := bvalloc(nvars)
any := bvalloc(nvars)
all := bvalloc(nvars)
outLive := bvalloc(argswords()) // always-live output params
......@@ -1244,11 +1281,15 @@ func livenessepilogue(lv *Liveness) {
// allocate liveness maps for those instructions that need them.
// Seed the maps with information about the addrtaken variables.
for p := bb.first; ; p = p.Link {
progeffects(p, lv.vars, uevar, varkill, avarinit)
any.AndNot(any, varkill)
all.AndNot(all, varkill)
any.Or(any, avarinit)
all.Or(all, avarinit)
_, varkill, avarinit := lv.progeffects(p)
for _, pos := range varkill {
any.Unset(pos)
all.Unset(pos)
}
for _, pos := range avarinit {
any.Set(pos)
all.Set(pos)
}
if issafepoint(p) {
// Annotate ambiguously live variables so that they can
......@@ -1331,15 +1372,19 @@ func livenessepilogue(lv *Liveness) {
next = p.Opt.(*obj.Prog) // splicebefore modifies p.opt
// Propagate liveness information
progeffects(p, lv.vars, uevar, varkill, avarinit)
uevar, varkill, _ := lv.progeffects(p)
liveout.Copy(livein)
livein.AndNot(liveout, varkill)
livein.Or(livein, uevar)
for _, pos := range varkill {
livein.Unset(pos)
}
for _, pos := range uevar {
livein.Set(pos)
}
if debuglive >= 3 && issafepoint(p) {
fmt.Printf("%v\n", p)
printvars("uevar", uevar, lv.vars)
printvars("varkill", varkill, lv.vars)
printvars("uevar", lv.slice2bvec(uevar), lv.vars)
printvars("varkill", lv.slice2bvec(varkill), lv.vars)
printvars("livein", livein, lv.vars)
printvars("liveout", liveout, lv.vars)
}
......@@ -1595,10 +1640,6 @@ func printbitset(printed bool, name string, vars []*Node, bits bvec) bool {
func livenessprintdebug(lv *Liveness) {
fmt.Printf("liveness: %s\n", Curfn.Func.Nname.Sym.Name)
uevar := bvalloc(int32(len(lv.vars)))
varkill := bvalloc(int32(len(lv.vars)))
avarinit := bvalloc(int32(len(lv.vars)))
pcdata := 0
for i, bb := range lv.cfg {
if i > 0 {
......@@ -1640,11 +1681,11 @@ func livenessprintdebug(lv *Liveness) {
if p.As == obj.APCDATA && p.From.Offset == obj.PCDATA_StackMapIndex {
pcdata = int(p.To.Offset)
}
progeffects(p, lv.vars, uevar, varkill, avarinit)
uevar, varkill, avarinit := lv.progeffects(p)
printed = false
printed = printbitset(printed, "uevar", lv.vars, uevar)
printed = printbitset(printed, "varkill", lv.vars, varkill)
printed = printbitset(printed, "avarinit", lv.vars, avarinit)
printed = printbitset(printed, "uevar", lv.vars, lv.slice2bvec(uevar))
printed = printbitset(printed, "varkill", lv.vars, lv.slice2bvec(varkill))
printed = printbitset(printed, "avarinit", lv.vars, lv.slice2bvec(avarinit))
if printed {
fmt.Printf("\n")
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment