Commit 5a6e511c authored by Keith Randall

cmd/compile: Use Sreedhar+Gao phi building algorithm

Should be more asymptotically happy.

We process each variable in turn to find all the
locations where it needs a phi (the dominance frontier
of all of its definitions).  Then we add all those phis.
This takes O(n * #variables), although hopefully much less.

Then we do a single tree walk to match all the
FwdRefs with the nearest definition or phi.
This takes O(n) time.

The one remaining inefficiency is that we might end up
introducing a bunch of dead phis in the first step.
A TODO is to introduce phis only where they might be
used by a read.

The old algorithm is still faster on small functions,
so there's a cutover size (currently 500 blocks).

This algorithm supersedes David's sparse phi
placement algorithm for large functions.

Lowers compile time of example from #14934 from
~10 sec to ~4 sec.
Lowers compile time of example from #16361 from
~4.5 sec to ~3 sec.
Lowers #16407 from ~20 min to ~30 sec.

Update #14934
Update #16361
Fixes #16407

Change-Id: I1cff6364e1623c143190b6a924d7599e309db58f
Reviewed-on: https://go-review.googlesource.com/30163
Reviewed-by: David Chase <drchase@google.com>
parent d0e92f61
This diff is collapsed.
......@@ -72,6 +72,7 @@ func instrument(fn *Node) {
fn.Func.Enter.Prepend(nd)
nd = mkcall("racefuncexit", nil, nil)
fn.Func.Exit.Append(nd)
fn.Func.Dcl = append(fn.Func.Dcl, &nodpc)
}
if Debug['W'] != 0 {
......
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gc
import (
"cmd/compile/internal/ssa"
"fmt"
"math"
)
// sparseDefState contains a Go map from ONAMEs (*Node) to sparse definition trees, and
// a search helper for the CFG's dominator tree in which those definitions are embedded.
// Once initialized, given a use of an ONAME within a block, the ssa definition for
// that ONAME can be discovered in time roughly proportional to the log of the number
// of SSA definitions of that ONAME (thus avoiding pathological quadratic behavior for
// very large programs). The helper contains state (a dominator tree numbering) common
// to all the sparse definition trees, as well as some necessary data obtained from
// the ssa package.
//
// This algorithm has improved asymptotic complexity, but the constant factor is
// rather large and thus it is only preferred for very large inputs containing
// 1000s of blocks and variables.
//
// A nil *sparseDefState is meaningful: locatePotentialPhiFunctions returns nil
// when the sparse method is not used, and FindBetterDefiningBlock accepts a nil
// receiver and then simply returns the block it was given.
type sparseDefState struct {
helper *ssa.SparseTreeHelper // contains one copy of information needed to do sparse mapping
defmapForOname map[*Node]*onameDefs // for each ONAME, its definition set (normal and phi)
}
// onameDefs contains a record of definitions (ordinary and implied phi function) for a single OName.
// stm is the set of definitions for the OName.
// firstdef and lastuse are postorder block numberings that
// conservatively bracket the entire lifetime of the OName.
//
// defsFor creates records with firstdef = 0 and lastuse = math.MaxInt32, which
// is an empty interval. Insert raises firstdef to the largest postorder number
// of any defining block, and Use lowers lastuse to the smallest postorder
// number of any using block.
type onameDefs struct {
stm *ssa.SparseTreeMap
// firstdef and lastuse define an interval in the postorder numbering
// that is guaranteed to include the entire lifetime of an ONAME.
// In the postorder numbering, math.MaxInt32 is before anything,
// and 0 is after-or-equal all exit nodes and infinite loops.
firstdef int32 // the first definition of this ONAME *in the postorder numbering*
lastuse int32 // the last use of this ONAME *in the postorder numbering*
}
// defsFor returns the definition record (sparse tree plus live range) for
// ONAME n. If the map holds no non-nil record for n, a new one is created,
// stored in the map, and returned.
func (m *sparseDefState) defsFor(n *Node) *onameDefs {
	if existing := m.defmapForOname[n]; existing != nil {
		return existing
	}
	// firstdef and lastuse are postorder indices, not block indices, so
	// starting at (0, MaxInt32) describes an empty interval rather than
	// the whole function.
	fresh := &onameDefs{
		stm:      m.helper.NewTree(),
		firstdef: 0,
		lastuse:  math.MaxInt32,
	}
	m.defmapForOname[n] = fresh
	return fresh
}
// Insert records a definition at block b in tree, using the given
// before/within/after adjustment, and widens the recorded lifetime so that
// firstdef is at least b's postorder number.
func (m *sparseDefState) Insert(tree *onameDefs, b *ssa.Block, adjust int32) {
	if num := m.helper.Ponums[b.ID]; tree.firstdef < num {
		tree.firstdef = num
	}
	tree.stm.Insert(b, adjust, b, m.helper)
}
// Use records a use of tree's variable within block b, widening the recorded
// lifetime so that lastuse is at most b's postorder number.
func (m *sparseDefState) Use(tree *onameDefs, b *ssa.Block) {
	if num := m.helper.Ponums[b.ID]; tree.lastuse > num {
		tree.lastuse = num
	}
}
// locatePotentialPhiFunctions finds all the places where phi functions
// will be inserted into a program and records those and ordinary definitions
// in a "map" (not a Go map) that given an OName and use site, returns the
// SSA definition for that OName that will reach the use site (that is,
// the use site's nearest def/phi site in the dominator tree.)
//
// It returns nil when NumValues*NumBlocks is below s.config.SparsePhiCutoff(),
// meaning the old non-sparse method should be used instead.
func (s *state) locatePotentialPhiFunctions(fn *Node) *sparseDefState {
// s.config.SparsePhiCutoff() is compared with product of numblocks and numvalues,
// if product is smaller than cutoff, use old non-sparse method.
// cutoff == 0 implies all sparse
// cutoff == uint(-1) implies all non-sparse
if uint64(s.f.NumValues())*uint64(s.f.NumBlocks()) < s.config.SparsePhiCutoff() {
return nil
}
helper := ssa.NewSparseTreeHelper(s.f)
po := helper.Po // blocks in postorder: po[i] is the block whose postorder number is i (helper.Ponums maps block.ID to that number).
trees := make(map[*Node]*onameDefs)
dm := &sparseDefState{defmapForOname: trees, helper: helper}
// Process params, taking note of their special lifetimes
b := s.f.Entry
for _, n := range fn.Func.Dcl {
switch n.Class {
case PPARAM, PPARAMOUT:
t := dm.defsFor(n)
dm.Insert(t, b, ssa.AdjustBefore) // define param at entry block
if n.Class == PPARAMOUT {
dm.Use(t, po[0]) // Explicitly use PPARAMOUT at very last block
}
default:
}
}
// Process memory variable.
t := dm.defsFor(&memVar)
dm.Insert(t, b, ssa.AdjustBefore) // define memory at entry block
dm.Use(t, po[0]) // Explicitly use memory at last block
// Next load the map w/ basic definitions for ONames recorded per-block
// Iterate over po to avoid unreachable blocks.
for i := len(po) - 1; i >= 0; i-- {
b := po[i]
m := s.defvars[b.ID]
for n := range m { // no specified order, but per-node trees are independent.
t := dm.defsFor(n)
dm.Insert(t, b, ssa.AdjustWithin)
}
}
// Find last use of each variable
for _, v := range s.fwdRefs {
b := v.Block
name := v.Aux.(*Node)
t := dm.defsFor(name)
dm.Use(t, b)
}
for _, t := range trees {
// iterating over names in the outer loop
// Repeat passes over this variable's live range until a pass inserts no new phi.
for change := true; change; {
change = false
for i := t.firstdef; i >= t.lastuse; i-- {
// Iterating in reverse of post-order reduces number of 'change' iterations;
// all possible forward flow goes through each time.
b := po[i]
// Within tree t, would a use at b require a phi function to ensure a single definition?
// TODO: perhaps more efficient to record specific use sites instead of range?
if len(b.Preds) < 2 {
continue // no phi possible
}
phi := t.stm.Find(b, ssa.AdjustWithin, helper) // Look for defs in earlier block or AdjustBefore in this one.
if phi != nil && phi.(*ssa.Block) == b {
continue // has a phi already in this block.
}
// defseen holds the first non-nil definition found among the predecessors.
var defseen interface{}
// Do preds see different definitions? if so, need a phi function.
for _, e := range b.Preds {
p := e.Block()
dm.Use(t, p) // always count phi pred as "use"; no-op except for loop edges, which matter.
x := t.stm.Find(p, ssa.AdjustAfter, helper) // Look for defs reaching or within predecessors.
if x == nil { // nil def from a predecessor means a backedge that will be visited soon.
continue
}
if defseen == nil {
defseen = x
}
if defseen != x {
// Need to insert a phi function here because predecessors's definitions differ.
change = true
// Phi insertion is at AdjustBefore, visible with find in same block at AdjustWithin or AdjustAfter.
dm.Insert(t, b, ssa.AdjustBefore)
break
}
}
}
}
}
return dm
}
// FindBetterDefiningBlock looks for a block that is a better stand-in than p
// for the definition of name reaching (or within) p. When no such block is
// known it returns p unchanged. Skipping over branch code that holds no
// definition of name makes phi placement cheaper.
//
// A nil receiver is allowed and yields p.
func (m *sparseDefState) FindBetterDefiningBlock(name *Node, p *ssa.Block) *ssa.Block {
	if m == nil {
		return p
	}
	// Fail-soft for now: the old algorithm still works with the unimproved block.
	defs := m.defmapForOname[name]
	if defs == nil {
		return p
	}
	found := defs.stm.Find(p, ssa.AdjustAfter, m.helper)
	if found == nil {
		return p
	}
	if better := found.(*ssa.Block); better != nil {
		return better
	}
	return p
}
// String formats d's firstdef, lastuse and definition tree for debugging output.
func (d *onameDefs) String() string {
	tree := d.stm.String()
	return fmt.Sprintf("onameDefs:first=%d,last=%d,tree=%s", d.firstdef, d.lastuse, tree)
}
......@@ -80,6 +80,7 @@ func buildssa(fn *Node) *ssa.Func {
// Allocate starting values
s.labels = map[string]*ssaLabel{}
s.labeledNodes = map[*Node]*ssaLabel{}
s.fwdVars = map[*Node]*ssa.Value{}
s.startmem = s.entryNewValue0(ssa.OpInitMem, ssa.TypeMem)
s.sp = s.entryNewValue0(ssa.OpSP, Types[TUINTPTR]) // TODO: use generic pointer type (unsafe.Pointer?) instead
s.sb = s.entryNewValue0(ssa.OpSB, Types[TUINTPTR])
......@@ -114,6 +115,21 @@ func buildssa(fn *Node) *ssa.Func {
}
}
// Populate arguments.
for _, n := range fn.Func.Dcl {
if n.Class != PPARAM {
continue
}
var v *ssa.Value
if s.canSSA(n) {
v = s.newValue0A(ssa.OpArg, n.Type, n)
} else {
// Not SSAable. Load it.
v = s.newValue2(ssa.OpLoad, n.Type, s.decladdrs[n], s.startmem)
}
s.vars[n] = v
}
// Convert the AST-based IR to the SSA-based IR
s.stmts(fn.Func.Enter)
s.stmts(fn.Nbody)
......@@ -151,16 +167,7 @@ func buildssa(fn *Node) *ssa.Func {
return nil
}
prelinkNumvars := s.f.NumValues()
sparseDefState := s.locatePotentialPhiFunctions(fn)
// Link up variable uses to variable definitions
s.linkForwardReferences(sparseDefState)
if ssa.BuildStats > 0 {
s.f.LogStat("build", s.f.NumBlocks(), "blocks", prelinkNumvars, "vars_before",
s.f.NumValues(), "vars_after", prelinkNumvars*s.f.NumBlocks(), "ssa_phi_loc_cutoff_score")
}
s.insertPhis()
// Don't carry reference this around longer than necessary
s.exitCode = Nodes{}
......@@ -197,8 +204,14 @@ type state struct {
// variable assignments in the current block (map from variable symbol to ssa value)
// *Node is the unique identifier (an ONAME Node) for the variable.
// TODO: keep a single varnum map, then make all of these maps slices instead?
vars map[*Node]*ssa.Value
// fwdVars are variables that are used before they are defined in the current block.
// This map exists just to coalesce multiple references into a single FwdRef op.
// *Node is the unique identifier (an ONAME Node) for the variable.
fwdVars map[*Node]*ssa.Value
// all defined variables at the end of each block. Indexed by block ID.
defvars []map[*Node]*ssa.Value
......@@ -220,12 +233,12 @@ type state struct {
// Used to deduplicate panic calls.
panics map[funcLine]*ssa.Block
// list of FwdRef values.
fwdRefs []*ssa.Value
// list of PPARAMOUT (return) variables.
returns []*Node
// A dummy value used during phi construction.
placeholder *ssa.Value
cgoUnsafeArgs bool
noWB bool
WBLineno int32 // line number of first write barrier. 0=no write barriers
......@@ -292,6 +305,9 @@ func (s *state) startBlock(b *ssa.Block) {
}
s.curBlock = b
s.vars = map[*Node]*ssa.Value{}
for n := range s.fwdVars {
delete(s.fwdVars, n)
}
}
// endBlock marks the end of generating code for the current block.
......@@ -2951,9 +2967,8 @@ func (s *state) addr(n *Node, bounded bool) (*ssa.Value, bool) {
if v != nil {
return v, false
}
if n.String() == ".fp" {
// Special arg that points to the frame pointer.
// (Used by the race detector, others?)
if n == nodfp {
// Special arg that points to the frame pointer (Used by ORECOVER).
aux := s.lookupSymbol(n, &ssa.ArgSymbol{Typ: n.Type, Node: n})
return s.entryNewValue1A(ssa.OpAddr, t, aux, s.sp), false
}
......@@ -3971,132 +3986,30 @@ func (s *state) checkgoto(from *Node, to *Node) {
// variable returns the value of name at the current location. If the current
// block has no value recorded for name, a FwdRef value of type t is created,
// queued on s.fwdRefs, recorded as the block's value for name, and returned.
func (s *state) variable(name *Node, t ssa.Type) *ssa.Value {
	if cur := s.vars[name]; cur != nil {
		return cur
	}
	ref := s.newValue0A(ssa.OpFwdRef, t, name)
	s.fwdRefs = append(s.fwdRefs, ref)
	s.vars[name] = ref
	s.addNamedValue(name, ref)
	return ref
}
// mem returns the current value of the dummy memory variable memVar,
// looked up through s.variable with type ssa.TypeMem.
func (s *state) mem() *ssa.Value {
return s.variable(&memVar, ssa.TypeMem)
}
// linkForwardReferences builds the SSA graph. Each variable on its first use
// in a basic block leaves a FwdRef in that block representing the incoming
// value of that variable. This function links that ref up with possible
// definitions, inserting Phi values as needed. This is essentially the
// algorithm described by Braun, Buchwald, Hack, Leißa, Mallon, and Zwinkau:
// http://pp.info.uni-karlsruhe.de/uploads/publikationen/braun13cc.pdf
// Differences:
//   - We use FwdRef nodes to postpone phi building until the CFG is
//     completely built. That way we can avoid the notion of "sealed"
//     blocks.
//   - Phi optimization is a separate pass (in ../ssa/phielim.go).
func (s *state) linkForwardReferences(dm *sparseDefState) {
	// The length is re-read on every iteration: the pending list is
	// treated as a stack that may change while a ref is being resolved.
	for n := len(s.fwdRefs); n > 0; n = len(s.fwdRefs) {
		last := n - 1
		ref := s.fwdRefs[last]
		s.fwdRefs = s.fwdRefs[:last]
		s.resolveFwdRef(ref, dm)
	}
}
// resolveFwdRef modifies v to be the variable's value at the start of its block.
// v must be a FwdRef op.
func (s *state) resolveFwdRef(v *ssa.Value, dm *sparseDefState) {
b := v.Block
name := v.Aux.(*Node)
v.Aux = nil
if b == s.f.Entry {
// Live variable at start of function.
if s.canSSA(name) {
if strings.HasPrefix(name.Sym.Name, "autotmp_") {
// It's likely that this is an uninitialized variable in the entry block.
s.Fatalf("Treating auto as if it were arg, func %s, node %v, value %v", b.Func.Name, name, v)
}
v.Op = ssa.OpArg
v.Aux = name
return
}
// Not SSAable. Load it.
addr := s.decladdrs[name]
if addr == nil {
// TODO: closure args reach here.
s.Fatalf("unhandled closure arg %v at entry to function %s", name, b.Func.Name)
}
if _, ok := addr.Aux.(*ssa.ArgSymbol); !ok {
s.Fatalf("variable live at start of function %s is not an argument %v", b.Func.Name, name)
}
v.Op = ssa.OpLoad
v.AddArgs(addr, s.startmem)
return
}
if len(b.Preds) == 0 {
// This block is dead; we have no predecessors and we're not the entry block.
// It doesn't matter what we use here as long as it is well-formed.
v.Op = ssa.OpUnknown
return
}
// Find variable value on each predecessor.
var argstore [4]*ssa.Value
args := argstore[:0]
for _, e := range b.Preds {
p := e.Block()
p = dm.FindBetterDefiningBlock(name, p) // try sparse improvement on p
args = append(args, s.lookupVarOutgoing(p, v.Type, name, v.Line))
}
// Decide if we need a phi or not. We need a phi if there
// are two different args (which are both not v).
var w *ssa.Value
for _, a := range args {
if a == v {
continue // self-reference
}
if a == w {
continue // already have this witness
}
if w != nil {
// two witnesses, need a phi value
v.Op = ssa.OpPhi
v.AddArgs(args...)
return
}
w = a // save witness
if v != nil {
return v
}
if w == nil {
s.Fatalf("no witness for reachable phi %s", v)
v = s.fwdVars[name]
if v != nil {
return v
}
// One witness. Make v a copy of w.
v.Op = ssa.OpCopy
v.AddArg(w)
}
// lookupVarOutgoing finds the variable's value at the end of block b.
func (s *state) lookupVarOutgoing(b *ssa.Block, t ssa.Type, name *Node, line int32) *ssa.Value {
for {
if v, ok := s.defvars[b.ID][name]; ok {
return v
}
// The variable is not defined by b and we haven't looked it up yet.
// If b has exactly one predecessor, loop to look it up there.
// Otherwise, give up and insert a new FwdRef and resolve it later.
if len(b.Preds) != 1 {
break
}
b = b.Preds[0].Block()
if s.curBlock == s.f.Entry {
// No variable should be live at entry.
s.Fatalf("Value live at entry. It shouldn't be. func %s, node %v, value %v", s.f.Name, name, v)
}
// Generate a FwdRef for the variable and return that.
v := b.NewValue0A(line, ssa.OpFwdRef, t, name)
s.fwdRefs = append(s.fwdRefs, v)
s.defvars[b.ID][name] = v
// Make a FwdRef, which records a value that's live on block input.
// We'll find the matching definition as part of insertPhis.
v = s.newValue0A(ssa.OpFwdRef, t, name)
s.fwdVars[name] = v
s.addNamedValue(name, v)
return v
}
// mem returns the current value of the dummy memory variable memVar,
// looked up through s.variable with type ssa.TypeMem.
func (s *state) mem() *ssa.Value {
return s.variable(&memVar, ssa.TypeMem)
}
func (s *state) addNamedValue(n *Node, v *ssa.Value) {
if n.Class == Pxxx {
// Don't track our dummy nodes (&memVar etc.).
......
......@@ -89,6 +89,9 @@ type Edge struct {
func (e Edge) Block() *Block {
return e.b
}
// Index returns the edge's i field.
// NOTE(review): presumably this is the position of the matching reverse edge
// in the other block's edge list; confirm against the Edge type's definition.
func (e Edge) Index() int {
return e.i
}
// kind control successors
// ------------------------------------------
......
......@@ -459,6 +459,9 @@ func (f *Func) idom() []*Block {
}
return f.cachedIdom
}
// Idom is the exported form of idom: it returns the result of f.idom() so that
// code outside this package can obtain it.
func (f *Func) Idom() []*Block {
return f.idom()
}
// sdom returns a sparse tree representing the dominator relationships
// among the blocks of f.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment