Commit 0b79dde1, authored and committed by Keith Randall

cmd/compile: don't use CMOV ops to compute load addresses

We want to issue loads as soon as possible, especially when they
are going to miss in the cache. Using a conditional move (CMOV) here:

i := ...
if cond {
   i++
}
... = a[i]

means that we have to wait for cond to be computed before the load
is issued. Without a CMOV, if the branch is predicted correctly the
load can be issued in parallel with computing cond.
Even if the branch is predicted incorrectly, maybe the speculative
load is close to the real load, and we get a prefetch for free.
In the worst case, when the prediction is wrong and the address is
way off, we only lose by the time difference between the CMOV
latency (~2 cycles) and the mispredict restart latency (~15 cycles).

We only squash CMOVs that affect load addresses. Results of CMOVs
that are used for other things (store addresses, store values) we
use as before.

Fixes #26306

Change-Id: I82ca14b664bf05e1d45e58de8c4d9c775a127ca1
Reviewed-on: https://go-review.googlesource.com/c/145717
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
parent 5680874e
...@@ -26,16 +26,61 @@ func branchelim(f *Func) { ...@@ -26,16 +26,61 @@ func branchelim(f *Func) {
return return
} }
// Find all the values used in computing the address of any load.
// Typically these values have operations like AddPtr, Lsh64x64, etc.
loadAddr := f.newSparseSet(f.NumValues())
defer f.retSparseSet(loadAddr)
for _, b := range f.Blocks {
for _, v := range b.Values {
switch v.Op {
case OpLoad, OpAtomicLoad32, OpAtomicLoad64, OpAtomicLoadPtr, OpAtomicLoadAcq32:
loadAddr.add(v.Args[0].ID)
case OpMove:
loadAddr.add(v.Args[1].ID)
}
}
}
po := f.postorder()
for {
n := loadAddr.size()
for _, b := range po {
for i := len(b.Values) - 1; i >= 0; i-- {
v := b.Values[i]
if !loadAddr.contains(v.ID) {
continue
}
for _, a := range v.Args {
if a.Type.IsInteger() || a.Type.IsPtr() || a.Type.IsUnsafePtr() {
loadAddr.add(a.ID)
}
}
}
}
if loadAddr.size() == n {
break
}
}
change := true change := true
for change { for change {
change = false change = false
for _, b := range f.Blocks { for _, b := range f.Blocks {
change = elimIf(f, b) || elimIfElse(f, b) || change change = elimIf(f, loadAddr, b) || elimIfElse(f, loadAddr, b) || change
} }
} }
} }
func canCondSelect(v *Value, arch string) bool { func canCondSelect(v *Value, arch string, loadAddr *sparseSet) bool {
if loadAddr.contains(v.ID) {
// The result of the soon-to-be conditional move is used to compute a load address.
// We want to avoid generating a conditional move in this case
// because the load address would now be data-dependent on the condition.
// Previously it would only be control-dependent on the condition, which is faster
// if the branch predicts well (or possibly even if it doesn't, if the load will
// be an expensive cache miss).
// See issue #26306.
return false
}
// For now, stick to simple scalars that fit in registers // For now, stick to simple scalars that fit in registers
switch { switch {
case v.Type.Size() > v.Block.Func.Config.RegSize: case v.Type.Size() > v.Block.Func.Config.RegSize:
...@@ -53,7 +98,10 @@ func canCondSelect(v *Value, arch string) bool { ...@@ -53,7 +98,10 @@ func canCondSelect(v *Value, arch string) bool {
} }
} }
func elimIf(f *Func, dom *Block) bool { // elimIf converts the one-way branch starting at dom in f to a conditional move if possible.
// loadAddr is a set of values which are used to compute the address of a load.
// Those values are exempt from CMOV generation.
func elimIf(f *Func, loadAddr *sparseSet, dom *Block) bool {
// See if dom is an If with one arm that // See if dom is an If with one arm that
// is trivial and succeeded by the other // is trivial and succeeded by the other
// successor of dom. // successor of dom.
...@@ -83,7 +131,7 @@ func elimIf(f *Func, dom *Block) bool { ...@@ -83,7 +131,7 @@ func elimIf(f *Func, dom *Block) bool {
for _, v := range post.Values { for _, v := range post.Values {
if v.Op == OpPhi { if v.Op == OpPhi {
hasphis = true hasphis = true
if !canCondSelect(v, f.Config.arch) { if !canCondSelect(v, f.Config.arch, loadAddr) {
return false return false
} }
} }
...@@ -158,7 +206,10 @@ func clobberBlock(b *Block) { ...@@ -158,7 +206,10 @@ func clobberBlock(b *Block) {
b.Kind = BlockInvalid b.Kind = BlockInvalid
} }
func elimIfElse(f *Func, b *Block) bool { // elimIfElse converts the two-way branch starting at b in f to a conditional move if possible.
// loadAddr is a set of values which are used to compute the address of a load.
// Those values are exempt from CMOV generation.
func elimIfElse(f *Func, loadAddr *sparseSet, b *Block) bool {
// See if 'b' ends in an if/else: it should // See if 'b' ends in an if/else: it should
// have two successors, both of which are BlockPlain // have two successors, both of which are BlockPlain
// and succeeded by the same block. // and succeeded by the same block.
...@@ -184,7 +235,7 @@ func elimIfElse(f *Func, b *Block) bool { ...@@ -184,7 +235,7 @@ func elimIfElse(f *Func, b *Block) bool {
for _, v := range post.Values { for _, v := range post.Values {
if v.Op == OpPhi { if v.Op == OpPhi {
hasphis = true hasphis = true
if !canCondSelect(v, f.Config.arch) { if !canCondSelect(v, f.Config.arch, loadAddr) {
return false return false
} }
} }
......
...@@ -180,3 +180,20 @@ func cmovinvert6(x, y uint64) uint64 { ...@@ -180,3 +180,20 @@ func cmovinvert6(x, y uint64) uint64 {
// amd64:"CMOVQLS" // amd64:"CMOVQLS"
return y return y
} }
// cmovload returns a[i], where i is incremented first when b is true.
// The conditionally updated index feeds a load address, so on amd64 the
// generated code is expected NOT to contain CMOVQNE: the increment should
// stay a branch so the load does not have to wait for the condition.
func cmovload(a []int, i int, b bool) int {
if b {
i++
}
// See issue 26306
// amd64:-"CMOVQNE"
return a[i]
}
// cmovstore stores 7 into a[i], where i is incremented first when b is true.
// Here the conditionally updated index feeds only a store address, so on
// amd64 the increment is still expected to be compiled to CMOVQNE.
func cmovstore(a []int, i int, b bool) {
if b {
i++
}
// amd64:"CMOVQNE"
a[i] = 7
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment