Commit 8a5175df authored by Cherry Zhang's avatar Cherry Zhang

cmd/compile: improve startRegs calculation

In register allocation, we calculate what values are used in
and after the current block. If a value is used only after a
function call, since registers are clobbered in call, we don't
need to mark the value live at the entrance of the block.
Before this CL it is considered live, and unnecessary copy or
load may be generated when resolving merge edge.

Fixes #14761.

On AMD64:
name                      old time/op    new time/op    delta
BinaryTree17-12              2.84s ± 1%     2.81s ± 1%   -1.06%  (p=0.000 n=10+9)
Fannkuch11-12                3.61s ± 0%     3.55s ± 1%   -1.77%  (p=0.000 n=10+9)
FmtFprintfEmpty-12          50.4ns ± 4%    50.0ns ± 1%     ~     (p=0.785 n=9+8)
FmtFprintfString-12         80.0ns ± 3%    78.2ns ± 3%   -2.35%  (p=0.004 n=10+9)
FmtFprintfInt-12            81.3ns ± 4%    81.8ns ± 2%     ~     (p=0.159 n=10+10)
FmtFprintfIntInt-12          120ns ± 4%     118ns ± 2%     ~     (p=0.218 n=10+10)
FmtFprintfPrefixedInt-12     152ns ± 3%     155ns ± 2%   +2.11%  (p=0.026 n=10+10)
FmtFprintfFloat-12           240ns ± 1%     238ns ± 1%   -0.79%  (p=0.005 n=9+9)
FmtManyArgs-12               504ns ± 1%     510ns ± 1%   +1.14%  (p=0.000 n=8+9)
GobDecode-12                7.00ms ± 1%    6.99ms ± 0%     ~     (p=0.497 n=9+10)
GobEncode-12                5.47ms ± 1%    5.48ms ± 1%     ~     (p=0.218 n=10+10)
Gzip-12                      258ms ± 2%     256ms ± 1%   -0.96%  (p=0.043 n=10+9)
Gunzip-12                   38.6ms ± 0%    38.3ms ± 0%   -0.64%  (p=0.000 n=9+8)
HTTPClientServer-12         90.4µs ± 3%    87.2µs ±11%     ~     (p=0.053 n=9+10)
JSONEncode-12               15.6ms ± 0%    15.6ms ± 1%     ~     (p=0.077 n=9+9)
JSONDecode-12               55.1ms ± 1%    54.6ms ± 1%   -0.85%  (p=0.010 n=10+9)
Mandelbrot200-12            4.49ms ± 0%    4.47ms ± 0%   -0.25%  (p=0.000 n=10+8)
GoParse-12                  3.38ms ± 0%    3.37ms ± 1%     ~     (p=0.315 n=8+10)
RegexpMatchEasy0_32-12      82.5ns ± 4%    82.0ns ± 0%     ~     (p=0.164 n=10+8)
RegexpMatchEasy0_1K-12       203ns ± 1%     202ns ± 1%   -0.85%  (p=0.000 n=9+10)
RegexpMatchEasy1_32-12      82.3ns ± 1%    81.1ns ± 0%   -1.39%  (p=0.000 n=10+8)
RegexpMatchEasy1_1K-12       357ns ± 1%     357ns ± 1%     ~     (p=0.697 n=8+9)
RegexpMatchMedium_32-12      125ns ± 2%     126ns ± 2%     ~     (p=0.197 n=10+10)
RegexpMatchMedium_1K-12     39.6µs ± 3%    39.6µs ± 1%     ~     (p=0.971 n=10+10)
RegexpMatchHard_32-12       1.99µs ± 2%    1.99µs ± 4%     ~     (p=0.891 n=10+9)
RegexpMatchHard_1K-12       60.1µs ± 3%    60.4µs ± 3%     ~     (p=0.684 n=10+10)
Revcomp-12                   531ms ± 6%     441ms ± 0%  -16.94%  (p=0.000 n=10+9)
Template-12                 58.9ms ± 1%    58.7ms ± 1%     ~     (p=0.315 n=10+10)
TimeParse-12                 319ns ± 1%     320ns ± 4%     ~     (p=0.215 n=9+9)
TimeFormat-12                345ns ± 0%     333ns ± 1%   -3.36%  (p=0.000 n=9+10)
[Geo mean]                  52.2µs         51.6µs        -1.13%

On ARM64:
name                     old time/op    new time/op    delta
BinaryTree17-8              8.53s ± 0%     8.36s ± 0%   -1.89%  (p=0.000 n=10+10)
Fannkuch11-8                6.15s ± 0%     6.10s ± 0%   -0.67%  (p=0.000 n=10+10)
FmtFprintfEmpty-8           117ns ± 0%     117ns ± 0%     ~     (all equal)
FmtFprintfString-8          192ns ± 0%     192ns ± 0%     ~     (all equal)
FmtFprintfInt-8             198ns ± 0%     198ns ± 0%     ~     (p=0.211 n=10+10)
FmtFprintfIntInt-8          289ns ± 0%     291ns ± 0%   +0.59%  (p=0.000 n=7+10)
FmtFprintfPrefixedInt-8     320ns ± 2%     317ns ± 0%     ~     (p=0.431 n=10+8)
FmtFprintfFloat-8           538ns ± 0%     538ns ± 0%     ~     (all equal)
FmtManyArgs-8              1.17µs ± 1%    1.18µs ± 1%     ~     (p=0.063 n=10+10)
GobDecode-8                17.0ms ± 1%    17.2ms ± 1%   +0.83%  (p=0.000 n=10+10)
GobEncode-8                14.2ms ± 0%    14.1ms ± 1%   -0.78%  (p=0.001 n=9+10)
Gzip-8                      806ms ± 0%     797ms ± 0%   -1.12%  (p=0.000 n=6+9)
Gunzip-8                    131ms ± 0%     130ms ± 0%   -0.51%  (p=0.000 n=10+9)
HTTPClientServer-8          206µs ± 9%     212µs ± 2%     ~     (p=0.829 n=10+8)
JSONEncode-8               40.1ms ± 0%    40.1ms ± 0%     ~     (p=0.136 n=9+9)
JSONDecode-8                157ms ± 0%     151ms ± 0%   -3.32%  (p=0.000 n=9+9)
Mandelbrot200-8            10.1ms ± 0%    10.1ms ± 0%   -0.05%  (p=0.000 n=9+8)
GoParse-8                  8.43ms ± 0%    8.43ms ± 0%     ~     (p=0.912 n=10+10)
RegexpMatchEasy0_32-8       228ns ± 1%     227ns ± 0%   -0.26%  (p=0.026 n=10+9)
RegexpMatchEasy0_1K-8      1.92µs ± 0%    1.63µs ± 0%  -15.18%  (p=0.001 n=7+7)
RegexpMatchEasy1_32-8       258ns ± 1%     250ns ± 0%   -2.83%  (p=0.000 n=10+10)
RegexpMatchEasy1_1K-8      2.39µs ± 0%    2.13µs ± 0%  -10.94%  (p=0.000 n=9+9)
RegexpMatchMedium_32-8      352ns ± 0%     351ns ± 0%   -0.29%  (p=0.004 n=9+10)
RegexpMatchMedium_1K-8      104µs ± 0%     105µs ± 0%   +0.58%  (p=0.000 n=8+9)
RegexpMatchHard_32-8       5.84µs ± 0%    5.82µs ± 0%   -0.27%  (p=0.000 n=9+10)
RegexpMatchHard_1K-8        177µs ± 0%     177µs ± 0%   -0.07%  (p=0.000 n=9+9)
Revcomp-8                   1.57s ± 1%     1.50s ± 1%   -4.60%  (p=0.000 n=9+10)
Template-8                  157ms ± 1%     153ms ± 1%   -2.28%  (p=0.000 n=10+9)
TimeParse-8                 779ns ± 1%     770ns ± 1%   -1.18%  (p=0.013 n=10+10)
TimeFormat-8                823ns ± 2%     826ns ± 1%     ~     (p=0.324 n=10+9)
[Geo mean]                  144µs          142µs        -1.45%

Reduce cmd/go text size by 0.5%.

Change-Id: I9288ff983c4a7cf03fc0cb35b9b1750828013117
Reviewed-on: https://go-review.googlesource.com/38457
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarDavid Chase <drchase@google.com>
Reviewed-by: default avatarKeith Randall <khr@golang.org>
parent fa1d54c2
...@@ -241,6 +241,9 @@ type regAllocState struct { ...@@ -241,6 +241,9 @@ type regAllocState struct {
// current state of each (preregalloc) Value // current state of each (preregalloc) Value
values []valState values []valState
// ID of SP, SB values
sp, sb ID
// For each Value, map from its value ID back to the // For each Value, map from its value ID back to the
// preregalloc Value it was derived from. // preregalloc Value it was derived from.
orig []*Value orig []*Value
...@@ -709,8 +712,8 @@ func (s *regAllocState) compatRegs(t Type) regMask { ...@@ -709,8 +712,8 @@ func (s *regAllocState) compatRegs(t Type) regMask {
} }
func (s *regAllocState) regalloc(f *Func) { func (s *regAllocState) regalloc(f *Func) {
liveSet := f.newSparseSet(f.NumValues()) regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
defer f.retSparseSet(liveSet) defer f.retSparseSet(regValLiveSet)
var oldSched []*Value var oldSched []*Value
var phis []*Value var phis []*Value
var phiRegs []register var phiRegs []register
...@@ -733,32 +736,42 @@ func (s *regAllocState) regalloc(f *Func) { ...@@ -733,32 +736,42 @@ func (s *regAllocState) regalloc(f *Func) {
for _, b := range f.Blocks { for _, b := range f.Blocks {
s.curBlock = b s.curBlock = b
// Initialize liveSet and uses fields for this block. // Initialize regValLiveSet and uses fields for this block.
// Walk backwards through the block doing liveness analysis. // Walk backwards through the block doing liveness analysis.
liveSet.clear() regValLiveSet.clear()
for _, e := range s.live[b.ID] { for _, e := range s.live[b.ID] {
s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
liveSet.add(e.ID) regValLiveSet.add(e.ID)
} }
if v := b.Control; v != nil && s.values[v.ID].needReg { if v := b.Control; v != nil && s.values[v.ID].needReg {
s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control value s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control value
liveSet.add(v.ID) regValLiveSet.add(v.ID)
} }
for i := len(b.Values) - 1; i >= 0; i-- { for i := len(b.Values) - 1; i >= 0; i-- {
v := b.Values[i] v := b.Values[i]
liveSet.remove(v.ID) regValLiveSet.remove(v.ID)
if v.Op == OpPhi { if v.Op == OpPhi {
// Remove v from the live set, but don't add // Remove v from the live set, but don't add
// any inputs. This is the state the len(b.Preds)>1 // any inputs. This is the state the len(b.Preds)>1
// case below desires; it wants to process phis specially. // case below desires; it wants to process phis specially.
continue continue
} }
if opcodeTable[v.Op].call {
// Function call clobbers all the registers but SP and SB.
regValLiveSet.clear()
if s.sp != 0 && s.values[s.sp].uses != nil {
regValLiveSet.add(s.sp)
}
if s.sb != 0 && s.values[s.sb].uses != nil {
regValLiveSet.add(s.sb)
}
}
for _, a := range v.Args { for _, a := range v.Args {
if !s.values[a.ID].needReg { if !s.values[a.ID].needReg {
continue continue
} }
s.addUse(a.ID, int32(i), v.Pos) s.addUse(a.ID, int32(i), v.Pos)
liveSet.add(a.ID) regValLiveSet.add(a.ID)
} }
} }
if s.f.pass.debug > regDebug { if s.f.pass.debug > regDebug {
...@@ -808,7 +821,7 @@ func (s *regAllocState) regalloc(f *Func) { ...@@ -808,7 +821,7 @@ func (s *regAllocState) regalloc(f *Func) {
// live but only used by some other successor of p. // live but only used by some other successor of p.
for r := register(0); r < s.numRegs; r++ { for r := register(0); r < s.numRegs; r++ {
v := s.regs[r].v v := s.regs[r].v
if v != nil && !liveSet.contains(v.ID) { if v != nil && !regValLiveSet.contains(v.ID) {
s.freeReg(r) s.freeReg(r)
} }
} }
...@@ -864,7 +877,7 @@ func (s *regAllocState) regalloc(f *Func) { ...@@ -864,7 +877,7 @@ func (s *regAllocState) regalloc(f *Func) {
continue continue
} }
a := v.Args[idx] a := v.Args[idx]
if !liveSet.contains(a.ID) { if !regValLiveSet.contains(a.ID) {
// Input is dead beyond the phi, deallocate // Input is dead beyond the phi, deallocate
// anywhere else it might live. // anywhere else it might live.
s.freeRegs(s.values[a.ID].regs) s.freeRegs(s.values[a.ID].regs)
...@@ -932,6 +945,17 @@ func (s *regAllocState) regalloc(f *Func) { ...@@ -932,6 +945,17 @@ func (s *regAllocState) regalloc(f *Func) {
s.assignReg(r, v, v) s.assignReg(r, v, v)
} }
// Deallocate any values which are no longer live. Phis are excluded.
for r := register(0); r < s.numRegs; r++ {
if phiUsed>>r&1 != 0 {
continue
}
v := s.regs[r].v
if v != nil && !regValLiveSet.contains(v.ID) {
s.freeReg(r)
}
}
// Save the starting state for use by merge edges. // Save the starting state for use by merge edges.
var regList []startReg var regList []startReg
for r := register(0); r < s.numRegs; r++ { for r := register(0); r < s.numRegs; r++ {
...@@ -1034,12 +1058,14 @@ func (s *regAllocState) regalloc(f *Func) { ...@@ -1034,12 +1058,14 @@ func (s *regAllocState) regalloc(f *Func) {
s.assignReg(s.SPReg, v, v) s.assignReg(s.SPReg, v, v)
b.Values = append(b.Values, v) b.Values = append(b.Values, v)
s.advanceUses(v) s.advanceUses(v)
s.sp = v.ID
continue continue
} }
if v.Op == OpSB { if v.Op == OpSB {
s.assignReg(s.SBReg, v, v) s.assignReg(s.SBReg, v, v)
b.Values = append(b.Values, v) b.Values = append(b.Values, v)
s.advanceUses(v) s.advanceUses(v)
s.sb = v.ID
continue continue
} }
if v.Op == OpSelect0 || v.Op == OpSelect1 { if v.Op == OpSelect0 || v.Op == OpSelect1 {
...@@ -1435,16 +1461,16 @@ func (s *regAllocState) regalloc(f *Func) { ...@@ -1435,16 +1461,16 @@ func (s *regAllocState) regalloc(f *Func) {
s.endRegs[b.ID] = regList s.endRegs[b.ID] = regList
if checkEnabled { if checkEnabled {
liveSet.clear() regValLiveSet.clear()
for _, x := range s.live[b.ID] { for _, x := range s.live[b.ID] {
liveSet.add(x.ID) regValLiveSet.add(x.ID)
} }
for r := register(0); r < s.numRegs; r++ { for r := register(0); r < s.numRegs; r++ {
v := s.regs[r].v v := s.regs[r].v
if v == nil { if v == nil {
continue continue
} }
if !liveSet.contains(v.ID) { if !regValLiveSet.contains(v.ID) {
s.f.Fatalf("val %s is in reg but not live at end of %s", v, b) s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment