Commit 14cb4158 authored by David du Colombier's avatar David du Colombier

cmd/compile: don't use MOVOstore instruction on plan9/amd64

CL 54410 and CL 56250 recently added use of the MOVOstore
instruction to improve performance.

However, we can't use the MOVOstore instruction on Plan 9,
because floating point operations are not allowed in the
note handler.

This change adds a configuration flag useSSE to enable the
use of SSE instructions for non-floating point operations.
This flag is enabled by default and disabled on Plan 9.
When this flag is disabled, the MOVOstore instruction is
not used and the MOVQstoreconst instruction is used instead.

Fixes #21599

Change-Id: Ie609e5d9b82ec0092ae874bab4ce01caa5bc8fb8
Reviewed-on: https://go-review.googlesource.com/58850Reviewed-by: default avatarKeith Randall <khr@golang.org>
parent a164a2f5
......@@ -33,6 +33,7 @@ type Config struct {
ctxt *obj.Link // Generic arch information
optimize bool // Do optimization
noDuffDevice bool // Don't use Duff's device
useSSE bool // Use SSE for non-float operations
nacl bool // GOOS=nacl
use387 bool // GO386=387
NeedsFpScratch bool // No direct move between GP and FP register sets
......@@ -264,11 +265,13 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize bool) *Config
c.ctxt = ctxt
c.optimize = optimize
c.nacl = objabi.GOOS == "nacl"
c.useSSE = true
// Don't use Duff's device on Plan 9 AMD64, because floating
// point operations are not allowed in note handler.
// Don't use Duff's device nor SSE on Plan 9 AMD64, because
// floating point operations are not allowed in note handler.
if objabi.GOOS == "plan9" && arch == "amd64" {
c.noDuffDevice = true
c.useSSE = false
}
if c.nacl {
......
......@@ -386,29 +386,48 @@
(MOVLstoreconst [makeValAndOff(0,3)] destptr
(MOVLstoreconst [0] destptr mem))
(Zero [s] destptr mem) && s > 8 && s < 16 ->
// Strip off any fractional word zeroing.
(Zero [s] destptr mem) && s%8 != 0 && s > 8 && !config.useSSE ->
(Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8])
(MOVQstoreconst [0] destptr mem))
// Zero small numbers of words directly.
(Zero [16] destptr mem) && !config.useSSE ->
(MOVQstoreconst [makeValAndOff(0,8)] destptr
(MOVQstoreconst [0] destptr mem))
(Zero [24] destptr mem) && !config.useSSE ->
(MOVQstoreconst [makeValAndOff(0,16)] destptr
(MOVQstoreconst [makeValAndOff(0,8)] destptr
(MOVQstoreconst [0] destptr mem)))
(Zero [32] destptr mem) && !config.useSSE ->
(MOVQstoreconst [makeValAndOff(0,24)] destptr
(MOVQstoreconst [makeValAndOff(0,16)] destptr
(MOVQstoreconst [makeValAndOff(0,8)] destptr
(MOVQstoreconst [0] destptr mem))))
(Zero [s] destptr mem) && s > 8 && s < 16 && config.useSSE ->
(MOVQstoreconst [makeValAndOff(0,s-8)] destptr
(MOVQstoreconst [0] destptr mem))
// Adjust zeros to be a multiple of 16 bytes.
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 ->
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE ->
(Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
(MOVOstore destptr (MOVOconst [0]) mem))
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 ->
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE ->
(Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
(MOVQstoreconst [0] destptr mem))
(Zero [16] destptr mem) ->
(Zero [16] destptr mem) && config.useSSE ->
(MOVOstore destptr (MOVOconst [0]) mem)
(Zero [32] destptr mem) ->
(Zero [32] destptr mem) && config.useSSE ->
(MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
(MOVOstore destptr (MOVOconst [0]) mem))
(Zero [48] destptr mem) ->
(Zero [48] destptr mem) && config.useSSE ->
(MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
(MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
(MOVOstore destptr (MOVOconst [0]) mem)))
(Zero [64] destptr mem) ->
(Zero [64] destptr mem) && config.useSSE ->
(MOVOstore (OffPtr <destptr.Type> destptr [48]) (MOVOconst [0])
(MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
(MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
......@@ -421,7 +440,7 @@
// Large zeroing uses REP STOSQ.
(Zero [s] destptr mem)
&& (s > 1024 || (config.noDuffDevice && s > 64))
&& (s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32))
&& s%8 == 0 ->
(REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem)
......@@ -2205,6 +2224,7 @@
&& clobber(x)
-> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
(MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
&& config.useSSE
&& x.Uses == 1
&& ValAndOff(c2).Off() + 8 == ValAndOff(c).Off()
&& ValAndOff(c).Val() == 0
......
......@@ -842,7 +842,7 @@ func rewriteValueAMD64(v *Value) bool {
case OpXor8:
return rewriteValueAMD64_OpXor8_0(v)
case OpZero:
return rewriteValueAMD64_OpZero_0(v) || rewriteValueAMD64_OpZero_10(v)
return rewriteValueAMD64_OpZero_0(v) || rewriteValueAMD64_OpZero_10(v) || rewriteValueAMD64_OpZero_20(v)
case OpZeroExt16to32:
return rewriteValueAMD64_OpZeroExt16to32_0(v)
case OpZeroExt16to64:
......@@ -9089,6 +9089,8 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
b := v.Block
_ = b
config := b.Func.Config
_ = config
// match: (MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
// cond: ValAndOff(sc).canAdd(off)
// result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
......@@ -9218,7 +9220,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
return true
}
// match: (MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
// cond: x.Uses == 1 && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)
// cond: config.useSSE && x.Uses == 1 && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)
// result: (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem)
for {
c := v.AuxInt
......@@ -9238,7 +9240,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
break
}
mem := x.Args[1]
if !(x.Uses == 1 && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) {
if !(config.useSSE && x.Uses == 1 && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) {
break
}
v.reset(OpAMD64MOVOstore)
......@@ -42862,6 +42864,8 @@ func rewriteValueAMD64_OpXor8_0(v *Value) bool {
func rewriteValueAMD64_OpZero_0(v *Value) bool {
b := v.Block
_ = b
config := b.Func.Config
_ = config
// match: (Zero [0] _ mem)
// cond:
// result: mem
......@@ -43021,24 +43025,27 @@ func rewriteValueAMD64_OpZero_0(v *Value) bool {
return true
}
// match: (Zero [s] destptr mem)
// cond: s > 8 && s < 16
// result: (MOVQstoreconst [makeValAndOff(0,s-8)] destptr (MOVQstoreconst [0] destptr mem))
// cond: s%8 != 0 && s > 8 && !config.useSSE
// result: (Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8]) (MOVQstoreconst [0] destptr mem))
for {
s := v.AuxInt
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
if !(s > 8 && s < 16) {
if !(s%8 != 0 && s > 8 && !config.useSSE) {
break
}
v.reset(OpAMD64MOVQstoreconst)
v.AuxInt = makeValAndOff(0, s-8)
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
v0.AuxInt = 0
v.reset(OpZero)
v.AuxInt = s - s%8
v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type)
v0.AuxInt = s % 8
v0.AddArg(destptr)
v0.AddArg(mem)
v.AddArg(v0)
v1 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
v1.AuxInt = 0
v1.AddArg(destptr)
v1.AddArg(mem)
v.AddArg(v1)
return true
}
return false
......@@ -43048,17 +43055,117 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
_ = b
config := b.Func.Config
_ = config
typ := &b.Func.Config.Types
_ = typ
// match: (Zero [16] destptr mem)
// cond: !config.useSSE
// result: (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [0] destptr mem))
for {
if v.AuxInt != 16 {
break
}
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
if !(!config.useSSE) {
break
}
v.reset(OpAMD64MOVQstoreconst)
v.AuxInt = makeValAndOff(0, 8)
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
v0.AuxInt = 0
v0.AddArg(destptr)
v0.AddArg(mem)
v.AddArg(v0)
return true
}
// match: (Zero [24] destptr mem)
// cond: !config.useSSE
// result: (MOVQstoreconst [makeValAndOff(0,16)] destptr (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [0] destptr mem)))
for {
if v.AuxInt != 24 {
break
}
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
if !(!config.useSSE) {
break
}
v.reset(OpAMD64MOVQstoreconst)
v.AuxInt = makeValAndOff(0, 16)
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
v0.AuxInt = makeValAndOff(0, 8)
v0.AddArg(destptr)
v1 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
v1.AuxInt = 0
v1.AddArg(destptr)
v1.AddArg(mem)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
// match: (Zero [32] destptr mem)
// cond: !config.useSSE
// result: (MOVQstoreconst [makeValAndOff(0,24)] destptr (MOVQstoreconst [makeValAndOff(0,16)] destptr (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [0] destptr mem))))
for {
if v.AuxInt != 32 {
break
}
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
if !(!config.useSSE) {
break
}
v.reset(OpAMD64MOVQstoreconst)
v.AuxInt = makeValAndOff(0, 24)
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
v0.AuxInt = makeValAndOff(0, 16)
v0.AddArg(destptr)
v1 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
v1.AuxInt = makeValAndOff(0, 8)
v1.AddArg(destptr)
v2 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
v2.AuxInt = 0
v2.AddArg(destptr)
v2.AddArg(mem)
v1.AddArg(v2)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
// match: (Zero [s] destptr mem)
// cond: s > 8 && s < 16 && config.useSSE
// result: (MOVQstoreconst [makeValAndOff(0,s-8)] destptr (MOVQstoreconst [0] destptr mem))
for {
s := v.AuxInt
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
if !(s > 8 && s < 16 && config.useSSE) {
break
}
v.reset(OpAMD64MOVQstoreconst)
v.AuxInt = makeValAndOff(0, s-8)
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
v0.AuxInt = 0
v0.AddArg(destptr)
v0.AddArg(mem)
v.AddArg(v0)
return true
}
// match: (Zero [s] destptr mem)
// cond: s%16 != 0 && s > 16 && s%16 > 8
// cond: s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE
// result: (Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16]) (MOVOstore destptr (MOVOconst [0]) mem))
for {
s := v.AuxInt
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
if !(s%16 != 0 && s > 16 && s%16 > 8) {
if !(s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE) {
break
}
v.reset(OpZero)
......@@ -43077,14 +43184,14 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
return true
}
// match: (Zero [s] destptr mem)
// cond: s%16 != 0 && s > 16 && s%16 <= 8
// cond: s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE
// result: (Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16]) (MOVQstoreconst [0] destptr mem))
for {
s := v.AuxInt
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
if !(s%16 != 0 && s > 16 && s%16 <= 8) {
if !(s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE) {
break
}
v.reset(OpZero)
......@@ -43101,7 +43208,7 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
return true
}
// match: (Zero [16] destptr mem)
// cond:
// cond: config.useSSE
// result: (MOVOstore destptr (MOVOconst [0]) mem)
for {
if v.AuxInt != 16 {
......@@ -43110,6 +43217,9 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
if !(config.useSSE) {
break
}
v.reset(OpAMD64MOVOstore)
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpAMD64MOVOconst, types.TypeInt128)
......@@ -43119,7 +43229,7 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
return true
}
// match: (Zero [32] destptr mem)
// cond:
// cond: config.useSSE
// result: (MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0]) (MOVOstore destptr (MOVOconst [0]) mem))
for {
if v.AuxInt != 32 {
......@@ -43128,6 +43238,9 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
if !(config.useSSE) {
break
}
v.reset(OpAMD64MOVOstore)
v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type)
v0.AuxInt = 16
......@@ -43146,7 +43259,7 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
return true
}
// match: (Zero [48] destptr mem)
// cond:
// cond: config.useSSE
// result: (MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0]) (MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0]) (MOVOstore destptr (MOVOconst [0]) mem)))
for {
if v.AuxInt != 48 {
......@@ -43155,6 +43268,9 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
if !(config.useSSE) {
break
}
v.reset(OpAMD64MOVOstore)
v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type)
v0.AuxInt = 32
......@@ -43182,7 +43298,7 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
return true
}
// match: (Zero [64] destptr mem)
// cond:
// cond: config.useSSE
// result: (MOVOstore (OffPtr <destptr.Type> destptr [48]) (MOVOconst [0]) (MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0]) (MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0]) (MOVOstore destptr (MOVOconst [0]) mem))))
for {
if v.AuxInt != 64 {
......@@ -43191,6 +43307,9 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
if !(config.useSSE) {
break
}
v.reset(OpAMD64MOVOstore)
v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type)
v0.AuxInt = 48
......@@ -43226,6 +43345,15 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
v.AddArg(v2)
return true
}
return false
}
func rewriteValueAMD64_OpZero_20(v *Value) bool {
b := v.Block
_ = b
config := b.Func.Config
_ = config
typ := &b.Func.Config.Types
_ = typ
// match: (Zero [s] destptr mem)
// cond: s > 64 && s <= 1024 && s%16 == 0 && !config.noDuffDevice
// result: (DUFFZERO [s] destptr (MOVOconst [0]) mem)
......@@ -43247,14 +43375,14 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
return true
}
// match: (Zero [s] destptr mem)
// cond: (s > 1024 || (config.noDuffDevice && s > 64)) && s%8 == 0
// cond: (s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32)) && s%8 == 0
// result: (REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem)
for {
s := v.AuxInt
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
if !((s > 1024 || (config.noDuffDevice && s > 64)) && s%8 == 0) {
if !((s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32)) && s%8 == 0) {
break
}
v.reset(OpAMD64REPSTOSQ)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment