Commit 04d6edc3 authored by Keith Randall's avatar Keith Randall

[dev.ssa] cmd/compile: clean up zeroing. Use duffzero when appropriate.

Change-Id: I4deb03340e87f43179d5e22bf81843c17b5581fc
Reviewed-on: https://go-review.googlesource.com/14756Reviewed-by: default avatarDavid Chase <drchase@google.com>
parent 3a70bf9c
......@@ -3613,22 +3613,12 @@ func (s *genState) genValue(v *ssa.Value) {
ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
opregreg(v.Op.Asm(), regnum(v), regnum(v.Args[0]))
case ssa.OpAMD64MOVXzero:
nb := v.AuxInt
offset := int64(0)
reg := regnum(v.Args[0])
for nb >= 8 {
nb, offset = movZero(x86.AMOVQ, 8, nb, offset, reg)
}
for nb >= 4 {
nb, offset = movZero(x86.AMOVL, 4, nb, offset, reg)
}
for nb >= 2 {
nb, offset = movZero(x86.AMOVW, 2, nb, offset, reg)
}
for nb >= 1 {
nb, offset = movZero(x86.AMOVB, 1, nb, offset, reg)
}
case ssa.OpAMD64DUFFZERO:
p := Prog(obj.ADUFFZERO)
p.To.Type = obj.TYPE_ADDR
p.To.Sym = Linksym(Pkglookup("duffzero", Runtimepkg))
p.To.Offset = v.AuxInt
case ssa.OpCopy: // TODO: lower to MOVQ earlier?
if v.Type.IsMemory() {
return
......@@ -3830,11 +3820,6 @@ func (s *genState) genValue(v *ssa.Value) {
case ssa.OpAMD64InvertFlags:
v.Fatalf("InvertFlags should never make it to codegen %v", v)
case ssa.OpAMD64REPSTOSQ:
p := Prog(x86.AXORL) // TODO: lift out zeroing into its own instruction?
p.From.Type = obj.TYPE_REG
p.From.Reg = x86.REG_AX
p.To.Type = obj.TYPE_REG
p.To.Reg = x86.REG_AX
Prog(x86.AREP)
Prog(x86.ASTOSQ)
case ssa.OpAMD64REPMOVSB:
......
......@@ -87,3 +87,5 @@ func TestClosure(t *testing.T) { runTest(t, "closure_ssa.go") }
func TestArray(t *testing.T) { runTest(t, "array_ssa.go") }
func TestAppend(t *testing.T) { runTest(t, "append_ssa.go") }
func TestZero(t *testing.T) { runTest(t, "zero_ssa.go") }
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"bytes"
"fmt"
"go/format"
"io/ioutil"
"log"
)
// This program generates tests to verify that zeroing operations
// zero the data they are supposed to and clobber no adjacent values.
// run as `go run zeroGen.go`. A file called zero_ssa.go
// will be written into the parent directory containing the tests.
var sizes = [...]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 16, 17, 23, 24, 25, 31, 32, 33, 63, 64, 65, 1023, 1024, 1025}
func main() {
w := new(bytes.Buffer)
fmt.Fprintf(w, "// run\n")
fmt.Fprintf(w, "// autogenerated from gen/zeroGen.go - do not edit!\n")
fmt.Fprintf(w, "package main\n")
fmt.Fprintf(w, "import \"fmt\"\n")
for _, s := range sizes {
// type for test
fmt.Fprintf(w, "type T%d struct {\n", s)
fmt.Fprintf(w, " pre [8]byte\n")
fmt.Fprintf(w, " mid [%d]byte\n", s)
fmt.Fprintf(w, " post [8]byte\n")
fmt.Fprintf(w, "}\n")
// function being tested
fmt.Fprintf(w, "func zero%d_ssa(x *[%d]byte) {\n", s, s)
fmt.Fprintf(w, " switch{}\n")
fmt.Fprintf(w, " *x = [%d]byte{}\n", s)
fmt.Fprintf(w, "}\n")
// testing harness
fmt.Fprintf(w, "func testZero%d() {\n", s)
fmt.Fprintf(w, " a := T%d{[8]byte{255,255,255,255,255,255,255,255},[%d]byte{", s, s)
for i := 0; i < s; i++ {
fmt.Fprintf(w, "255,")
}
fmt.Fprintf(w, "},[8]byte{255,255,255,255,255,255,255,255}}\n")
fmt.Fprintf(w, " zero%d_ssa(&a.mid)\n", s)
fmt.Fprintf(w, " want := T%d{[8]byte{255,255,255,255,255,255,255,255},[%d]byte{", s, s)
for i := 0; i < s; i++ {
fmt.Fprintf(w, "0,")
}
fmt.Fprintf(w, "},[8]byte{255,255,255,255,255,255,255,255}}\n")
fmt.Fprintf(w, " if a != want {\n")
fmt.Fprintf(w, " fmt.Printf(\"zero%d got=%%v, want %%v\\n\", a, want)\n", s)
fmt.Fprintf(w, " failed=true\n")
fmt.Fprintf(w, " }\n")
fmt.Fprintf(w, "}\n")
}
// boilerplate at end
fmt.Fprintf(w, "var failed bool\n")
fmt.Fprintf(w, "func main() {\n")
for _, s := range sizes {
fmt.Fprintf(w, " testZero%d()\n", s)
}
fmt.Fprintf(w, " if failed {\n")
fmt.Fprintf(w, " panic(\"failed\")\n")
fmt.Fprintf(w, " }\n")
fmt.Fprintf(w, "}\n")
// gofmt result
b := w.Bytes()
src, err := format.Source(b)
if err != nil {
fmt.Printf("%s\n", b)
panic(err)
}
// write to file
err = ioutil.WriteFile("../zero_ssa.go", src, 0666)
if err != nil {
log.Fatalf("can't write output: %v\n", err)
}
}
This diff is collapsed.
......@@ -565,15 +565,50 @@
// lower Zero instructions with word sizes
(Zero [0] _ mem) -> mem
(Zero [1] destptr mem) -> (MOVBstore destptr (MOVBconst <config.Frontend().TypeInt8()> [0]) mem)
(Zero [2] destptr mem) -> (MOVWstore destptr (MOVWconst <config.Frontend().TypeInt16()> [0]) mem)
(Zero [4] destptr mem) -> (MOVLstore destptr (MOVLconst <config.Frontend().TypeInt32()> [0]) mem)
(Zero [8] destptr mem) -> (MOVQstore destptr (MOVQconst <config.Frontend().TypeInt64()> [0]) mem)
// rewrite anything less than 4 words into a series of MOV[BWLQ] $0, ptr(off) instructions
(Zero [size] destptr mem) && size < 4*8 -> (MOVXzero [size] destptr mem)
// Use STOSQ to zero memory. Rewrite this into storing the words with REPSTOSQ and then filling in the remainder with linear moves
(Zero [size] destptr mem) && size >= 4*8 -> (Zero [size%8] (OffPtr <config.Frontend().TypeUInt64()> [size-(size%8)] destptr) (REPSTOSQ <TypeMem> destptr (MOVQconst <config.Frontend().TypeUInt64()> [size/8]) mem))
(Zero [1] destptr mem) -> (MOVBstore destptr (MOVBconst [0]) mem)
(Zero [2] destptr mem) -> (MOVWstore destptr (MOVWconst [0]) mem)
(Zero [4] destptr mem) -> (MOVLstore destptr (MOVLconst [0]) mem)
(Zero [8] destptr mem) -> (MOVQstore destptr (MOVQconst [0]) mem)
(Zero [3] destptr mem) ->
(MOVBstore (ADDQconst [2] destptr) (MOVBconst [0])
(MOVWstore destptr (MOVWconst [0]) mem))
(Zero [5] destptr mem) ->
(MOVBstore (ADDQconst [4] destptr) (MOVBconst [0])
(MOVLstore destptr (MOVLconst [0]) mem))
(Zero [6] destptr mem) ->
(MOVWstore (ADDQconst [4] destptr) (MOVWconst [0])
(MOVLstore destptr (MOVLconst [0]) mem))
(Zero [7] destptr mem) ->
(MOVLstore (ADDQconst [3] destptr) (MOVLconst [0])
(MOVLstore destptr (MOVLconst [0]) mem))
// Strip off any fractional word zeroing.
(Zero [size] destptr mem) && size%8 != 0 && size > 8 ->
(Zero [size-size%8] (ADDQconst destptr [size%8])
(MOVQstore destptr (MOVQconst [0]) mem))
// Zero small numbers of words directly.
(Zero [16] destptr mem) ->
(MOVQstore (ADDQconst [8] destptr) (MOVQconst [0])
(MOVQstore destptr (MOVQconst [0]) mem))
(Zero [24] destptr mem) ->
(MOVQstore (ADDQconst [16] destptr) (MOVQconst [0])
(MOVQstore (ADDQconst [8] destptr) (MOVQconst [0])
(MOVQstore destptr (MOVQconst [0]) mem)))
(Zero [32] destptr mem) ->
(MOVQstore (ADDQconst [24] destptr) (MOVQconst [0])
(MOVQstore (ADDQconst [16] destptr) (MOVQconst [0])
(MOVQstore (ADDQconst [8] destptr) (MOVQconst [0])
(MOVQstore destptr (MOVQconst [0]) mem))))
// Medium zeroing uses a duff device.
(Zero [size] destptr mem) && size <= 1024 && size%8 == 0 ->
(DUFFZERO [duffStart(size)] (ADDQconst [duffAdj(size)] destptr) (MOVQconst [0]) mem)
// Large zeroing uses REP STOSQ.
(Zero [size] destptr mem) && size > 1024 && size%8 == 0 ->
(REPSTOSQ destptr (MOVQconst [size/8]) (MOVQconst [0]) mem)
// Absorb InvertFlags into branches.
(LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
......
......@@ -117,9 +117,8 @@ func init() {
gpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
gpstore = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
gpstoreconst = regInfo{inputs: []regMask{gpspsb, 0}}
gpstoreidx = regInfo{inputs: []regMask{gpspsb, gpsp, gpsp, 0}}
gpstore = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
gpstoreidx = regInfo{inputs: []regMask{gpspsb, gpsp, gpsp, 0}}
fp01 = regInfo{inputs: []regMask{}, outputs: fponly}
fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
......@@ -167,14 +166,14 @@ func init() {
{name: "MOVSDstoreidx8", reg: fpstoreidx, asm: "MOVSD"}, // fp64 indexed by 8i store
// binary ops
{name: "ADDQ", reg: gp21, asm: "ADDQ"}, // arg0 + arg1
{name: "ADDL", reg: gp21, asm: "ADDL"}, // arg0 + arg1
{name: "ADDW", reg: gp21, asm: "ADDW"}, // arg0 + arg1
{name: "ADDB", reg: gp21, asm: "ADDB"}, // arg0 + arg1
{name: "ADDQconst", reg: gp11, asm: "ADDQ"}, // arg0 + auxint
{name: "ADDLconst", reg: gp11, asm: "ADDL"}, // arg0 + auxint
{name: "ADDWconst", reg: gp11, asm: "ADDW"}, // arg0 + auxint
{name: "ADDBconst", reg: gp11, asm: "ADDB"}, // arg0 + auxint
{name: "ADDQ", reg: gp21, asm: "ADDQ"}, // arg0 + arg1
{name: "ADDL", reg: gp21, asm: "ADDL"}, // arg0 + arg1
{name: "ADDW", reg: gp21, asm: "ADDW"}, // arg0 + arg1
{name: "ADDB", reg: gp21, asm: "ADDB"}, // arg0 + arg1
{name: "ADDQconst", reg: gp11, asm: "ADDQ", typ: "UInt64"}, // arg0 + auxint
{name: "ADDLconst", reg: gp11, asm: "ADDL"}, // arg0 + auxint
{name: "ADDWconst", reg: gp11, asm: "ADDW"}, // arg0 + auxint
{name: "ADDBconst", reg: gp11, asm: "ADDB"}, // arg0 + auxint
{name: "SUBQ", reg: gp21, asm: "SUBQ"}, // arg0 - arg1
{name: "SUBL", reg: gp21, asm: "SUBL"}, // arg0 - arg1
......@@ -343,10 +342,10 @@ func init() {
// clobbers flags as liblink will rewrite these to XOR reg, reg if the constant is zero
// TODO: revisit when issue 12405 is fixed
{name: "MOVBconst", reg: gp01flags, asm: "MOVB"}, // 8 low bits of auxint
{name: "MOVWconst", reg: gp01flags, asm: "MOVW"}, // 16 low bits of auxint
{name: "MOVLconst", reg: gp01flags, asm: "MOVL"}, // 32 low bits of auxint
{name: "MOVQconst", reg: gp01flags, asm: "MOVQ"}, // auxint
{name: "MOVBconst", reg: gp01flags, asm: "MOVB", typ: "UInt8"}, // 8 low bits of auxint
{name: "MOVWconst", reg: gp01flags, asm: "MOVW", typ: "UInt16"}, // 16 low bits of auxint
{name: "MOVLconst", reg: gp01flags, asm: "MOVL", typ: "UInt32"}, // 32 low bits of auxint
{name: "MOVQconst", reg: gp01flags, asm: "MOVQ", typ: "UInt64"}, // auxint
{name: "CVTTSD2SL", reg: fpgp, asm: "CVTTSD2SL"}, // convert float64 to int32
{name: "CVTTSD2SQ", reg: fpgp, asm: "CVTTSD2SQ"}, // convert float64 to int64
......@@ -368,24 +367,45 @@ func init() {
{name: "LEAQ8", reg: gp21sb}, // arg0 + 8*arg1 + auxint
// auxint+aux == add auxint and the offset of the symbol in aux (if any) to the effective address
{name: "MOVBload", reg: gpload, asm: "MOVB"}, // load byte from arg0+auxint+aux. arg1=mem
{name: "MOVBQSXload", reg: gpload, asm: "MOVBQSX"}, // ditto, extend to int64
{name: "MOVBQZXload", reg: gpload, asm: "MOVBQZX"}, // ditto, extend to uint64
{name: "MOVWload", reg: gpload, asm: "MOVW"}, // load 2 bytes from arg0+auxint+aux. arg1=mem
{name: "MOVLload", reg: gpload, asm: "MOVL"}, // load 4 bytes from arg0+auxint+aux. arg1=mem
{name: "MOVQload", reg: gpload, asm: "MOVQ"}, // load 8 bytes from arg0+auxint+aux. arg1=mem
{name: "MOVQloadidx8", reg: gploadidx, asm: "MOVQ"}, // load 8 bytes from arg0+8*arg1+auxint+aux. arg2=mem
{name: "MOVBstore", reg: gpstore, asm: "MOVB"}, // store byte in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVWstore", reg: gpstore, asm: "MOVW"}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVLstore", reg: gpstore, asm: "MOVL"}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVQstore", reg: gpstore, asm: "MOVQ"}, // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVQstoreidx8", reg: gpstoreidx, asm: "MOVQ"}, // store 8 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
{name: "MOVXzero", reg: gpstoreconst}, // store auxint 0 bytes into arg0 using a series of MOV instructions. arg1=mem.
{name: "REPSTOSQ", reg: regInfo{[]regMask{buildReg("DI"), buildReg("CX")}, buildReg("DI AX CX FLAGS"), nil}}, // store arg1 8-byte words containing zero into arg0 using STOSQ. arg2=mem.
//TODO: set register clobber to everything?
{name: "MOVBload", reg: gpload, asm: "MOVB"}, // load byte from arg0+auxint+aux. arg1=mem
{name: "MOVBQSXload", reg: gpload, asm: "MOVBQSX"}, // ditto, extend to int64
{name: "MOVBQZXload", reg: gpload, asm: "MOVBQZX"}, // ditto, extend to uint64
{name: "MOVWload", reg: gpload, asm: "MOVW"}, // load 2 bytes from arg0+auxint+aux. arg1=mem
{name: "MOVLload", reg: gpload, asm: "MOVL"}, // load 4 bytes from arg0+auxint+aux. arg1=mem
{name: "MOVQload", reg: gpload, asm: "MOVQ"}, // load 8 bytes from arg0+auxint+aux. arg1=mem
{name: "MOVQloadidx8", reg: gploadidx, asm: "MOVQ"}, // load 8 bytes from arg0+8*arg1+auxint+aux. arg2=mem
{name: "MOVBstore", reg: gpstore, asm: "MOVB", typ: "Mem"}, // store byte in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVWstore", reg: gpstore, asm: "MOVW", typ: "Mem"}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVLstore", reg: gpstore, asm: "MOVL", typ: "Mem"}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVQstore", reg: gpstore, asm: "MOVQ", typ: "Mem"}, // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVQstoreidx8", reg: gpstoreidx, asm: "MOVQ"}, // store 8 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
// arg0 = (duff-adjusted) pointer to start of memory to zero
// arg1 = value to store (will always be zero)
// arg2 = mem
// auxint = offset into duffzero code to start executing
// returns mem
{
name: "DUFFZERO",
reg: regInfo{
inputs: []regMask{buildReg("DI"), buildReg("AX")},
clobbers: buildReg("DI FLAGS"),
},
},
// arg0 = address of memory to zero
// arg1 = # of 8-byte words to zero
// arg2 = value to store (will always be zero)
// arg3 = mem
// returns mem
{
name: "REPSTOSQ",
reg: regInfo{
inputs: []regMask{buildReg("DI"), buildReg("CX"), buildReg("AX")},
clobbers: buildReg("DI CX FLAGS"),
},
},
{name: "CALLstatic", reg: regInfo{clobbers: callerSave}}, // call static function aux.(*gc.Sym). arg0=mem, auxint=argsize, returns mem
{name: "CALLclosure", reg: regInfo{[]regMask{gpsp, buildReg("DX"), 0}, callerSave, nil}}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
{name: "CALLdefer", reg: regInfo{clobbers: callerSave}}, // call deferproc. arg0=mem, auxint=argsize, returns mem
......
......@@ -260,7 +260,7 @@ const (
OpAMD64MOVLstore
OpAMD64MOVQstore
OpAMD64MOVQstoreidx8
OpAMD64MOVXzero
OpAMD64DUFFZERO
OpAMD64REPSTOSQ
OpAMD64CALLstatic
OpAMD64CALLclosure
......@@ -3034,11 +3034,13 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "MOVXzero",
name: "DUFFZERO",
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 .SB
{0, 128}, // .DI
{1, 1}, // .AX
},
clobbers: 8589934720, // .DI .FLAGS
},
},
{
......@@ -3047,8 +3049,9 @@ var opcodeTable = [...]opInfo{
inputs: []inputInfo{
{0, 128}, // .DI
{1, 2}, // .CX
{2, 1}, // .AX
},
clobbers: 8589934723, // .AX .CX .DI .FLAGS
clobbers: 8589934722, // .CX .DI .FLAGS
},
},
{
......
......@@ -178,3 +178,52 @@ func b2i(b bool) int64 {
func f2i(f float64) int64 {
return int64(math.Float64bits(f))
}
// DUFFZERO consists of repeated blocks of 4 MOVs + ADD,
// with 4 STOSQs at the very end.
// The trailing STOSQs prevent the need for a DI preadjustment
// for small numbers of words to clear.
// See runtime/mkduff.go.
const (
dzBlocks = 31 // number of MOV/ADD blocks
dzBlockLen = 4 // number of clears per block
dzBlockSize = 19 // size of instructions in a single block
dzMovSize = 4 // size of single MOV instruction w/ offset
dzAddSize = 4 // size of single ADD instruction
dzDIStep = 8 // number of bytes cleared by each MOV instruction
dzTailLen = 4 // number of final STOSQ instructions
dzTailSize = 2 // size of single STOSQ instruction
dzSize = dzBlocks*dzBlockSize + dzTailLen*dzTailSize // total size of DUFFZERO routine
)
func duffStart(size int64) int64 {
x, _ := duff(size)
return x
}
func duffAdj(size int64) int64 {
_, x := duff(size)
return x
}
// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
// required to use the duffzero mechanism for a block of the given size.
func duff(size int64) (int64, int64) {
if size < 32 || size > 1024 || size%8 != 0 {
panic("bad duffzero size")
}
// TODO: arch-dependent
off := int64(dzSize)
off -= dzTailLen * dzTailSize
size -= dzTailLen * dzDIStep
q := size / dzDIStep
blocks, singles := q/dzBlockLen, q%dzBlockLen
off -= dzBlockSize * blocks
var adj int64
if singles > 0 {
off -= dzAddSize + dzMovSize*singles
adj -= dzDIStep * (dzBlockLen - singles)
}
return off, adj
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment