Commit a4650a21 authored by Josh Bleecher Snyder's avatar Josh Bleecher Snyder

cmd/compile: avoid write barrier in append fast path

When we are writing the result of an append back
to the same slice, we don’t need a write barrier
on the fast path.

This re-implements an optimization that was present
in the old backend.

Updates #14921
Fixes #14969

Sample code:

var x []byte

func p() {
	x = append(x, 1, 2, 3)
}

Before:

"".p t=1 size=224 args=0x0 locals=0x48
	0x0000 00000 (append.go:21)	TEXT	"".p(SB), $72-0
	0x0000 00000 (append.go:21)	MOVQ	(TLS), CX
	0x0009 00009 (append.go:21)	CMPQ	SP, 16(CX)
	0x000d 00013 (append.go:21)	JLS	199
	0x0013 00019 (append.go:21)	SUBQ	$72, SP
	0x0017 00023 (append.go:21)	FUNCDATA	$0, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
	0x0017 00023 (append.go:21)	FUNCDATA	$1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
	0x0017 00023 (append.go:19)	MOVQ	"".x+16(SB), CX
	0x001e 00030 (append.go:19)	MOVQ	"".x(SB), DX
	0x0025 00037 (append.go:19)	MOVQ	"".x+8(SB), BX
	0x002c 00044 (append.go:19)	MOVQ	BX, "".autotmp_0+64(SP)
	0x0031 00049 (append.go:22)	LEAQ	3(BX), BP
	0x0035 00053 (append.go:22)	CMPQ	BP, CX
	0x0038 00056 (append.go:22)	JGT	$0, 131
	0x003a 00058 (append.go:22)	MOVB	$1, (DX)(BX*1)
	0x003e 00062 (append.go:22)	MOVB	$2, 1(DX)(BX*1)
	0x0043 00067 (append.go:22)	MOVB	$3, 2(DX)(BX*1)
	0x0048 00072 (append.go:22)	MOVQ	BP, "".x+8(SB)
	0x004f 00079 (append.go:22)	MOVQ	CX, "".x+16(SB)
	0x0056 00086 (append.go:22)	MOVL	runtime.writeBarrier(SB), AX
	0x005c 00092 (append.go:22)	TESTB	AL, AL
	0x005e 00094 (append.go:22)	JNE	$0, 108
	0x0060 00096 (append.go:22)	MOVQ	DX, "".x(SB)
	0x0067 00103 (append.go:23)	ADDQ	$72, SP
	0x006b 00107 (append.go:23)	RET
	0x006c 00108 (append.go:22)	LEAQ	"".x(SB), CX
	0x0073 00115 (append.go:22)	MOVQ	CX, (SP)
	0x0077 00119 (append.go:22)	MOVQ	DX, 8(SP)
	0x007c 00124 (append.go:22)	PCDATA	$0, $0
	0x007c 00124 (append.go:22)	CALL	runtime.writebarrierptr(SB)
	0x0081 00129 (append.go:23)	JMP	103
	0x0083 00131 (append.go:22)	LEAQ	type.[]uint8(SB), AX
	0x008a 00138 (append.go:22)	MOVQ	AX, (SP)
	0x008e 00142 (append.go:22)	MOVQ	DX, 8(SP)
	0x0093 00147 (append.go:22)	MOVQ	BX, 16(SP)
	0x0098 00152 (append.go:22)	MOVQ	CX, 24(SP)
	0x009d 00157 (append.go:22)	MOVQ	BP, 32(SP)
	0x00a2 00162 (append.go:22)	PCDATA	$0, $0
	0x00a2 00162 (append.go:22)	CALL	runtime.growslice(SB)
	0x00a7 00167 (append.go:22)	MOVQ	40(SP), DX
	0x00ac 00172 (append.go:22)	MOVQ	48(SP), AX
	0x00b1 00177 (append.go:22)	MOVQ	56(SP), CX
	0x00b6 00182 (append.go:22)	ADDQ	$3, AX
	0x00ba 00186 (append.go:19)	MOVQ	"".autotmp_0+64(SP), BX
	0x00bf 00191 (append.go:22)	MOVQ	AX, BP
	0x00c2 00194 (append.go:22)	JMP	58
	0x00c7 00199 (append.go:22)	NOP
	0x00c7 00199 (append.go:21)	CALL	runtime.morestack_noctxt(SB)
	0x00cc 00204 (append.go:21)	JMP	0

After:

"".p t=1 size=208 args=0x0 locals=0x48
	0x0000 00000 (append.go:21)	TEXT	"".p(SB), $72-0
	0x0000 00000 (append.go:21)	MOVQ	(TLS), CX
	0x0009 00009 (append.go:21)	CMPQ	SP, 16(CX)
	0x000d 00013 (append.go:21)	JLS	191
	0x0013 00019 (append.go:21)	SUBQ	$72, SP
	0x0017 00023 (append.go:21)	FUNCDATA	$0, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
	0x0017 00023 (append.go:21)	FUNCDATA	$1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
	0x0017 00023 (append.go:19)	MOVQ	"".x+16(SB), CX
	0x001e 00030 (append.go:19)	MOVQ	"".x+8(SB), DX
	0x0025 00037 (append.go:19)	MOVQ	DX, "".autotmp_0+64(SP)
	0x002a 00042 (append.go:19)	MOVQ	"".x(SB), BX
	0x0031 00049 (append.go:22)	LEAQ	3(DX), BP
	0x0035 00053 (append.go:22)	MOVQ	BP, "".x+8(SB)
	0x003c 00060 (append.go:22)	CMPQ	BP, CX
	0x003f 00063 (append.go:22)	JGT	$0, 84
	0x0041 00065 (append.go:22)	MOVB	$1, (BX)(DX*1)
	0x0045 00069 (append.go:22)	MOVB	$2, 1(BX)(DX*1)
	0x004a 00074 (append.go:22)	MOVB	$3, 2(BX)(DX*1)
	0x004f 00079 (append.go:23)	ADDQ	$72, SP
	0x0053 00083 (append.go:23)	RET
	0x0054 00084 (append.go:22)	LEAQ	type.[]uint8(SB), AX
	0x005b 00091 (append.go:22)	MOVQ	AX, (SP)
	0x005f 00095 (append.go:22)	MOVQ	BX, 8(SP)
	0x0064 00100 (append.go:22)	MOVQ	DX, 16(SP)
	0x0069 00105 (append.go:22)	MOVQ	CX, 24(SP)
	0x006e 00110 (append.go:22)	MOVQ	BP, 32(SP)
	0x0073 00115 (append.go:22)	PCDATA	$0, $0
	0x0073 00115 (append.go:22)	CALL	runtime.growslice(SB)
	0x0078 00120 (append.go:22)	MOVQ	40(SP), CX
	0x007d 00125 (append.go:22)	MOVQ	56(SP), AX
	0x0082 00130 (append.go:22)	MOVQ	AX, "".x+16(SB)
	0x0089 00137 (append.go:22)	MOVL	runtime.writeBarrier(SB), AX
	0x008f 00143 (append.go:22)	TESTB	AL, AL
	0x0091 00145 (append.go:22)	JNE	$0, 168
	0x0093 00147 (append.go:22)	MOVQ	CX, "".x(SB)
	0x009a 00154 (append.go:22)	MOVQ	"".x(SB), BX
	0x00a1 00161 (append.go:19)	MOVQ	"".autotmp_0+64(SP), DX
	0x00a6 00166 (append.go:22)	JMP	65
	0x00a8 00168 (append.go:22)	LEAQ	"".x(SB), DX
	0x00af 00175 (append.go:22)	MOVQ	DX, (SP)
	0x00b3 00179 (append.go:22)	MOVQ	CX, 8(SP)
	0x00b8 00184 (append.go:22)	PCDATA	$0, $0
	0x00b8 00184 (append.go:22)	CALL	runtime.writebarrierptr(SB)
	0x00bd 00189 (append.go:22)	JMP	154
	0x00bf 00191 (append.go:22)	NOP
	0x00bf 00191 (append.go:21)	CALL	runtime.morestack_noctxt(SB)
	0x00c4 00196 (append.go:21)	JMP	0

Change-Id: I77a41ad3a22557a4bb4654de7d6d24a029efe34a
Reviewed-on: https://go-review.googlesource.com/21813
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarKeith Randall <khr@golang.org>
parent 32efa16c
......@@ -683,14 +683,27 @@ func (s *state) stmt(n *Node) {
// Evaluate RHS.
rhs := n.Right
if rhs != nil && (rhs.Op == OSTRUCTLIT || rhs.Op == OARRAYLIT) {
// All literals with nonzero fields have already been
// rewritten during walk. Any that remain are just T{}
// or equivalents. Use the zero value.
if !iszero(rhs) {
Fatalf("literal with nonzero value in SSA: %v", rhs)
if rhs != nil {
switch rhs.Op {
case OSTRUCTLIT, OARRAYLIT:
// All literals with nonzero fields have already been
// rewritten during walk. Any that remain are just T{}
// or equivalents. Use the zero value.
if !iszero(rhs) {
Fatalf("literal with nonzero value in SSA: %v", rhs)
}
rhs = nil
case OAPPEND:
// If we're writing the result of an append back to the same slice,
// handle it specially to avoid write barriers on the fast (non-growth) path.
// If the slice can be SSA'd, it'll be on the stack,
// so there will be no write barriers,
// so there's no need to attempt to prevent them.
if samesafeexpr(n.Left, rhs.List.First()) && !s.canSSA(n.Left) {
s.append(rhs, true)
return
}
}
rhs = nil
}
var r *ssa.Value
needwb := n.Op == OASWB && rhs != nil
......@@ -709,11 +722,11 @@ func (s *state) stmt(n *Node) {
}
}
if rhs != nil && rhs.Op == OAPPEND {
// Yuck! The frontend gets rid of the write barrier, but we need it!
// At least, we need it in the case where growslice is called.
// TODO: Do the write barrier on just the growslice branch.
// The frontend gets rid of the write barrier to enable the special OAPPEND
// handling above, but since this is not a special case, we need it.
// TODO: just add a ptr graying to the end of growslice?
// TODO: check whether we need to do this for ODOTTYPE and ORECV also.
// TODO: check whether we need to provide special handling and a write barrier
// for ODOTTYPE and ORECV also.
// They get similar wb-removal treatment in walk.go:OAS.
needwb = true
}
......@@ -2079,7 +2092,7 @@ func (s *state) expr(n *Node) *ssa.Value {
return s.newValue1(ssa.OpGetG, n.Type, s.mem())
case OAPPEND:
return s.exprAppend(n)
return s.append(n, false)
default:
s.Unimplementedf("unhandled expr %s", opnames[n.Op])
......@@ -2087,25 +2100,57 @@ func (s *state) expr(n *Node) *ssa.Value {
}
}
// exprAppend converts an OAPPEND node n to an ssa.Value, adds it to s, and returns the Value.
func (s *state) exprAppend(n *Node) *ssa.Value {
// append(s, e1, e2, e3). Compile like:
// append converts an OAPPEND node to SSA.
// If inplace is false, it converts the OAPPEND expression n to an ssa.Value,
// adds it to s, and returns the Value.
// If inplace is true, it writes the result of the OAPPEND expression n
// back to the slice being appended to, and returns nil.
// inplace MUST be set to false if the slice can be SSA'd.
func (s *state) append(n *Node, inplace bool) *ssa.Value {
// If inplace is false, process as expression "append(s, e1, e2, e3)":
//
// ptr, len, cap := s
// newlen := len + 3
// if newlen > s.cap {
// if newlen > cap {
// ptr, len, cap = growslice(s, newlen)
// newlen = len + 3 // recalculate to avoid a spill
// }
// // with write barriers, if needed:
// *(ptr+len) = e1
// *(ptr+len+1) = e2
// *(ptr+len+2) = e3
// return makeslice(ptr, newlen, cap)
//
//
// If inplace is true, process as statement "s = append(s, e1, e2, e3)":
//
// a := &s
// ptr, len, cap := s
// newlen := len + 3
// *a.len = newlen // store newlen immediately to avoid a spill
// if newlen > cap {
// newptr, _, newcap = growslice(ptr, len, cap, newlen)
// *a.cap = newcap // write before ptr to avoid a spill
// *a.ptr = newptr // with write barrier
// }
// // with write barriers, if needed:
// *(ptr+len) = e1
// *(ptr+len+1) = e2
// *(ptr+len+2) = e3
// makeslice(ptr, newlen, cap)
et := n.Type.Elem()
pt := Ptrto(et)
// Evaluate slice
slice := s.expr(n.List.First())
sn := n.List.First() // the slice node is the first in the list
var slice, addr *ssa.Value
if inplace {
addr = s.addr(sn, false)
slice = s.newValue2(ssa.OpLoad, n.Type, addr, s.mem())
} else {
slice = s.expr(sn)
}
// Allocate new blocks
grow := s.f.NewBlock(ssa.BlockPlain)
......@@ -2117,10 +2162,20 @@ func (s *state) exprAppend(n *Node) *ssa.Value {
l := s.newValue1(ssa.OpSliceLen, Types[TINT], slice)
c := s.newValue1(ssa.OpSliceCap, Types[TINT], slice)
nl := s.newValue2(s.ssaOp(OADD, Types[TINT]), Types[TINT], l, s.constInt(Types[TINT], nargs))
if inplace {
lenaddr := s.newValue1I(ssa.OpOffPtr, pt, int64(Array_nel), addr)
s.vars[&memVar] = s.newValue3I(ssa.OpStore, ssa.TypeMem, s.config.IntSize, lenaddr, nl, s.mem())
}
cmp := s.newValue2(s.ssaOp(OGT, Types[TINT]), Types[TBOOL], nl, c)
s.vars[&ptrVar] = p
s.vars[&newlenVar] = nl
s.vars[&capVar] = c
if !inplace {
s.vars[&newlenVar] = nl
s.vars[&capVar] = c
}
b := s.endBlock()
b.Kind = ssa.BlockIf
b.Likely = ssa.BranchUnlikely
......@@ -2134,9 +2189,18 @@ func (s *state) exprAppend(n *Node) *ssa.Value {
r := s.rtcall(growslice, true, []*Type{pt, Types[TINT], Types[TINT]}, taddr, p, l, c, nl)
s.vars[&ptrVar] = r[0]
s.vars[&newlenVar] = s.newValue2(s.ssaOp(OADD, Types[TINT]), Types[TINT], r[1], s.constInt(Types[TINT], nargs))
s.vars[&capVar] = r[2]
if inplace {
capaddr := s.newValue1I(ssa.OpOffPtr, pt, int64(Array_cap), addr)
s.vars[&memVar] = s.newValue3I(ssa.OpStore, ssa.TypeMem, s.config.IntSize, capaddr, r[2], s.mem())
s.insertWBstore(pt, addr, r[0], n.Lineno, 0)
// load the value we just stored to avoid having to spill it
s.vars[&ptrVar] = s.newValue2(ssa.OpLoad, pt, addr, s.mem())
} else {
s.vars[&ptrVar] = r[0]
s.vars[&newlenVar] = s.newValue2(s.ssaOp(OADD, Types[TINT]), Types[TINT], r[1], s.constInt(Types[TINT], nargs))
s.vars[&capVar] = r[2]
}
b = s.endBlock()
b.AddEdgeTo(assign)
......@@ -2156,9 +2220,11 @@ func (s *state) exprAppend(n *Node) *ssa.Value {
}
}
p = s.variable(&ptrVar, pt) // generates phi for ptr
nl = s.variable(&newlenVar, Types[TINT]) // generates phi for nl
c = s.variable(&capVar, Types[TINT]) // generates phi for cap
p = s.variable(&ptrVar, pt) // generates phi for ptr
if !inplace {
nl = s.variable(&newlenVar, Types[TINT]) // generates phi for nl
c = s.variable(&capVar, Types[TINT]) // generates phi for cap
}
p2 := s.newValue2(ssa.OpPtrIndex, pt, p, l)
// TODO: just one write barrier call for all of these writes?
// TODO: maybe just one writeBarrier.enabled check?
......@@ -2179,10 +2245,13 @@ func (s *state) exprAppend(n *Node) *ssa.Value {
}
}
// make result
delete(s.vars, &ptrVar)
if inplace {
return nil
}
delete(s.vars, &newlenVar)
delete(s.vars, &capVar)
// make result
return s.newValue3(ssa.OpSliceMake, n.Type, p, nl, c)
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment