Commit 00263a89 authored by David Chase's avatar David Chase

cmd/compile: reduce debugger-worsening line number churn

Reuse block head or preceding instruction's line number for
register allocator's spill, fill, copy, rematerialization
instructionsl; and also for phi, and for no-src-pos
instructions.  Assembler creates same line number tables
for copy-predecessor-line and for no-src-pos,
but copy-predecessor produces better-looking assembly
language output with -S and with GOSSAFUNC, and does not
require changes to tests of existing assembly language.

Split "copyInto" into two cases, one for register allocation,
one for otherwise.  This caused the test score line change
count to increase by one, which may reflect legitimately
useful information preserved.  Without any special treatment
for copyInto, the change count increases by 21 more, from
51 to 72 (i.e., quite a lot).

There is a test; using two naive "scores" for line number
churn, the old numbering is 2x or 4x worse.

Fixes #18902.

Change-Id: I0a0a69659d30ee4e5d10116a0dd2b8c5df8457b1
Reviewed-on: https://go-review.googlesource.com/36207
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarKeith Randall <khr@golang.org>
parent 1df777f6
...@@ -4346,6 +4346,29 @@ func (s *SSAGenState) SetPos(pos src.XPos) { ...@@ -4346,6 +4346,29 @@ func (s *SSAGenState) SetPos(pos src.XPos) {
s.pp.pos = pos s.pp.pos = pos
} }
// DebugFriendlySetPos sets the position subject to heuristics
// that reduce "jumpy" line number churn when debugging.
// Spill/fill/copy instructions from the register allocator,
// phi functions, and instructions with a no-pos position
// are examples of instructions that can cause churn.
func (s *SSAGenState) DebugFriendlySetPosFrom(v *ssa.Value) {
// The two choices here are either to leave lineno unchanged,
// or to explicitly set it to src.NoXPos. Leaving it unchanged
// (reusing the preceding line number) produces slightly better-
// looking assembly language output from the compiler, and is
// expected by some already-existing tests.
// The debug information appears to be the same in either case
switch v.Op {
case ssa.OpPhi, ssa.OpCopy, ssa.OpLoadReg, ssa.OpStoreReg:
// leave the position unchanged from beginning of block
// or previous line number.
default:
if v.Pos != src.NoXPos {
s.SetPos(v.Pos)
}
}
}
// genssa appends entries to pp for each instruction in f. // genssa appends entries to pp for each instruction in f.
func genssa(f *ssa.Func, pp *Progs) { func genssa(f *ssa.Func, pp *Progs) {
var s SSAGenState var s SSAGenState
...@@ -4381,8 +4404,7 @@ func genssa(f *ssa.Func, pp *Progs) { ...@@ -4381,8 +4404,7 @@ func genssa(f *ssa.Func, pp *Progs) {
thearch.SSAMarkMoves(&s, b) thearch.SSAMarkMoves(&s, b)
for _, v := range b.Values { for _, v := range b.Values {
x := s.pp.next x := s.pp.next
s.SetPos(v.Pos) s.DebugFriendlySetPosFrom(v)
switch v.Op { switch v.Op {
case ssa.OpInitMem: case ssa.OpInitMem:
// memory arg needs no code // memory arg needs no code
......
...@@ -468,7 +468,7 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos ...@@ -468,7 +468,7 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos
c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c) c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c)
} else if v.rematerializeable() { } else if v.rematerializeable() {
// Rematerialize instead of loading from the spill location. // Rematerialize instead of loading from the spill location.
c = v.copyInto(s.curBlock) c = v.copyIntoNoXPos(s.curBlock)
} else { } else {
// Load v from its spill location. // Load v from its spill location.
spill := s.makeSpill(v, s.curBlock) spill := s.makeSpill(v, s.curBlock)
...@@ -1949,13 +1949,13 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XP ...@@ -1949,13 +1949,13 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XP
e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString()) e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
} }
if dstReg { if dstReg {
x = v.copyInto(e.p) x = v.copyIntoNoXPos(e.p)
} else { } else {
// Rematerialize into stack slot. Need a free // Rematerialize into stack slot. Need a free
// register to accomplish this. // register to accomplish this.
e.erase(loc) // see pre-clobber comment below e.erase(loc) // see pre-clobber comment below
r := e.findRegFor(v.Type) r := e.findRegFor(v.Type)
x = v.copyInto(e.p) x = v.copyIntoNoXPos(e.p)
e.set(r, vid, x, false, pos) e.set(r, vid, x, false, pos)
// Make sure we spill with the size of the slot, not the // Make sure we spill with the size of the slot, not the
// size of x (which might be wider due to our dropping // size of x (which might be wider due to our dropping
......
...@@ -212,7 +212,21 @@ func (v *Value) reset(op Op) { ...@@ -212,7 +212,21 @@ func (v *Value) reset(op Op) {
// copyInto makes a new value identical to v and adds it to the end of b. // copyInto makes a new value identical to v and adds it to the end of b.
func (v *Value) copyInto(b *Block) *Value { func (v *Value) copyInto(b *Block) *Value {
c := b.NewValue0(v.Pos, v.Op, v.Type) c := b.NewValue0(v.Pos, v.Op, v.Type) // Lose the position, this causes line number churn otherwise.
c.Aux = v.Aux
c.AuxInt = v.AuxInt
c.AddArgs(v.Args...)
for _, a := range v.Args {
if a.Type.IsMemory() {
v.Fatalf("can't move a value with a memory arg %s", v.LongString())
}
}
return c
}
// copyInto makes a new value identical to v and adds it to the end of b.
func (v *Value) copyIntoNoXPos(b *Block) *Value {
c := b.NewValue0(src.NoXPos, v.Op, v.Type) // Lose the position, this causes line number churn otherwise.
c.Aux = v.Aux c.Aux = v.Aux
c.AuxInt = v.AuxInt c.AuxInt = v.AuxInt
c.AddArgs(v.Args...) c.AddArgs(v.Args...)
......
// run
// +build !nacl
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Runs a build -S to capture the assembly language
// output, checks that the line numbers associated with
// the stream of instructions do not change "too much".
// The changes that fixes this (that reduces the amount
// of change) does so by treating register spill, reload,
// copy, and rematerializations as being "unimportant" and
// just assigns them the line numbers of whatever "real"
// instructions preceded them.
// nacl is excluded because this runs a compiler.
package main
import (
"bufio"
"bytes"
"fmt"
"os"
"os/exec"
"strconv"
"strings"
)
// updateEnv modifies env to ensure that key=val
func updateEnv(env *[]string, key, val string) {
if val != "" {
var found bool
key = key + "="
for i, kv := range *env {
if strings.HasPrefix(kv, key) {
(*env)[i] = key + val
found = true
break
}
}
if !found {
*env = append(*env, key+val)
}
}
}
func main() {
testarch := os.Getenv("TESTARCH") // Targets other platform in test compilation.
debug := os.Getenv("TESTDEBUG") != "" // Output the relevant assembly language.
cmd := exec.Command("go", "build", "-gcflags", "-S", "fixedbugs/issue18902b.go")
var buf bytes.Buffer
cmd.Stdout = &buf
cmd.Stderr = &buf
cmd.Env = os.Environ()
updateEnv(&cmd.Env, "GOARCH", testarch)
err := cmd.Run()
if err != nil {
fmt.Printf("%s\n%s", err, buf.Bytes())
return
}
begin := "\"\".(*gcSortBuf).flush" // Text at beginning of relevant dissassembly.
s := buf.String()
i := strings.Index(s, begin)
if i < 0 {
fmt.Printf("Failed to find expected symbol %s in output\n%s\n", begin, s)
return
}
s = s[i:]
r := strings.NewReader(s)
scanner := bufio.NewScanner(r)
first := true // The first line after the begin text will be skipped
beforeLineNumber := "issue18902b.go:" // Text preceding line number in each line.
lbln := len(beforeLineNumber)
var scannedCount, changes, sumdiffs float64
prevVal := 0
for scanner.Scan() {
line := scanner.Text()
if first {
first = false
continue
}
i = strings.Index(line, beforeLineNumber)
if i < 0 {
// Done reading lines
if scannedCount < 200 { // When test was written, 251 lines observed on amd64
fmt.Printf("Scanned only %d lines, was expecting more than 200", scannedCount)
return
}
// Note: when test was written, before changes=92, after=50 (was 62 w/o rematerialization NoXPos in *Value.copyInto())
// and before sumdiffs=784, after=180 (was 446 w/o rematerialization NoXPos in *Value.copyInto())
// Set the dividing line between pass and fail at the midpoint.
// Normalize against instruction count in case we unroll loops, etc.
if changes/scannedCount >= (50+92)/(2*scannedCount) || sumdiffs/scannedCount >= (180+784)/(2*scannedCount) {
fmt.Printf("Line numbers change too much, # of changes=%.f, sumdiffs=%.f, # of instructions=%.f\n", changes, sumdiffs, scannedCount)
}
return
}
scannedCount++
i += lbln
lineVal, err := strconv.Atoi(line[i : i+3])
if err != nil {
fmt.Printf("Expected 3-digit line number after %s in %s\n", beforeLineNumber, line)
}
if prevVal == 0 {
prevVal = lineVal
}
diff := lineVal - prevVal
if diff < 0 {
diff = -diff
}
if diff != 0 {
changes++
sumdiffs += float64(diff)
}
// If things change too much, set environment variable TESTDEBUG to help figure out what's up.
// The "before" behavior can be recreated in DebugFriendlySetPosFrom (currently in gc/ssa.go)
// by inserting unconditional
// s.SetPos(v.Pos)
// at the top of the function.
if debug {
fmt.Printf("%d %.f %.f %s\n", lineVal, changes, sumdiffs, line)
}
prevVal = lineVal
}
if err := scanner.Err(); err != nil {
fmt.Println("Reading standard input:", err)
return
}
}
// skip
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package foo
import (
"unsafe"
)
type gcMaxTreeNodeVal uint64
var work struct {
full uint64 // lock-free list of full blocks workbuf
empty uint64 // lock-free list of empty blocks workbuf
pad0 [64]uint8 // prevents false-sharing between full/empty and nproc/nwait
bytesMarked uint64
markrootNext uint32 // next markroot job
markrootJobs uint32 // number of markroot jobs
nproc uint32
tstart int64
nwait uint32
ndone uint32
}
type gcShardQueue1 struct {
partial *workbuf
full *workbuf
n uintptr
maxTree gcMaxTreeNodeVal
}
type gcShardQueue struct {
gcShardQueue1
pad [64 - unsafe.Sizeof(gcShardQueue1{})]byte
}
const gcSortBufPointers = (64 << 10) / 8
type gcSortBuf struct {
buf *gcSortArray
tmp *gcSortArray
n uintptr
}
//go:notinheap
type gcSortArray [gcSortBufPointers]uintptr
const (
_DebugGC = 0
_ConcurrentSweep = true
_FinBlockSize = 4 * 1024
sweepMinHeapDistance = 1024 * 1024
gcShardShift = 2 + 20
gcShardBytes = 1 << gcShardShift
)
//go:notinheap
type mheap struct {
shardQueues []gcShardQueue
_ uint32 // align uint64 fields on 32-bit for atomics
pagesInUse uint64 // pages of spans in stats _MSpanInUse; R/W with mheap.lock
spanBytesAlloc uint64 // bytes of spans allocated this cycle; updated atomically
pagesSwept uint64 // pages swept this cycle; updated atomically
sweepPagesPerByte float64 // proportional sweep ratio; written with lock, read without
largefree uint64 // bytes freed for large objects (>maxsmallsize)
nlargefree uint64 // number of frees for large objects (>maxsmallsize)
nsmallfree [67]uint64 // number of frees for small objects (<=maxsmallsize)
bitmap uintptr // Points to one byte past the end of the bitmap
bitmap_mapped uintptr
arena_start uintptr
arena_used uintptr // always mHeap_Map{Bits,Spans} before updating
arena_end uintptr
arena_reserved bool
}
var mheap_ mheap
type lfnode struct {
next uint64
pushcnt uintptr
}
type workbufhdr struct {
node lfnode // must be first
next *workbuf
nobj int
}
//go:notinheap
type workbuf struct {
workbufhdr
obj [(2048 - unsafe.Sizeof(workbufhdr{})) / 8]uintptr
}
//go:noinline
func (b *workbuf) checkempty() {
if b.nobj != 0 {
b.nobj = 0
}
}
func putempty(b *workbuf) {
b.checkempty()
lfstackpush(&work.empty, &b.node)
}
//go:noinline
func lfstackpush(head *uint64, node *lfnode) {
}
//go:noinline
func (q *gcShardQueue) add(qidx uintptr, ptrs []uintptr, spare *workbuf) *workbuf {
return spare
}
func (b *gcSortBuf) flush() {
if b.n == 0 {
return
}
const sortDigitBits = 11
buf, tmp := b.buf[:b.n], b.tmp[:b.n]
moreBits := true
for shift := uint(gcShardShift); moreBits; shift += sortDigitBits {
const k = 1 << sortDigitBits
var pos [k]uint16
nshift := shift + sortDigitBits
nbits := buf[0] >> nshift
moreBits = false
for _, v := range buf {
pos[(v>>shift)%k]++
moreBits = moreBits || v>>nshift != nbits
}
var sum uint16
for i, count := range &pos {
pos[i] = sum
sum += count
}
for _, v := range buf {
digit := (v >> shift) % k
tmp[pos[digit]] = v
pos[digit]++
}
buf, tmp = tmp, buf
}
start := mheap_.arena_start
i0 := 0
shard0 := (buf[0] - start) / gcShardBytes
var spare *workbuf
for i, p := range buf {
shard := (p - start) / gcShardBytes
if shard != shard0 {
spare = mheap_.shardQueues[shard0].add(shard0, buf[i0:i], spare)
i0, shard0 = i, shard
}
}
spare = mheap_.shardQueues[shard0].add(shard0, buf[i0:], spare)
b.n = 0
if spare != nil {
putempty(spare)
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment