Commit f5184d34 authored by Russ Cox's avatar Russ Cox

cmd/gc: correct handling of globals, func args, results

Globals, function arguments, and results are special cases in
registerization.

Globals must be flushed aggressively, because nearly any
operation can cause a panic, and the recovery code must see
the latest values. Globals also must be loaded aggressively,
because nearly any store through a pointer might be updating a
global: the compiler cannot see all the "address of"
operations on globals, especially exported globals. To
accomplish this, mark all globals as having their address
taken, which effectively disables registerization.

If a function contains a defer statement, the function results
must be flushed aggressively, because nearly any operation can
cause a panic, and the deferred code may call recover, causing
the original function to return the current values of its
function results. To accomplish this, mark all function
results as having their address taken if the function contains
any defer statements. This causes not just aggressive flushing
but also aggressive loading. The aggressive loading is
overkill but the best we can do in the current code.

Function arguments must be considered live at all safe points
in a function, because garbage collection always preserves
them: they must be up-to-date in order to be preserved
correctly. Accomplish this by marking them live at all call
sites. An earlier attempt at this marked function arguments as
having their address taken, which disabled registerization
completely, making programs slower. This CL's solution allows
registerization while preserving safety. The benchmark speedup
is caused by being able to registerize again (the earlier CL
lost the same amount).

benchmark                old ns/op     new ns/op     delta
BenchmarkEqualPort32     61.4          56.0          -8.79%

benchmark                old MB/s     new MB/s     speedup
BenchmarkEqualPort32     521.56       570.97       1.09x

Fixes #1304. (again)
Fixes #7944. (again)
Fixes #7984.
Fixes #7995.

LGTM=khr
R=golang-codereviews, khr
CC=golang-codereviews, iant, r
https://golang.org/cl/97500044
parent ec38c6f5
...@@ -56,28 +56,6 @@ rcmp(const void *a1, const void *a2) ...@@ -56,28 +56,6 @@ rcmp(const void *a1, const void *a2)
return p2->varno - p1->varno; return p2->varno - p1->varno;
} }
static void
setvar(Bits *dst, Type **args)
{
Type *t;
Node *n;
Addr a;
Iter save;
Bits bit;
int z;
t = structfirst(&save, args);
while(t != T) {
n = nodarg(t, 1);
a = zprog.from;
naddr(n, &a, 0);
bit = mkvar(R, &a);
for(z=0; z<BITS; z++)
dst->b[z] |= bit.b[z];
t = structnext(&save);
}
}
void void
excise(Flow *r) excise(Flow *r)
{ {
...@@ -192,11 +170,6 @@ regopt(Prog *firstp) ...@@ -192,11 +170,6 @@ regopt(Prog *firstp)
ovar.b[z] = 0; ovar.b[z] = 0;
} }
// build lists of parameters and results
setvar(&ivar, getthis(curfn->type));
setvar(&ivar, getinarg(curfn->type));
setvar(&ovar, getoutarg(curfn->type));
/* /*
* pass 1 * pass 1
* build aux data structure * build aux data structure
...@@ -837,9 +810,6 @@ mkvar(Reg *r, Adr *a) ...@@ -837,9 +810,6 @@ mkvar(Reg *r, Adr *a)
v->nextinnode = node->opt; v->nextinnode = node->opt;
node->opt = v; node->opt = v;
if(debug['R'])
print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
bit = blsh(i); bit = blsh(i);
if(n == D_EXTERN || n == D_STATIC) if(n == D_EXTERN || n == D_STATIC)
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
...@@ -848,6 +818,13 @@ mkvar(Reg *r, Adr *a) ...@@ -848,6 +818,13 @@ mkvar(Reg *r, Adr *a)
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
params.b[z] |= bit.b[z]; params.b[z] |= bit.b[z];
if(node->class == PPARAM)
for(z=0; z<BITS; z++)
ivar.b[z] |= bit.b[z];
if(node->class == PPARAMOUT)
for(z=0; z<BITS; z++)
ovar.b[z] |= bit.b[z];
// Treat values with their address taken as live at calls, // Treat values with their address taken as live at calls,
// because the garbage collector's liveness analysis in ../gc/plive.c does. // because the garbage collector's liveness analysis in ../gc/plive.c does.
// These must be consistent or else we will elide stores and the garbage // These must be consistent or else we will elide stores and the garbage
...@@ -864,7 +841,21 @@ mkvar(Reg *r, Adr *a) ...@@ -864,7 +841,21 @@ mkvar(Reg *r, Adr *a)
// The broader := in a closure problem is mentioned in a comment in // The broader := in a closure problem is mentioned in a comment in
// closure.c:/^typecheckclosure and dcl.c:/^oldname. // closure.c:/^typecheckclosure and dcl.c:/^oldname.
if(node->addrtaken) if(node->addrtaken)
setaddrs(bit); v->addr = 1;
// Disable registerization for globals, because:
// (1) we might panic at any time and we want the recovery code
// to see the latest values (issue 1304).
// (2) we don't know what pointers might point at them and we want
// loads via those pointers to see updated values and vice versa (issue 7995).
//
// Disable registerization for results if using defer, because the deferred func
// might recover and return, causing the current values to be used.
if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
v->addr = 1;
if(debug['R'])
print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
return bit; return bit;
...@@ -961,17 +952,6 @@ prop(Reg *r, Bits ref, Bits cal) ...@@ -961,17 +952,6 @@ prop(Reg *r, Bits ref, Bits cal)
ref.b[z] = 0; ref.b[z] = 0;
} }
break; break;
default:
// Work around for issue 1304:
// flush modified globals before each instruction.
for(z=0; z<BITS; z++) {
cal.b[z] |= externs.b[z];
// issue 4066: flush modified return variables in case of panic
if(hasdefer)
cal.b[z] |= ovar.b[z];
}
break;
} }
for(z=0; z<BITS; z++) { for(z=0; z<BITS; z++) {
ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
......
...@@ -54,28 +54,6 @@ rcmp(const void *a1, const void *a2) ...@@ -54,28 +54,6 @@ rcmp(const void *a1, const void *a2)
return p2->varno - p1->varno; return p2->varno - p1->varno;
} }
static void
setvar(Bits *dst, Type **args)
{
Type *t;
Node *n;
Addr a;
Iter save;
Bits bit;
int z;
t = structfirst(&save, args);
while(t != T) {
n = nodarg(t, 1);
a = zprog.from;
naddr(n, &a, 0);
bit = mkvar(R, &a);
for(z=0; z<BITS; z++)
dst->b[z] |= bit.b[z];
t = structnext(&save);
}
}
static void static void
setaddrs(Bits bit) setaddrs(Bits bit)
{ {
...@@ -178,11 +156,6 @@ regopt(Prog *firstp) ...@@ -178,11 +156,6 @@ regopt(Prog *firstp)
ovar.b[z] = 0; ovar.b[z] = 0;
} }
// build lists of parameters and results
setvar(&ivar, getthis(curfn->type));
setvar(&ivar, getinarg(curfn->type));
setvar(&ovar, getoutarg(curfn->type));
/* /*
* pass 1 * pass 1
* build aux data structure * build aux data structure
...@@ -690,11 +663,6 @@ mkvar(Reg *r, Adr *a) ...@@ -690,11 +663,6 @@ mkvar(Reg *r, Adr *a)
v->nextinnode = node->opt; v->nextinnode = node->opt;
node->opt = v; node->opt = v;
if(debug['R'])
print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
ostats.nvar++;
bit = blsh(i); bit = blsh(i);
if(n == D_EXTERN || n == D_STATIC) if(n == D_EXTERN || n == D_STATIC)
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
...@@ -703,6 +671,13 @@ mkvar(Reg *r, Adr *a) ...@@ -703,6 +671,13 @@ mkvar(Reg *r, Adr *a)
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
params.b[z] |= bit.b[z]; params.b[z] |= bit.b[z];
if(node->class == PPARAM)
for(z=0; z<BITS; z++)
ivar.b[z] |= bit.b[z];
if(node->class == PPARAMOUT)
for(z=0; z<BITS; z++)
ovar.b[z] |= bit.b[z];
// Treat values with their address taken as live at calls, // Treat values with their address taken as live at calls,
// because the garbage collector's liveness analysis in ../gc/plive.c does. // because the garbage collector's liveness analysis in ../gc/plive.c does.
// These must be consistent or else we will elide stores and the garbage // These must be consistent or else we will elide stores and the garbage
...@@ -719,7 +694,22 @@ mkvar(Reg *r, Adr *a) ...@@ -719,7 +694,22 @@ mkvar(Reg *r, Adr *a)
// The broader := in a closure problem is mentioned in a comment in // The broader := in a closure problem is mentioned in a comment in
// closure.c:/^typecheckclosure and dcl.c:/^oldname. // closure.c:/^typecheckclosure and dcl.c:/^oldname.
if(node->addrtaken) if(node->addrtaken)
setaddrs(bit); v->addr = 1;
// Disable registerization for globals, because:
// (1) we might panic at any time and we want the recovery code
// to see the latest values (issue 1304).
// (2) we don't know what pointers might point at them and we want
// loads via those pointers to see updated values and vice versa (issue 7995).
//
// Disable registerization for results if using defer, because the deferred func
// might recover and return, causing the current values to be used.
if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
v->addr = 1;
if(debug['R'])
print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
ostats.nvar++;
return bit; return bit;
...@@ -816,17 +806,6 @@ prop(Reg *r, Bits ref, Bits cal) ...@@ -816,17 +806,6 @@ prop(Reg *r, Bits ref, Bits cal)
ref.b[z] = 0; ref.b[z] = 0;
} }
break; break;
default:
// Work around for issue 1304:
// flush modified globals before each instruction.
for(z=0; z<BITS; z++) {
cal.b[z] |= externs.b[z];
// issue 4066: flush modified return variables in case of panic
if(hasdefer)
cal.b[z] |= ovar.b[z];
}
break;
} }
for(z=0; z<BITS; z++) { for(z=0; z<BITS; z++) {
ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
......
...@@ -54,28 +54,6 @@ rcmp(const void *a1, const void *a2) ...@@ -54,28 +54,6 @@ rcmp(const void *a1, const void *a2)
return p2->varno - p1->varno; return p2->varno - p1->varno;
} }
static void
setvar(Bits *dst, Type **args)
{
Type *t;
Node *n;
Addr a;
Iter save;
Bits bit;
int z;
t = structfirst(&save, args);
while(t != T) {
n = nodarg(t, 1);
a = zprog.from;
naddr(n, &a, 0);
bit = mkvar(R, &a);
for(z=0; z<BITS; z++)
dst->b[z] |= bit.b[z];
t = structnext(&save);
}
}
static void static void
setaddrs(Bits bit) setaddrs(Bits bit)
{ {
...@@ -148,11 +126,6 @@ regopt(Prog *firstp) ...@@ -148,11 +126,6 @@ regopt(Prog *firstp)
ovar.b[z] = 0; ovar.b[z] = 0;
} }
// build lists of parameters and results
setvar(&ivar, getthis(curfn->type));
setvar(&ivar, getinarg(curfn->type));
setvar(&ovar, getoutarg(curfn->type));
/* /*
* pass 1 * pass 1
* build aux data structure * build aux data structure
...@@ -656,10 +629,6 @@ mkvar(Reg *r, Adr *a) ...@@ -656,10 +629,6 @@ mkvar(Reg *r, Adr *a)
v->nextinnode = node->opt; v->nextinnode = node->opt;
node->opt = v; node->opt = v;
if(debug['R'])
print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
ostats.nvar++;
bit = blsh(i); bit = blsh(i);
if(n == D_EXTERN || n == D_STATIC) if(n == D_EXTERN || n == D_STATIC)
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
...@@ -668,6 +637,13 @@ mkvar(Reg *r, Adr *a) ...@@ -668,6 +637,13 @@ mkvar(Reg *r, Adr *a)
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
params.b[z] |= bit.b[z]; params.b[z] |= bit.b[z];
if(node->class == PPARAM)
for(z=0; z<BITS; z++)
ivar.b[z] |= bit.b[z];
if(node->class == PPARAMOUT)
for(z=0; z<BITS; z++)
ovar.b[z] |= bit.b[z];
// Treat values with their address taken as live at calls, // Treat values with their address taken as live at calls,
// because the garbage collector's liveness analysis in ../gc/plive.c does. // because the garbage collector's liveness analysis in ../gc/plive.c does.
// These must be consistent or else we will elide stores and the garbage // These must be consistent or else we will elide stores and the garbage
...@@ -684,7 +660,22 @@ mkvar(Reg *r, Adr *a) ...@@ -684,7 +660,22 @@ mkvar(Reg *r, Adr *a)
// The broader := in a closure problem is mentioned in a comment in // The broader := in a closure problem is mentioned in a comment in
// closure.c:/^typecheckclosure and dcl.c:/^oldname. // closure.c:/^typecheckclosure and dcl.c:/^oldname.
if(node->addrtaken) if(node->addrtaken)
setaddrs(bit); v->addr = 1;
// Disable registerization for globals, because:
// (1) we might panic at any time and we want the recovery code
// to see the latest values (issue 1304).
// (2) we don't know what pointers might point at them and we want
// loads via those pointers to see updated values and vice versa (issue 7995).
//
// Disable registerization for results if using defer, because the deferred func
// might recover and return, causing the current values to be used.
if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
v->addr = 1;
if(debug['R'])
print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
ostats.nvar++;
return bit; return bit;
...@@ -781,17 +772,6 @@ prop(Reg *r, Bits ref, Bits cal) ...@@ -781,17 +772,6 @@ prop(Reg *r, Bits ref, Bits cal)
ref.b[z] = 0; ref.b[z] = 0;
} }
break; break;
default:
// Work around for issue 1304:
// flush modified globals before each instruction.
for(z=0; z<BITS; z++) {
cal.b[z] |= externs.b[z];
// issue 4066: flush modified return variables in case of panic
if(hasdefer)
cal.b[z] |= ovar.b[z];
}
break;
} }
for(z=0; z<BITS; z++) { for(z=0; z<BITS; z++) {
ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
......
// run
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
var a = 1
func main() {
defer func() {
recover()
if a != 2 {
println("BUG a =", a)
}
}()
a = 2
b := a - a
c := 4
a = c / b
a = 3
}
// run
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Issue 7995: globals not flushed quickly enough.
package main
import "fmt"
var (
p = 1
q = &p
)
func main() {
p = 50
*q = 100
s := fmt.Sprintln(p, *q)
if s != "100 100\n" {
println("BUG:", s)
}
}
package x1
import "fmt"
var P int
var b bool
func F(x *int) string {
if b { // avoid inlining
F(x)
}
P = 50
*x = 100
return fmt.Sprintln(P, *x)
}
package main
import "./x1"
func main() {
s := x1.F(&x1.P)
if s != "100 100\n" {
println("BUG:", s)
}
}
// rundir
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Issue 7995: globals not flushed quickly enough.
package ignored
...@@ -17,7 +17,7 @@ type Struct struct { ...@@ -17,7 +17,7 @@ type Struct struct {
type BigStruct struct { type BigStruct struct {
X int X int
Y float64 Y float64
A [1<<20]int A [1 << 20]int
Z string Z string
} }
...@@ -32,7 +32,7 @@ var ( ...@@ -32,7 +32,7 @@ var (
intp *int intp *int
arrayp *[10]int arrayp *[10]int
array0p *[0]int array0p *[0]int
bigarrayp *[1<<26]int bigarrayp *[1 << 26]int
structp *Struct structp *Struct
bigstructp *BigStruct bigstructp *BigStruct
emptyp *Empty emptyp *Empty
...@@ -47,15 +47,16 @@ func f1() { ...@@ -47,15 +47,16 @@ func f1() {
// the indirect. // the indirect.
_ = *arrayp // ERROR "generated nil check" _ = *arrayp // ERROR "generated nil check"
// 0-byte indirect doesn't suffice // 0-byte indirect doesn't suffice.
// we don't registerize globals, so there are no removed repeated nil checks.
_ = *array0p // ERROR "generated nil check"
_ = *array0p // ERROR "generated nil check" _ = *array0p // ERROR "generated nil check"
_ = *array0p // ERROR "removed repeated nil check" 386
_ = *intp // ERROR "removed repeated nil check" _ = *intp // ERROR "generated nil check"
_ = *arrayp // ERROR "removed repeated nil check" _ = *arrayp // ERROR "generated nil check"
_ = *structp // ERROR "generated nil check" _ = *structp // ERROR "generated nil check"
_ = *emptyp // ERROR "generated nil check" _ = *emptyp // ERROR "generated nil check"
_ = *arrayp // ERROR "removed repeated nil check" _ = *arrayp // ERROR "generated nil check"
} }
func f2() { func f2() {
...@@ -63,7 +64,7 @@ func f2() { ...@@ -63,7 +64,7 @@ func f2() {
intp *int intp *int
arrayp *[10]int arrayp *[10]int
array0p *[0]int array0p *[0]int
bigarrayp *[1<<20]int bigarrayp *[1 << 20]int
structp *Struct structp *Struct
bigstructp *BigStruct bigstructp *BigStruct
emptyp *Empty emptyp *Empty
...@@ -85,8 +86,8 @@ func f2() { ...@@ -85,8 +86,8 @@ func f2() {
} }
func fx10k() *[10000]int func fx10k() *[10000]int
var b bool
var b bool
func f3(x *[10000]int) { func f3(x *[10000]int) {
// Using a huge type and huge offsets so the compiler // Using a huge type and huge offsets so the compiler
...@@ -188,4 +189,3 @@ func f4(x *[10]int) { ...@@ -188,4 +189,3 @@ func f4(x *[10]int) {
x = y x = y
_ = &x[9] // ERROR "removed repeated nil check" _ = &x[9] // ERROR "removed repeated nil check"
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment