Commit 1552e62d authored by Russ Cox's avatar Russ Cox

cmd/gc: elide write barrier for x = x[0:y] and x = append(x, ...)

Both of these forms can avoid writing to the base pointer in x
(in the slice, always, and in the append, most of the time).

For Go 1.5, will need to change the compilation of x = x[0:y]
to avoid writing to the base pointer, so that the elision is safe,
and will need to change the compilation of x = append(x, ...)
to write to the base pointer (through a barrier) only when
growing the underlying array, so that the general elision is safe.

For Go 1.4, elide the write barrier always, a change that should
have equivalent performance characteristics but is much
simpler and therefore safer.

benchmark                       old ns/op   new ns/op   delta
BenchmarkBinaryTree17           3910526122  3918802545  +0.21%
BenchmarkFannkuch11             3747650699  3732600693  -0.40%
BenchmarkFmtFprintfEmpty        106         98.7        -6.89%
BenchmarkFmtFprintfString       280         269         -3.93%
BenchmarkFmtFprintfInt          296         282         -4.73%
BenchmarkFmtFprintfIntInt       467         470         +0.64%
BenchmarkFmtFprintfPrefixedInt  418         398         -4.78%
BenchmarkFmtFprintfFloat        574         535         -6.79%
BenchmarkFmtManyArgs            1768        1818        +2.83%
BenchmarkGobDecode              14916799    14925182    +0.06%
BenchmarkGobEncode              14110076    13358298    -5.33%
BenchmarkGzip                   546609795   542630402   -0.73%
BenchmarkGunzip                 136270657   136496277   +0.17%
BenchmarkHTTPClientServer       126574      125245      -1.05%
BenchmarkJSONEncode             30006238    27862354    -7.14%
BenchmarkJSONDecode             106020889   102664600   -3.17%
BenchmarkMandelbrot200          5793550     5818320     +0.43%
BenchmarkGoParse                5437608     5463962     +0.48%
BenchmarkRegexpMatchEasy0_32    192         179         -6.77%
BenchmarkRegexpMatchEasy0_1K    462         460         -0.43%
BenchmarkRegexpMatchEasy1_32    168         153         -8.93%
BenchmarkRegexpMatchEasy1_1K    1420        1280        -9.86%
BenchmarkRegexpMatchMedium_32   338         286         -15.38%
BenchmarkRegexpMatchMedium_1K   107435      98027       -8.76%
BenchmarkRegexpMatchHard_32     5941        4846        -18.43%
BenchmarkRegexpMatchHard_1K     185965      153830      -17.28%
BenchmarkRevcomp                795497458   798447829   +0.37%
BenchmarkTemplate               132091559   134938425   +2.16%
BenchmarkTimeParse              604         608         +0.66%
BenchmarkTimeFormat             551         548         -0.54%

LGTM=r
R=r, dave
CC=golang-codereviews, iant, khr, rlh
https://golang.org/cl/159960043
parent e5624edc
...@@ -283,6 +283,7 @@ struct Node ...@@ -283,6 +283,7 @@ struct Node
uchar addrtaken; // address taken, even if not moved to heap uchar addrtaken; // address taken, even if not moved to heap
uchar dupok; // duplicate definitions ok (for func) uchar dupok; // duplicate definitions ok (for func)
uchar wrapper; // is method wrapper (for func) uchar wrapper; // is method wrapper (for func)
uchar reslice; // this is a reslice x = x[0:y] or x = append(x, ...)
schar likely; // likeliness of if statement schar likely; // likeliness of if statement
uchar hasbreak; // has break statement uchar hasbreak; // has break statement
uchar needzero; // if it contains pointers, needs to be zeroed on function entry uchar needzero; // if it contains pointers, needs to be zeroed on function entry
......
...@@ -2814,6 +2814,33 @@ checkassignlist(NodeList *l) ...@@ -2814,6 +2814,33 @@ checkassignlist(NodeList *l)
checkassign(l->n); checkassign(l->n);
} }
// Check whether l and r are the same side effect-free expression,
// so that it is safe to reuse one instead of computing both.
static int
samesafeexpr(Node *l, Node *r)
{
if(l->op != r->op || !eqtype(l->type, r->type))
return 0;
switch(l->op) {
case ONAME:
case OCLOSUREVAR:
return l == r;
case ODOT:
case ODOTPTR:
return l->right != nil && r->right != nil && l->right->sym == r->right->sym && samesafeexpr(l->left, r->left);
case OIND:
return samesafeexpr(l->left, r->left);
case OINDEX:
return samesafeexpr(l->left, r->left) && samesafeexpr(l->right, r->right);
}
return 0;
}
/* /*
* type check assignment. * type check assignment.
* if this assignment is the definition of a var on the left side, * if this assignment is the definition of a var on the left side,
...@@ -2851,6 +2878,29 @@ typecheckas(Node *n) ...@@ -2851,6 +2878,29 @@ typecheckas(Node *n)
n->typecheck = 1; n->typecheck = 1;
if(n->left->typecheck == 0) if(n->left->typecheck == 0)
typecheck(&n->left, Erv | Easgn); typecheck(&n->left, Erv | Easgn);
// Recognize slices being updated in place, for better code generation later.
// Don't rewrite if using race detector, to avoid needing to teach race detector
// about this optimization.
if(n->left && n->left->op != OINDEXMAP && n->right && !flag_race) {
switch(n->right->op) {
case OSLICE:
case OSLICE3:
case OSLICESTR:
// For x = x[0:y], x can be updated in place, without touching pointer.
if(samesafeexpr(n->left, n->right->left) && (n->right->right->left == N || iszero(n->right->right->left)))
n->right->reslice = 1;
break;
case OAPPEND:
// For x = append(x, ...), x can be updated in place when there is capacity,
// without touching the pointer; otherwise the emitted code to growslice
// can take care of updating the pointer, and only in that case.
if(n->right->list != nil && samesafeexpr(n->left, n->right->list->n))
n->right->reslice = 1;
break;
}
}
} }
static void static void
......
...@@ -1499,7 +1499,7 @@ fncall(Node *l, Type *rt) ...@@ -1499,7 +1499,7 @@ fncall(Node *l, Type *rt)
if(l->ullman >= UINF || l->op == OINDEXMAP) if(l->ullman >= UINF || l->op == OINDEXMAP)
return 1; return 1;
r.op = 0; memset(&r, 0, sizeof r);
if(needwritebarrier(l, &r)) if(needwritebarrier(l, &r))
return 1; return 1;
if(eqtype(l->type, rt)) if(eqtype(l->type, rt))
...@@ -2036,6 +2036,28 @@ needwritebarrier(Node *l, Node *r) ...@@ -2036,6 +2036,28 @@ needwritebarrier(Node *l, Node *r)
if(r->op == OADDR && isglobal(r->left)) if(r->op == OADDR && isglobal(r->left))
return 0; return 0;
// No write barrier for reslice: x = x[0:y] or x = append(x, ...).
// Both are compiled to modify x directly.
// In the case of append, a write barrier may still be needed
// if the underlying array grows, but the append code can
// generate the write barrier directly in that case.
// (It does not yet, but the cost of the write barrier will be
// small compared to the cost of the allocation.)
if(r->reslice) {
switch(r->op) {
case OSLICE:
case OSLICE3:
case OSLICESTR:
case OAPPEND:
break;
default:
dump("bad reslice-l", l);
dump("bad reslice-r", r);
break;
}
return 0;
}
// Otherwise, be conservative and use write barrier. // Otherwise, be conservative and use write barrier.
return 1; return 1;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment