Commit c9fd9975 authored by Josh Bleecher Snyder's avatar Josh Bleecher Snyder

cmd/compile: unroll comparisons to short constant strings

Unroll s == "ab" to

len(s) == 2 && s[0] == 'a' && s[1] == 'b'

This generates faster and shorter code
by avoiding a runtime call.
Do something similar for !=.

The cutoff length is 6. This was chosen empirically
by examining binary sizes on arm, arm64, 386, and amd64
using the SSA backend.

For all architectures examined, 4, 5, and 6 were
the ideal cutoff, with identical binary sizes.

The distribution of constant string equality sizes
during 'go build -a std' is:

 40.81%   622 len 0
 14.11%   215 len 4
  9.45%   144 len 1
  7.81%   119 len 3
  7.48%   114 len 5
  5.12%    78 len 7
  4.13%    63 len 2
  3.54%    54 len 8
  2.69%    41 len 6
  1.18%    18 len 10
  0.85%    13 len 9
  0.66%    10 len 14
  0.59%     9 len 17
  0.46%     7 len 11
  0.26%     4 len 12
  0.20%     3 len 19
  0.13%     2 len 13
  0.13%     2 len 15
  0.13%     2 len 16
  0.07%     1 len 20
  0.07%     1 len 23
  0.07%     1 len 33
  0.07%     1 len 36

A cutoff of length 6 covers most of the cases.

Benchmarks on amd64 comparing a string to a constant of length 3:

Cmp/1same-8           4.78ns ± 6%  0.94ns ± 9%  -80.26%  (p=0.000 n=20+20)
Cmp/1diffbytes-8      6.43ns ± 6%  0.96ns ±11%  -85.13%  (p=0.000 n=20+20)
Cmp/3same-8           4.71ns ± 5%  1.28ns ± 5%  -72.90%  (p=0.000 n=20+20)
Cmp/3difffirstbyte-8  6.33ns ± 7%  1.27ns ± 7%  -79.90%  (p=0.000 n=20+20)
Cmp/3difflastbyte-8   6.34ns ± 8%  1.26ns ± 9%  -80.13%  (p=0.000 n=20+20)

The change to the prove test preserves the
existing intent of the test. When the string was
short, there was a new "proved in bounds" report
that referred to individual byte comparisons.

Change-Id: I593ac303b0d11f275672090c5c786ea0c6b8da13
Reviewed-on: https://go-review.googlesource.com/26758
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarBrad Fitzpatrick <bradfitz@golang.org>
parent 584978f4
......@@ -1371,20 +1371,7 @@ opswitch:
n = callnew(n.Type.Elem())
}
// If one argument to the comparison is an empty string,
// comparing the lengths instead will yield the same result
// without the function call.
case OCMPSTR:
if (Isconst(n.Left, CTSTR) && len(n.Left.Val().U.(string)) == 0) || (Isconst(n.Right, CTSTR) && len(n.Right.Val().U.(string)) == 0) {
// TODO(marvin): Fix Node.EType type union.
r := Nod(Op(n.Etype), Nod(OLEN, n.Left, nil), Nod(OLEN, n.Right, nil))
r = typecheck(r, Erv)
r = walkexpr(r, init)
r.Type = n.Type
n = r
break
}
// s + "badgerbadgerbadger" == "badgerbadgerbadger"
if (Op(n.Etype) == OEQ || Op(n.Etype) == ONE) && Isconst(n.Right, CTSTR) && n.Left.Op == OADDSTR && n.Left.List.Len() == 2 && Isconst(n.Left.List.Second(), CTSTR) && strlit(n.Right) == strlit(n.Left.List.Second()) {
// TODO(marvin): Fix Node.EType type union.
......@@ -1396,12 +1383,61 @@ opswitch:
break
}
// Rewrite comparisons to short constant strings as length+byte-wise comparisons.
var cs, ncs *Node // const string, non-const string
switch {
case Isconst(n.Left, CTSTR) && Isconst(n.Right, CTSTR):
// ignore; will be constant evaluated
case Isconst(n.Left, CTSTR):
cs = n.Left
ncs = n.Right
case Isconst(n.Right, CTSTR):
cs = n.Right
ncs = n.Left
}
if cs != nil {
cmp := Op(n.Etype)
// maxRewriteLen was chosen empirically.
// It is the value that minimizes cmd/go file size
// across most architectures.
// See the commit description for CL 26758 for details.
maxRewriteLen := 6
var and Op
switch cmp {
case OEQ:
and = OANDAND
case ONE:
and = OOROR
default:
// Don't do byte-wise comparisons for <, <=, etc.
// They're fairly complicated.
// Length-only checks are ok, though.
maxRewriteLen = 0
}
if s := cs.Val().U.(string); len(s) <= maxRewriteLen {
if len(s) > 0 {
ncs = safeexpr(ncs, init)
}
// TODO(marvin): Fix Node.EType type union.
r := Nod(cmp, Nod(OLEN, ncs, nil), Nodintconst(int64(len(s))))
for i := 0; i < len(s); i++ {
cb := Nodintconst(int64(s[i]))
ncb := Nod(OINDEX, ncs, Nodintconst(int64(i)))
r = Nod(and, r, Nod(cmp, ncb, cb))
}
r = typecheck(r, Erv)
r = walkexpr(r, init)
r.Type = n.Type
n = r
break
}
}
var r *Node
// TODO(marvin): Fix Node.EType type union.
if Op(n.Etype) == OEQ || Op(n.Etype) == ONE {
// prepare for rewrite below
n.Left = cheapexpr(n.Left, init)
n.Right = cheapexpr(n.Right, init)
r = mkcall("eqstring", Types[TBOOL], init, conv(n.Left, Types[TSTRING]), conv(n.Right, Types[TSTRING]))
......@@ -1415,7 +1451,6 @@ opswitch:
} else {
// len(left) != len(right) || !eqstring(left, right)
r = Nod(ONOT, r, nil)
r = Nod(OOROR, Nod(ONE, Nod(OLEN, n.Left, nil), Nod(OLEN, n.Right, nil)), r)
}
......@@ -1424,7 +1459,6 @@ opswitch:
} else {
// sys_cmpstring(s1, s2) :: 0
r = mkcall("cmpstring", Types[TINT], init, conv(n.Left, Types[TSTRING]), conv(n.Right, Types[TSTRING]))
// TODO(marvin): Fix Node.EType type union.
r = Nod(Op(n.Etype), r, nodintconst(0))
}
......
......@@ -250,7 +250,7 @@ func f9(a, b bool) int {
func f10(a string) int {
n := len(a)
if a[:n>>1] == "aaa" {
if a[:n>>1] == "aaaaaaaaaaaaaa" {
return 0
}
return 1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment