Commit 89465027 authored by Aliaksandr Valialkin's avatar Aliaksandr Valialkin Committed by Brad Fitzpatrick

bytes, strings: optimize Split*

The relevant benchmark results on linux/amd64:

bytes:

SplitSingleByteSeparator-4   25.7ms ± 5%   9.1ms ± 4%  -64.40%  (p=0.000 n=10+10)
SplitMultiByteSeparator-4    13.8ms ±20%   4.3ms ± 8%  -69.23%  (p=0.000 n=10+10)
SplitNSingleByteSeparator-4  1.88µs ± 9%  0.88µs ± 4%  -53.25%  (p=0.000 n=10+10)
SplitNMultiByteSeparator-4   4.83µs ±10%  1.32µs ± 9%  -72.65%  (p=0.000 n=10+10)

strings:

name                         old time/op  new time/op  delta
SplitSingleByteSeparator-4   21.4ms ± 8%   8.5ms ± 5%  -60.19%  (p=0.000 n=10+10)
SplitMultiByteSeparator-4    13.2ms ± 9%   3.9ms ± 4%  -70.29%  (p=0.000 n=10+10)
SplitNSingleByteSeparator-4  1.54µs ± 5%  0.75µs ± 7%  -51.21%  (p=0.000 n=10+10)
SplitNMultiByteSeparator-4   3.57µs ± 8%  1.01µs ±11%  -71.76%  (p=0.000 n=10+10)

Fixes #18973

Change-Id: Ie4bc010c6cc389001e72eab530497c81e5b26f34
Reviewed-on: https://go-review.googlesource.com/36510Reviewed-by: default avatarBrad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent c026845b
......@@ -215,20 +215,21 @@ func genSplit(s, sep []byte, sepSave, n int) [][]byte {
if n < 0 {
n = Count(s, sep) + 1
}
c := sep[0]
start := 0
a := make([][]byte, n)
na := 0
for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
if s[i] == c && (len(sep) == 1 || Equal(s[i:i+len(sep)], sep)) {
a[na] = s[start : i+sepSave]
na++
start = i + len(sep)
i += len(sep) - 1
n--
i := 0
for i < n {
m := Index(s, sep)
if m < 0 {
break
}
a[i] = s[:m+sepSave]
s = s[m+len(sep):]
i++
}
a[na] = s[start:]
return a[0 : na+1]
a[i] = s
return a[:i+1]
}
// SplitN slices s into subslices separated by sep and returns a slice of
......
......@@ -1432,6 +1432,59 @@ func BenchmarkTrimSpace(b *testing.B) {
}
}
func makeBenchInputHard() []byte {
tokens := [...]string{
"<a>", "<p>", "<b>", "<strong>",
"</a>", "</p>", "</b>", "</strong>",
"hello", "world",
}
x := make([]byte, 0, 1<<20)
for {
i := rand.Intn(len(tokens))
if len(x)+len(tokens[i]) >= 1<<20 {
break
}
x = append(x, tokens[i]...)
}
return x
}
var benchInputHard = makeBenchInputHard()
func BenchmarkSplitEmptySeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
Split(benchInputHard, nil)
}
}
func BenchmarkSplitSingleByteSeparator(b *testing.B) {
sep := []byte("/")
for i := 0; i < b.N; i++ {
Split(benchInputHard, sep)
}
}
func BenchmarkSplitMultiByteSeparator(b *testing.B) {
sep := []byte("hello")
for i := 0; i < b.N; i++ {
Split(benchInputHard, sep)
}
}
func BenchmarkSplitNSingleByteSeparator(b *testing.B) {
sep := []byte("/")
for i := 0; i < b.N; i++ {
SplitN(benchInputHard, sep, 10)
}
}
func BenchmarkSplitNMultiByteSeparator(b *testing.B) {
sep := []byte("hello")
for i := 0; i < b.N; i++ {
SplitN(benchInputHard, sep, 10)
}
}
func BenchmarkRepeat(b *testing.B) {
for i := 0; i < b.N; i++ {
Repeat([]byte("-"), 80)
......
......@@ -239,20 +239,21 @@ func genSplit(s, sep string, sepSave, n int) []string {
if n < 0 {
n = Count(s, sep) + 1
}
c := sep[0]
start := 0
a := make([]string, n)
na := 0
for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) {
a[na] = s[start : i+sepSave]
na++
start = i + len(sep)
i += len(sep) - 1
}
n--
i := 0
for i < n {
m := Index(s, sep)
if m < 0 {
break
}
a[i] = s[:m+sepSave]
s = s[m+len(sep):]
i++
}
a[na] = s[start:]
return a[0 : na+1]
a[i] = s
return a[:i+1]
}
// SplitN slices s into substrings separated by sep and returns a slice of
......
......@@ -1476,24 +1476,36 @@ func BenchmarkFieldsFunc(b *testing.B) {
}
}
func BenchmarkSplit1(b *testing.B) {
func BenchmarkSplitEmptySeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
Split(benchInputHard, "")
}
}
func BenchmarkSplit2(b *testing.B) {
func BenchmarkSplitSingleByteSeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
Split(benchInputHard, "/")
}
}
func BenchmarkSplit3(b *testing.B) {
func BenchmarkSplitMultiByteSeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
Split(benchInputHard, "hello")
}
}
func BenchmarkSplitNSingleByteSeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
SplitN(benchInputHard, "/", 10)
}
}
func BenchmarkSplitNMultiByteSeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
SplitN(benchInputHard, "hello", 10)
}
}
func BenchmarkRepeat(b *testing.B) {
for i := 0; i < b.N; i++ {
Repeat("-", 80)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment