Commit 89ebdbb5 authored by Kevin Burke

regexp: speed up QuoteMeta with a lookup table

This is the same technique used in CL 24466. By adding a little bit of
size to the binary, we can remove a function call and gain a lot of
performance.

A raw array ([128]bool) would be faster, but would also be 128 bytes
instead of 16.

Running tip on a Mac:

name             old time/op    new time/op     delta
QuoteMetaAll-4      192ns ±12%      120ns ±11%   -37.27%  (p=0.000 n=10+10)
QuoteMetaNone-4     186ns ± 6%       64ns ± 6%   -65.52%  (p=0.000 n=10+10)

name             old speed      new speed       delta
QuoteMetaAll-4   73.2MB/s ±11%  116.6MB/s ±10%   +59.21%  (p=0.000 n=10+10)
QuoteMetaNone-4   139MB/s ± 6%    405MB/s ± 6%  +190.74%  (p=0.000 n=10+10)

Change-Id: I68ce9fe2ef1c28e2274157789b35b0dd6ae3efb5
Reviewed-on: https://go-review.googlesource.com/41495
Run-TryBot: Kevin Burke <kev@inburke.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
parent 642a1cc7
...@@ -9,6 +9,7 @@ import ( ...@@ -9,6 +9,7 @@ import (
"regexp/syntax" "regexp/syntax"
"strings" "strings"
"testing" "testing"
"unicode/utf8"
) )
var goodRe = []string{ var goodRe = []string{
...@@ -354,6 +355,7 @@ type MetaTest struct { ...@@ -354,6 +355,7 @@ type MetaTest struct {
var metaTests = []MetaTest{ var metaTests = []MetaTest{
{``, ``, ``, true}, {``, ``, ``, true},
{`foo`, `foo`, `foo`, true}, {`foo`, `foo`, `foo`, true},
{`日本語+`, `日本語\+`, `日本語`, false},
{`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator {`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator
{`foo.\$`, `foo\.\\\$`, `foo`, false}, // has escaped operators and real operators {`foo.\$`, `foo\.\\\$`, `foo`, false}, // has escaped operators and real operators
{`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[\{\]\}\\\|,<\.>/\?~`, `!@#`, false}, {`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[\{\]\}\\\|,<\.>/\?~`, `!@#`, false},
...@@ -822,7 +824,13 @@ func BenchmarkMatchParallelCopied(b *testing.B) { ...@@ -822,7 +824,13 @@ func BenchmarkMatchParallelCopied(b *testing.B) {
var sink string var sink string
func BenchmarkQuoteMetaAll(b *testing.B) { func BenchmarkQuoteMetaAll(b *testing.B) {
s := string(specialBytes) specials := make([]byte, 0)
for i := byte(0); i < utf8.RuneSelf; i++ {
if special(i) {
specials = append(specials, i)
}
}
s := string(specials)
b.SetBytes(int64(len(s))) b.SetBytes(int64(len(s)))
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
......
...@@ -609,10 +609,18 @@ func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte { ...@@ -609,10 +609,18 @@ func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
}) })
} }
var specialBytes = []byte(`\.+*?()|[]{}^$`) // Bitmap used by func special to check whether a character needs to be escaped.
var specialBytes [16]byte
// special reports whether byte b needs to be escaped by QuoteMeta.
func special(b byte) bool { func special(b byte) bool {
return bytes.IndexByte(specialBytes, b) >= 0 return b < utf8.RuneSelf && specialBytes[b%16]&(1<<(b/16)) != 0
}
func init() {
for _, b := range []byte(`\.+*?()|[]{}^$`) {
specialBytes[b%16] |= 1 << (b / 16)
}
} }
// QuoteMeta returns a string that quotes all regular expression metacharacters // QuoteMeta returns a string that quotes all regular expression metacharacters
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment