Commit 89ebdbb5 authored by Kevin Burke's avatar Kevin Burke

regexp: speed up QuoteMeta with a lookup table

This is the same technique used in CL 24466. By adding a little bit of
size to the binary, we can remove a function call and gain a lot of
performance.

A raw array ([128]bool) would be faster, but is also be 128 bytes
instead of 16.

Running tip on a Mac:

name             old time/op    new time/op     delta
QuoteMetaAll-4      192ns ±12%      120ns ±11%   -37.27%  (p=0.000 n=10+10)
QuoteMetaNone-4     186ns ± 6%       64ns ± 6%   -65.52%  (p=0.000 n=10+10)

name             old speed      new speed       delta
QuoteMetaAll-4   73.2MB/s ±11%  116.6MB/s ±10%   +59.21%  (p=0.000 n=10+10)
QuoteMetaNone-4   139MB/s ± 6%    405MB/s ± 6%  +190.74%  (p=0.000 n=10+10)

Change-Id: I68ce9fe2ef1c28e2274157789b35b0dd6ae3efb5
Reviewed-on: https://go-review.googlesource.com/41495
Run-TryBot: Kevin Burke <kev@inburke.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarBrad Fitzpatrick <bradfitz@golang.org>
parent 642a1cc7
......@@ -9,6 +9,7 @@ import (
"regexp/syntax"
"strings"
"testing"
"unicode/utf8"
)
var goodRe = []string{
......@@ -354,6 +355,7 @@ type MetaTest struct {
var metaTests = []MetaTest{
{``, ``, ``, true},
{`foo`, `foo`, `foo`, true},
{`日本語+`, `日本語\+`, `日本語`, false},
{`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator
{`foo.\$`, `foo\.\\\$`, `foo`, false}, // has escaped operators and real operators
{`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[\{\]\}\\\|,<\.>/\?~`, `!@#`, false},
......@@ -822,7 +824,13 @@ func BenchmarkMatchParallelCopied(b *testing.B) {
var sink string
func BenchmarkQuoteMetaAll(b *testing.B) {
s := string(specialBytes)
specials := make([]byte, 0)
for i := byte(0); i < utf8.RuneSelf; i++ {
if special(i) {
specials = append(specials, i)
}
}
s := string(specials)
b.SetBytes(int64(len(s)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
......
......@@ -609,10 +609,18 @@ func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
})
}
var specialBytes = []byte(`\.+*?()|[]{}^$`)
// Bitmap used by func special to check whether a character needs to be escaped.
var specialBytes [16]byte
// special reports whether byte b needs to be escaped by QuoteMeta.
func special(b byte) bool {
return bytes.IndexByte(specialBytes, b) >= 0
return b < utf8.RuneSelf && specialBytes[b%16]&(1<<(b/16)) != 0
}
func init() {
for _, b := range []byte(`\.+*?()|[]{}^$`) {
specialBytes[b%16] |= 1 << (b / 16)
}
}
// QuoteMeta returns a string that quotes all regular expression metacharacters
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment