Commit 9b2b0c8c authored by Russ Cox's avatar Russ Cox

regexp/syntax: reject large repetitions created by nesting small ones

Fixes #7609.

LGTM=r
R=r
CC=golang-codereviews
https://golang.org/cl/150270043
parent ac9218f5
...@@ -244,6 +244,7 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) ( ...@@ -244,6 +244,7 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (
if sub.Op >= opPseudo { if sub.Op >= opPseudo {
return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]} return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
} }
re := p.newRegexp(op) re := p.newRegexp(op)
re.Min = min re.Min = min
re.Max = max re.Max = max
...@@ -251,9 +252,42 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) ( ...@@ -251,9 +252,42 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (
re.Sub = re.Sub0[:1] re.Sub = re.Sub0[:1]
re.Sub[0] = sub re.Sub[0] = sub
p.stack[n-1] = re p.stack[n-1] = re
if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) {
return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
}
return after, nil return after, nil
} }
// repeatIsValid reports whether the repetition re is valid.
// Valid means that the combination of the top-level repetition
// and any inner repetitions does not exceed n copies of the
// innermost thing.
// This function rewalks the regexp tree and is called for every repetition,
// so we have to worry about inducing quadratic behavior in the parser.
// We avoid this by only calling repeatIsValid when min or max >= 2.
// In that case the depth of any >= 2 nesting can only get to 9 without
// triggering a parse error, so each subtree can only be rewalked 9 times.
func repeatIsValid(re *Regexp, n int) bool {
if re.Op == OpRepeat {
m := re.Max
if m < 0 {
m = re.Min
}
if m > n {
return false
}
n /= m
}
for _, sub := range re.Sub {
if !repeatIsValid(sub, n) {
return false
}
}
return true
}
// concat replaces the top of the stack (above the topmost '|' or '(') with its concatenation. // concat replaces the top of the stack (above the topmost '|' or '(') with its concatenation.
func (p *parser) concat() *Regexp { func (p *parser) concat() *Regexp {
p.maybeConcat(-1, 0) p.maybeConcat(-1, 0)
......
...@@ -200,6 +200,10 @@ var parseTests = []parseTest{ ...@@ -200,6 +200,10 @@ var parseTests = []parseTest{
`cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}`}, `cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}`},
{`x{2}y|x{2}[0-9]y`, {`x{2}y|x{2}[0-9]y`,
`cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}`}, `cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}`},
// Valid repetitions.
{`((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}))`, ``},
{`((((((((((x{1}){2}){2}){2}){2}){2}){2}){2}){2}){2})`, ``},
} }
const testFlags = MatchNL | PerlX | UnicodeGroups const testFlags = MatchNL | PerlX | UnicodeGroups
...@@ -262,6 +266,10 @@ func testParseDump(t *testing.T, tests []parseTest, flags Flags) { ...@@ -262,6 +266,10 @@ func testParseDump(t *testing.T, tests []parseTest, flags Flags) {
t.Errorf("Parse(%#q): %v", tt.Regexp, err) t.Errorf("Parse(%#q): %v", tt.Regexp, err)
continue continue
} }
if tt.Dump == "" {
// It parsed. That's all we care about.
continue
}
d := dump(re) d := dump(re)
if d != tt.Dump { if d != tt.Dump {
t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump) t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)
...@@ -470,6 +478,7 @@ var invalidRegexps = []string{ ...@@ -470,6 +478,7 @@ var invalidRegexps = []string{
`(?i)[a-Z]`, `(?i)[a-Z]`,
`a{100000}`, `a{100000}`,
`a{100000,}`, `a{100000,}`,
"((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
} }
var onlyPerl = []string{ var onlyPerl = []string{
...@@ -527,6 +536,10 @@ func TestToStringEquivalentParse(t *testing.T) { ...@@ -527,6 +536,10 @@ func TestToStringEquivalentParse(t *testing.T) {
t.Errorf("Parse(%#q): %v", tt.Regexp, err) t.Errorf("Parse(%#q): %v", tt.Regexp, err)
continue continue
} }
if tt.Dump == "" {
// It parsed. That's all we care about.
continue
}
d := dump(re) d := dump(re)
if d != tt.Dump { if d != tt.Dump {
t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump) t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment