Commit 74a60ed0 authored by Rob Pike

casify lib/regexp

R=rsc
DELTA=259  (0 added, 0 deleted, 259 changed)
OCL=22897
CL=22900
parent 2527bba9
...@@ -30,59 +30,59 @@ var good_re = []string{ ...@@ -30,59 +30,59 @@ var good_re = []string{
} }
// TODO: nice to do this with a map // TODO: nice to do this with a map
type StringError struct { type stringError struct {
re string; re string;
err *os.Error; err *os.Error;
} }
var bad_re = []StringError{ var bad_re = []stringError{
StringError{ `*`, regexp.ErrBareClosure }, stringError{ `*`, regexp.ErrBareClosure },
StringError{ `(abc`, regexp.ErrUnmatchedLpar }, stringError{ `(abc`, regexp.ErrUnmatchedLpar },
StringError{ `abc)`, regexp.ErrUnmatchedRpar }, stringError{ `abc)`, regexp.ErrUnmatchedRpar },
StringError{ `x[a-z`, regexp.ErrUnmatchedLbkt }, stringError{ `x[a-z`, regexp.ErrUnmatchedLbkt },
StringError{ `abc]`, regexp.ErrUnmatchedRbkt }, stringError{ `abc]`, regexp.ErrUnmatchedRbkt },
StringError{ `[z-a]`, regexp.ErrBadRange }, stringError{ `[z-a]`, regexp.ErrBadRange },
StringError{ `abc\`, regexp.ErrExtraneousBackslash }, stringError{ `abc\`, regexp.ErrExtraneousBackslash },
StringError{ `a**`, regexp.ErrBadClosure }, stringError{ `a**`, regexp.ErrBadClosure },
StringError{ `a*+`, regexp.ErrBadClosure }, stringError{ `a*+`, regexp.ErrBadClosure },
StringError{ `a??`, regexp.ErrBadClosure }, stringError{ `a??`, regexp.ErrBadClosure },
StringError{ `*`, regexp.ErrBareClosure }, stringError{ `*`, regexp.ErrBareClosure },
StringError{ `\x`, regexp.ErrBadBackslash }, stringError{ `\x`, regexp.ErrBadBackslash },
} }
type Vec []int; type vec []int;
type Tester struct { type tester struct {
re string; re string;
text string; text string;
match Vec; match vec;
} }
var matches = []Tester { var matches = []tester {
Tester{ ``, "", Vec{0,0} }, tester{ ``, "", vec{0,0} },
Tester{ `a`, "a", Vec{0,1} }, tester{ `a`, "a", vec{0,1} },
Tester{ `x`, "y", Vec{} }, tester{ `x`, "y", vec{} },
Tester{ `b`, "abc", Vec{1,2} }, tester{ `b`, "abc", vec{1,2} },
Tester{ `.`, "a", Vec{0,1} }, tester{ `.`, "a", vec{0,1} },
Tester{ `.*`, "abcdef", Vec{0,6} }, tester{ `.*`, "abcdef", vec{0,6} },
Tester{ `^abcd$`, "abcd", Vec{0,4} }, tester{ `^abcd$`, "abcd", vec{0,4} },
Tester{ `^bcd'`, "abcdef", Vec{} }, tester{ `^bcd'`, "abcdef", vec{} },
Tester{ `^abcd$`, "abcde", Vec{} }, tester{ `^abcd$`, "abcde", vec{} },
Tester{ `a+`, "baaab", Vec{1,4} }, tester{ `a+`, "baaab", vec{1,4} },
Tester{ `a*`, "baaab", Vec{0,0} }, tester{ `a*`, "baaab", vec{0,0} },
Tester{ `[a-z]+`, "abcd", Vec{0,4} }, tester{ `[a-z]+`, "abcd", vec{0,4} },
Tester{ `[^a-z]+`, "ab1234cd", Vec{2,6} }, tester{ `[^a-z]+`, "ab1234cd", vec{2,6} },
Tester{ `[a\-\]z]+`, "az]-bcz", Vec{0,4} }, tester{ `[a\-\]z]+`, "az]-bcz", vec{0,4} },
Tester{ `[日本語]+`, "日本語日本語", Vec{0,18} }, tester{ `[日本語]+`, "日本語日本語", vec{0,18} },
Tester{ `()`, "", Vec{0,0, 0,0} }, tester{ `()`, "", vec{0,0, 0,0} },
Tester{ `(a)`, "a", Vec{0,1, 0,1} }, tester{ `(a)`, "a", vec{0,1, 0,1} },
Tester{ `(.)(.)`, "日a", Vec{0,4, 0,3, 3,4} }, tester{ `(.)(.)`, "日a", vec{0,4, 0,3, 3,4} },
Tester{ `(.*)`, "", Vec{0,0, 0,0} }, tester{ `(.*)`, "", vec{0,0, 0,0} },
Tester{ `(.*)`, "abcd", Vec{0,4, 0,4} }, tester{ `(.*)`, "abcd", vec{0,4, 0,4} },
Tester{ `(..)(..)`, "abcd", Vec{0,4, 0,2, 2,4} }, tester{ `(..)(..)`, "abcd", vec{0,4, 0,2, 2,4} },
Tester{ `(([^xyz]*)(d))`, "abcd", Vec{0,4, 0,4, 0,3, 3,4} }, tester{ `(([^xyz]*)(d))`, "abcd", vec{0,4, 0,4, 0,3, 3,4} },
Tester{ `((a|b|c)*(d))`, "abcd", Vec{0,4, 0,4, 2,3, 3,4} }, tester{ `((a|b|c)*(d))`, "abcd", vec{0,4, 0,4, 2,3, 3,4} },
Tester{ `(((a|b|c)*)(d))`, "abcd", Vec{0,4, 0,4, 0,3, 2,3, 3,4} }, tester{ `(((a|b|c)*)(d))`, "abcd", vec{0,4, 0,4, 0,3, 2,3, 3,4} },
Tester{ `a*(|(b))c*`, "aacc", Vec{0,4, 2,2, -1,-1} }, tester{ `a*(|(b))c*`, "aacc", vec{0,4, 2,2, -1,-1} },
} }
func CompileTest(t *testing.T, expr string, error *os.Error) regexp.Regexp { func CompileTest(t *testing.T, expr string, error *os.Error) regexp.Regexp {
...@@ -93,7 +93,7 @@ func CompileTest(t *testing.T, expr string, error *os.Error) regexp.Regexp { ...@@ -93,7 +93,7 @@ func CompileTest(t *testing.T, expr string, error *os.Error) regexp.Regexp {
return re return re
} }
func PrintVec(t *testing.T, m []int) { func Printvec(t *testing.T, m []int) {
l := len(m); l := len(m);
if l == 0 { if l == 0 {
t.Log("\t<no match>"); t.Log("\t<no match>");
...@@ -149,9 +149,9 @@ func ExecuteTest(t *testing.T, expr string, str string, match []int) { ...@@ -149,9 +149,9 @@ func ExecuteTest(t *testing.T, expr string, str string, match []int) {
m := re.Execute(str); m := re.Execute(str);
if !Equal(m, match) { if !Equal(m, match) {
t.Error("Execute failure on `", expr, "` matching `", str, "`:"); t.Error("Execute failure on `", expr, "` matching `", str, "`:");
PrintVec(t, m); Printvec(t, m);
t.Log("should be:"); t.Log("should be:");
PrintVec(t, match); Printvec(t, match);
} }
} }
......
...@@ -11,7 +11,7 @@ import ( ...@@ -11,7 +11,7 @@ import (
"array"; "array";
) )
export var debug = false; var debug = false;
export var ErrInternal = os.NewError("internal error"); export var ErrInternal = os.NewError("internal error");
...@@ -26,110 +26,110 @@ export var ErrBareClosure = os.NewError("closure applies to nothing"); ...@@ -26,110 +26,110 @@ export var ErrBareClosure = os.NewError("closure applies to nothing");
export var ErrBadBackslash = os.NewError("illegal backslash escape"); export var ErrBadBackslash = os.NewError("illegal backslash escape");
// An instruction executed by the NFA // An instruction executed by the NFA
type Inst interface { type instr interface {
Type() int; // the type of this instruction: CHAR, ANY, etc. Type() int; // the type of this instruction: cCHAR, cANY, etc.
Next() Inst; // the instruction to execute after this one Next() instr; // the instruction to execute after this one
SetNext(i Inst); SetNext(i instr);
Index() int; Index() int;
SetIndex(i int); SetIndex(i int);
Print(); Print();
} }
// Fields and methods common to all instructions // Fields and methods common to all instructions
type Common struct { type iCommon struct {
next Inst; next instr;
index int; index int;
} }
func (c *Common) Next() Inst { return c.next } func (c *iCommon) Next() instr { return c.next }
func (c *Common) SetNext(i Inst) { c.next = i } func (c *iCommon) SetNext(i instr) { c.next = i }
func (c *Common) Index() int { return c.index } func (c *iCommon) Index() int { return c.index }
func (c *Common) SetIndex(i int) { c.index = i } func (c *iCommon) SetIndex(i int) { c.index = i }
type RE struct { type regExp struct {
expr string; // the original expression expr string; // the original expression
ch chan<- *RE; // reply channel when we're done ch chan<- *regExp; // reply channel when we're done
error *os.Error; // compile- or run-time error; nil if OK error *os.Error; // compile- or run-time error; nil if OK
inst *array.Array; inst *array.Array;
start Inst; start instr;
nbra int; // number of brackets in expression, for subexpressions nbra int; // number of brackets in expression, for subexpressions
} }
const ( const (
START // beginning of program cSTART // beginning of program
= iota; = iota;
END; // end of program: success cEND; // end of program: success
BOT; // '^' beginning of text cBOT; // '^' beginning of text
EOT; // '$' end of text cEOT; // '$' end of text
CHAR; // 'a' regular character cCHAR; // 'a' regular character
CHARCLASS; // [a-z] character class cCHARCLASS; // [a-z] character class
ANY; // '.' any character cANY; // '.' any character
BRA; // '(' parenthesized expression cBRA; // '(' parenthesized expression
EBRA; // ')'; end of '(' parenthesized expression cEBRA; // ')'; end of '(' parenthesized expression
ALT; // '|' alternation cALT; // '|' alternation
NOP; // do nothing; makes it easy to link without patching cNOP; // do nothing; makes it easy to link without patching
) )
// --- START start of program // --- START start of program
type Start struct { type iStart struct {
Common iCommon
} }
func (start *Start) Type() int { return START } func (start *iStart) Type() int { return cSTART }
func (start *Start) Print() { print("start") } func (start *iStart) Print() { print("start") }
// --- END end of program // --- END end of program
type End struct { type iEnd struct {
Common iCommon
} }
func (end *End) Type() int { return END } func (end *iEnd) Type() int { return cEND }
func (end *End) Print() { print("end") } func (end *iEnd) Print() { print("end") }
// --- BOT beginning of text // --- BOT beginning of text
type Bot struct { type iBot struct {
Common iCommon
} }
func (bot *Bot) Type() int { return BOT } func (bot *iBot) Type() int { return cBOT }
func (bot *Bot) Print() { print("bot") } func (bot *iBot) Print() { print("bot") }
// --- EOT end of text // --- EOT end of text
type Eot struct { type iEot struct {
Common iCommon
} }
func (eot *Eot) Type() int { return EOT } func (eot *iEot) Type() int { return cEOT }
func (eot *Eot) Print() { print("eot") } func (eot *iEot) Print() { print("eot") }
// --- CHAR a regular character // --- CHAR a regular character
type Char struct { type iChar struct {
Common; iCommon;
char int; char int;
} }
func (char *Char) Type() int { return CHAR } func (char *iChar) Type() int { return cCHAR }
func (char *Char) Print() { print("char ", string(char.char)) } func (char *iChar) Print() { print("char ", string(char.char)) }
func NewChar(char int) *Char { func newChar(char int) *iChar {
c := new(Char); c := new(iChar);
c.char = char; c.char = char;
return c; return c;
} }
// --- CHARCLASS [a-z] // --- CHARCLASS [a-z]
type CharClass struct { type iCharClass struct {
Common; iCommon;
char int; char int;
negate bool; // is character class negated? ([^a-z]) negate bool; // is character class negated? ([^a-z])
// array of int, stored pairwise: [a-z] is (a,z); x is (x,x): // array of int, stored pairwise: [a-z] is (a,z); x is (x,x):
ranges *array.IntArray; ranges *array.IntArray;
} }
func (cclass *CharClass) Type() int { return CHARCLASS } func (cclass *iCharClass) Type() int { return cCHARCLASS }
func (cclass *CharClass) Print() { func (cclass *iCharClass) Print() {
print("charclass"); print("charclass");
if cclass.negate { if cclass.negate {
print(" (negated)"); print(" (negated)");
...@@ -145,13 +145,13 @@ func (cclass *CharClass) Print() { ...@@ -145,13 +145,13 @@ func (cclass *CharClass) Print() {
} }
} }
func (cclass *CharClass) AddRange(a, b int) { func (cclass *iCharClass) AddRange(a, b int) {
// range is a through b inclusive // range is a through b inclusive
cclass.ranges.Push(a); cclass.ranges.Push(a);
cclass.ranges.Push(b); cclass.ranges.Push(b);
} }
func (cclass *CharClass) Matches(c int) bool { func (cclass *iCharClass) Matches(c int) bool {
for i := 0; i < cclass.ranges.Len(); i = i+2 { for i := 0; i < cclass.ranges.Len(); i = i+2 {
min := cclass.ranges.At(i); min := cclass.ranges.At(i);
max := cclass.ranges.At(i+1); max := cclass.ranges.At(i+1);
...@@ -162,84 +162,84 @@ func (cclass *CharClass) Matches(c int) bool { ...@@ -162,84 +162,84 @@ func (cclass *CharClass) Matches(c int) bool {
return cclass.negate return cclass.negate
} }
func NewCharClass() *CharClass { func newCharClass() *iCharClass {
c := new(CharClass); c := new(iCharClass);
c.ranges = array.NewIntArray(0); c.ranges = array.NewIntArray(0);
return c; return c;
} }
// --- ANY any character // --- ANY any character
type Any struct { type iAny struct {
Common iCommon
} }
func (any *Any) Type() int { return ANY } func (any *iAny) Type() int { return cANY }
func (any *Any) Print() { print("any") } func (any *iAny) Print() { print("any") }
// --- BRA parenthesized expression // --- BRA parenthesized expression
type Bra struct { type iBra struct {
Common; iCommon;
n int; // subexpression number n int; // subexpression number
} }
func (bra *Bra) Type() int { return BRA } func (bra *iBra) Type() int { return cBRA }
func (bra *Bra) Print() { print("bra", bra.n); } func (bra *iBra) Print() { print("bra", bra.n); }
// --- EBRA end of parenthesized expression // --- EBRA end of parenthesized expression
type Ebra struct { type iEbra struct {
Common; iCommon;
n int; // subexpression number n int; // subexpression number
} }
func (ebra *Ebra) Type() int { return EBRA } func (ebra *iEbra) Type() int { return cEBRA }
func (ebra *Ebra) Print() { print("ebra ", ebra.n); } func (ebra *iEbra) Print() { print("ebra ", ebra.n); }
// --- ALT alternation // --- ALT alternation
type Alt struct { type iAlt struct {
Common; iCommon;
left Inst; // other branch left instr; // other branch
} }
func (alt *Alt) Type() int { return ALT } func (alt *iAlt) Type() int { return cALT }
func (alt *Alt) Print() { print("alt(", alt.left.Index(), ")"); } func (alt *iAlt) Print() { print("alt(", alt.left.Index(), ")"); }
// --- NOP no operation // --- NOP no operation
type Nop struct { type iNop struct {
Common iCommon
} }
func (nop *Nop) Type() int { return NOP } func (nop *iNop) Type() int { return cNOP }
func (nop *Nop) Print() { print("nop") } func (nop *iNop) Print() { print("nop") }
// report error and exit compiling/executing goroutine // report error and exit compiling/executing goroutine
func (re *RE) Error(err *os.Error) { func (re *regExp) Error(err *os.Error) {
re.error = err; re.error = err;
re.ch <- re; re.ch <- re;
sys.goexit(); sys.goexit();
} }
func (re *RE) Add(i Inst) Inst { func (re *regExp) Add(i instr) instr {
i.SetIndex(re.inst.Len()); i.SetIndex(re.inst.Len());
re.inst.Push(i); re.inst.Push(i);
return i; return i;
} }
type Parser struct { type parser struct {
re *RE; re *regExp;
nlpar int; // number of unclosed lpars nlpar int; // number of unclosed lpars
pos int; pos int;
ch int; ch int;
} }
const EOF = -1 const endOfFile = -1
func (p *Parser) c() int { func (p *parser) c() int {
return p.ch; return p.ch;
} }
func (p *Parser) nextc() int { func (p *parser) nextc() int {
if p.pos >= len(p.re.expr) { if p.pos >= len(p.re.expr) {
p.ch = EOF p.ch = endOfFile
} else { } else {
c, w := sys.stringtorune(p.re.expr, p.pos); c, w := sys.stringtorune(p.re.expr, p.pos);
p.ch = c; p.ch = c;
...@@ -248,11 +248,11 @@ func (p *Parser) nextc() int { ...@@ -248,11 +248,11 @@ func (p *Parser) nextc() int {
return p.ch; return p.ch;
} }
func NewParser(re *RE) *Parser { func newParser(re *regExp) *parser {
parser := new(Parser); p := new(parser);
parser.re = re; p.re = re;
parser.nextc(); // load p.ch p.nextc(); // load p.ch
return parser; return p;
} }
/* /*
...@@ -274,9 +274,9 @@ Grammar: ...@@ -274,9 +274,9 @@ Grammar:
*/ */
func (p *Parser) Regexp() (start, end Inst) func (p *parser) Regexp() (start, end instr)
var NULL Inst var iNULL instr
func special(c int) bool { func special(c int) bool {
s := `\.+*?()|[]`; s := `\.+*?()|[]`;
...@@ -298,8 +298,8 @@ func specialcclass(c int) bool { ...@@ -298,8 +298,8 @@ func specialcclass(c int) bool {
return false return false
} }
func (p *Parser) CharClass() Inst { func (p *parser) CharClass() instr {
cc := NewCharClass(); cc := newCharClass();
p.re.Add(cc); p.re.Add(cc);
if p.c() == '^' { if p.c() == '^' {
cc.negate = true; cc.negate = true;
...@@ -308,7 +308,7 @@ func (p *Parser) CharClass() Inst { ...@@ -308,7 +308,7 @@ func (p *Parser) CharClass() Inst {
left := -1; left := -1;
for { for {
switch c := p.c(); c { switch c := p.c(); c {
case ']', EOF: case ']', endOfFile:
if left >= 0 { if left >= 0 {
p.re.Error(ErrBadRange); p.re.Error(ErrBadRange);
} }
...@@ -318,7 +318,7 @@ func (p *Parser) CharClass() Inst { ...@@ -318,7 +318,7 @@ func (p *Parser) CharClass() Inst {
case '\\': case '\\':
c = p.nextc(); c = p.nextc();
switch { switch {
case c == EOF: case c == endOfFile:
p.re.Error(ErrExtraneousBackslash); p.re.Error(ErrExtraneousBackslash);
case c == 'n': case c == 'n':
c = '\n'; c = '\n';
...@@ -346,33 +346,33 @@ func (p *Parser) CharClass() Inst { ...@@ -346,33 +346,33 @@ func (p *Parser) CharClass() Inst {
} }
} }
} }
return NULL return iNULL
} }
func (p *Parser) Term() (start, end Inst) { func (p *parser) Term() (start, end instr) {
switch c := p.c(); c { switch c := p.c(); c {
case '|', EOF: case '|', endOfFile:
return NULL, NULL; return iNULL, iNULL;
case '*', '+': case '*', '+':
p.re.Error(ErrBareClosure); p.re.Error(ErrBareClosure);
case ')': case ')':
if p.nlpar == 0 { if p.nlpar == 0 {
p.re.Error(ErrUnmatchedRpar); p.re.Error(ErrUnmatchedRpar);
} }
return NULL, NULL; return iNULL, iNULL;
case ']': case ']':
p.re.Error(ErrUnmatchedRbkt); p.re.Error(ErrUnmatchedRbkt);
case '^': case '^':
p.nextc(); p.nextc();
start = p.re.Add(new(Bot)); start = p.re.Add(new(iBot));
return start, start; return start, start;
case '$': case '$':
p.nextc(); p.nextc();
start = p.re.Add(new(Eot)); start = p.re.Add(new(iEot));
return start, start; return start, start;
case '.': case '.':
p.nextc(); p.nextc();
start = p.re.Add(new(Any)); start = p.re.Add(new(iAny));
return start, start; return start, start;
case '[': case '[':
p.nextc(); p.nextc();
...@@ -393,14 +393,14 @@ func (p *Parser) Term() (start, end Inst) { ...@@ -393,14 +393,14 @@ func (p *Parser) Term() (start, end Inst) {
} }
p.nlpar--; p.nlpar--;
p.nextc(); p.nextc();
bra := new(Bra); bra := new(iBra);
p.re.Add(bra); p.re.Add(bra);
ebra := new(Ebra); ebra := new(iEbra);
p.re.Add(ebra); p.re.Add(ebra);
bra.n = nbra; bra.n = nbra;
ebra.n = nbra; ebra.n = nbra;
if start == NULL { if start == iNULL {
if end == NULL { p.re.Error(ErrInternal) } if end == iNULL { p.re.Error(ErrInternal) }
start = ebra start = ebra
} else { } else {
end.SetNext(ebra); end.SetNext(ebra);
...@@ -410,7 +410,7 @@ func (p *Parser) Term() (start, end Inst) { ...@@ -410,7 +410,7 @@ func (p *Parser) Term() (start, end Inst) {
case '\\': case '\\':
c = p.nextc(); c = p.nextc();
switch { switch {
case c == EOF: case c == endOfFile:
p.re.Error(ErrExtraneousBackslash); p.re.Error(ErrExtraneousBackslash);
case c == 'n': case c == 'n':
c = '\n'; c = '\n';
...@@ -422,22 +422,22 @@ func (p *Parser) Term() (start, end Inst) { ...@@ -422,22 +422,22 @@ func (p *Parser) Term() (start, end Inst) {
fallthrough; fallthrough;
default: default:
p.nextc(); p.nextc();
start = NewChar(c); start = newChar(c);
p.re.Add(start); p.re.Add(start);
return start, start return start, start
} }
panic("unreachable"); panic("unreachable");
} }
func (p *Parser) Closure() (start, end Inst) { func (p *parser) Closure() (start, end instr) {
start, end = p.Term(); start, end = p.Term();
if start == NULL { if start == iNULL {
return return
} }
switch p.c() { switch p.c() {
case '*': case '*':
// (start,end)*: // (start,end)*:
alt := new(Alt); alt := new(iAlt);
p.re.Add(alt); p.re.Add(alt);
end.SetNext(alt); // after end, do alt end.SetNext(alt); // after end, do alt
alt.left = start; // alternate brach: return to start alt.left = start; // alternate brach: return to start
...@@ -445,16 +445,16 @@ func (p *Parser) Closure() (start, end Inst) { ...@@ -445,16 +445,16 @@ func (p *Parser) Closure() (start, end Inst) {
end = alt; end = alt;
case '+': case '+':
// (start,end)+: // (start,end)+:
alt := new(Alt); alt := new(iAlt);
p.re.Add(alt); p.re.Add(alt);
end.SetNext(alt); // after end, do alt end.SetNext(alt); // after end, do alt
alt.left = start; // alternate brach: return to start alt.left = start; // alternate brach: return to start
end = alt; // start is unchanged; end is alt end = alt; // start is unchanged; end is alt
case '?': case '?':
// (start,end)?: // (start,end)?:
alt := new(Alt); alt := new(iAlt);
p.re.Add(alt); p.re.Add(alt);
nop := new(Nop); nop := new(iNop);
p.re.Add(nop); p.re.Add(nop);
alt.left = start; // alternate branch is start alt.left = start; // alternate branch is start
alt.next = nop; // follow on to nop alt.next = nop; // follow on to nop
...@@ -471,18 +471,18 @@ func (p *Parser) Closure() (start, end Inst) { ...@@ -471,18 +471,18 @@ func (p *Parser) Closure() (start, end Inst) {
return return
} }
func (p *Parser) Concatenation() (start, end Inst) { func (p *parser) Concatenation() (start, end instr) {
start, end = NULL, NULL; start, end = iNULL, iNULL;
for { for {
nstart, nend := p.Closure(); nstart, nend := p.Closure();
switch { switch {
case nstart == NULL: // end of this concatenation case nstart == iNULL: // end of this concatenation
if start == NULL { // this is the empty string if start == iNULL { // this is the empty string
nop := p.re.Add(new(Nop)); nop := p.re.Add(new(iNop));
return nop, nop; return nop, nop;
} }
return; return;
case start == NULL: // this is first element of concatenation case start == iNULL: // this is first element of concatenation
start, end = nstart, nend; start, end = nstart, nend;
default: default:
end.SetNext(nstart); end.SetNext(nstart);
...@@ -492,7 +492,7 @@ func (p *Parser) Concatenation() (start, end Inst) { ...@@ -492,7 +492,7 @@ func (p *Parser) Concatenation() (start, end Inst) {
panic("unreachable"); panic("unreachable");
} }
func (p *Parser) Regexp() (start, end Inst) { func (p *parser) Regexp() (start, end instr) {
start, end = p.Concatenation(); start, end = p.Concatenation();
for { for {
switch p.c() { switch p.c() {
...@@ -501,11 +501,11 @@ func (p *Parser) Regexp() (start, end Inst) { ...@@ -501,11 +501,11 @@ func (p *Parser) Regexp() (start, end Inst) {
case '|': case '|':
p.nextc(); p.nextc();
nstart, nend := p.Concatenation(); nstart, nend := p.Concatenation();
alt := new(Alt); alt := new(iAlt);
p.re.Add(alt); p.re.Add(alt);
alt.left = start; alt.left = start;
alt.next = nstart; alt.next = nstart;
nop := new(Nop); nop := new(iNop);
p.re.Add(nop); p.re.Add(nop);
end.SetNext(nop); end.SetNext(nop);
nend.SetNext(nop); nend.SetNext(nop);
...@@ -515,47 +515,47 @@ func (p *Parser) Regexp() (start, end Inst) { ...@@ -515,47 +515,47 @@ func (p *Parser) Regexp() (start, end Inst) {
panic("unreachable"); panic("unreachable");
} }
func UnNop(i Inst) Inst { func UnNop(i instr) instr {
for i.Type() == NOP { for i.Type() == cNOP {
i = i.Next() i = i.Next()
} }
return i return i
} }
func (re *RE) EliminateNops() { func (re *regExp) EliminateNops() {
for i := 0; i < re.inst.Len(); i++ { for i := 0; i < re.inst.Len(); i++ {
inst := re.inst.At(i).(Inst); inst := re.inst.At(i).(instr);
if inst.Type() == END { if inst.Type() == cEND {
continue continue
} }
inst.SetNext(UnNop(inst.Next())); inst.SetNext(UnNop(inst.Next()));
if inst.Type() == ALT { if inst.Type() == cALT {
alt := inst.(*Alt); alt := inst.(*iAlt);
alt.left = UnNop(alt.left); alt.left = UnNop(alt.left);
} }
} }
} }
func (re *RE) Dump() { func (re *regExp) Dump() {
for i := 0; i < re.inst.Len(); i++ { for i := 0; i < re.inst.Len(); i++ {
inst := re.inst.At(i).(Inst); inst := re.inst.At(i).(instr);
print(inst.Index(), ": "); print(inst.Index(), ": ");
inst.Print(); inst.Print();
if inst.Type() != END { if inst.Type() != cEND {
print(" -> ", inst.Next().Index()) print(" -> ", inst.Next().Index())
} }
print("\n"); print("\n");
} }
} }
func (re *RE) DoParse() { func (re *regExp) DoParse() {
parser := NewParser(re); p := newParser(re);
start := new(Start); start := new(iStart);
re.Add(start); re.Add(start);
s, e := parser.Regexp(); s, e := p.Regexp();
start.next = s; start.next = s;
re.start = start; re.start = start;
e.SetNext(re.Add(new(End))); e.SetNext(re.Add(new(iEnd)));
if debug { if debug {
re.Dump(); re.Dump();
...@@ -571,8 +571,8 @@ func (re *RE) DoParse() { ...@@ -571,8 +571,8 @@ func (re *RE) DoParse() {
} }
func Compiler(str string, ch chan *RE) { func Compiler(str string, ch chan *regExp) {
re := new(RE); re := new(regExp);
re.expr = str; re.expr = str;
re.inst = array.New(0); re.inst = array.New(0);
re.ch = ch; re.ch = ch;
...@@ -589,20 +589,20 @@ export type Regexp interface { ...@@ -589,20 +589,20 @@ export type Regexp interface {
// Compile in separate goroutine; wait for result // Compile in separate goroutine; wait for result
export func Compile(str string) (regexp Regexp, error *os.Error) { export func Compile(str string) (regexp Regexp, error *os.Error) {
ch := make(chan *RE); ch := make(chan *regExp);
go Compiler(str, ch); go Compiler(str, ch);
re := <-ch; re := <-ch;
return re, re.error return re, re.error
} }
type State struct { type state struct {
inst Inst; // next instruction to execute inst instr; // next instruction to execute
match []int; // pairs of bracketing submatches. 0th is start,end match []int; // pairs of bracketing submatches. 0th is start,end
} }
// Append new state to to-do list. Leftmost-longest wins so avoid // Append new state to to-do list. Leftmost-longest wins so avoid
// adding a state that's already active. // adding a state that's already active.
func AddState(s []State, inst Inst, match []int) []State { func addState(s []state, inst instr, match []int) []state {
index := inst.Index(); index := inst.Index();
l := len(s); l := len(s);
pos := match[0]; pos := match[0];
...@@ -615,7 +615,7 @@ func AddState(s []State, inst Inst, match []int) []State { ...@@ -615,7 +615,7 @@ func AddState(s []State, inst Inst, match []int) []State {
} }
} }
if l == cap(s) { if l == cap(s) {
s1 := make([]State, 2*l)[0:l]; s1 := make([]state, 2*l)[0:l];
for i := 0; i < l; i++ { for i := 0; i < l; i++ {
s1[i] = s[i]; s1[i] = s[i];
} }
...@@ -627,12 +627,12 @@ func AddState(s []State, inst Inst, match []int) []State { ...@@ -627,12 +627,12 @@ func AddState(s []State, inst Inst, match []int) []State {
return s; return s;
} }
func (re *RE) DoExecute(str string, pos int) []int { func (re *regExp) DoExecute(str string, pos int) []int {
var s [2][]State; // TODO: use a vector when State values (not ptrs) can be vector elements var s [2][]state; // TODO: use a vector when state values (not ptrs) can be vector elements
s[0] = make([]State, 10)[0:0]; s[0] = make([]state, 10)[0:0];
s[1] = make([]State, 10)[0:0]; s[1] = make([]state, 10)[0:0];
in, out := 0, 1; in, out := 0, 1;
var final State; var final state;
found := false; found := false;
for pos <= len(str) { for pos <= len(str) {
if !found { if !found {
...@@ -642,7 +642,7 @@ func (re *RE) DoExecute(str string, pos int) []int { ...@@ -642,7 +642,7 @@ func (re *RE) DoExecute(str string, pos int) []int {
match[i] = -1; // no match seen; catches cases like "a(b)?c" on "ac" match[i] = -1; // no match seen; catches cases like "a(b)?c" on "ac"
} }
match[0] = pos; match[0] = pos;
s[out] = AddState(s[out], re.start.Next(), match); s[out] = addState(s[out], re.start.Next(), match);
} }
in, out = out, in; // old out state is new in state in, out = out, in; // old out state is new in state
s[out] = s[out][0:0]; // clear out state s[out] = s[out][0:0]; // clear out state
...@@ -651,60 +651,60 @@ func (re *RE) DoExecute(str string, pos int) []int { ...@@ -651,60 +651,60 @@ func (re *RE) DoExecute(str string, pos int) []int {
break; break;
} }
charwidth := 1; charwidth := 1;
c := EOF; c := endOfFile;
if pos < len(str) { if pos < len(str) {
c, charwidth = sys.stringtorune(str, pos); c, charwidth = sys.stringtorune(str, pos);
} }
for i := 0; i < len(s[in]); i++ { for i := 0; i < len(s[in]); i++ {
state := s[in][i]; st := s[in][i];
switch s[in][i].inst.Type() { switch s[in][i].inst.Type() {
case BOT: case cBOT:
if pos == 0 { if pos == 0 {
s[in] = AddState(s[in], state.inst.Next(), state.match) s[in] = addState(s[in], st.inst.Next(), st.match)
} }
case EOT: case cEOT:
if pos == len(str) { if pos == len(str) {
s[in] = AddState(s[in], state.inst.Next(), state.match) s[in] = addState(s[in], st.inst.Next(), st.match)
} }
case CHAR: case cCHAR:
if c == state.inst.(*Char).char { if c == st.inst.(*iChar).char {
s[out] = AddState(s[out], state.inst.Next(), state.match) s[out] = addState(s[out], st.inst.Next(), st.match)
} }
case CHARCLASS: case cCHARCLASS:
if state.inst.(*CharClass).Matches(c) { if st.inst.(*iCharClass).Matches(c) {
s[out] = AddState(s[out], state.inst.Next(), state.match) s[out] = addState(s[out], st.inst.Next(), st.match)
} }
case ANY: case cANY:
if c != EOF { if c != endOfFile {
s[out] = AddState(s[out], state.inst.Next(), state.match) s[out] = addState(s[out], st.inst.Next(), st.match)
} }
case BRA: case cBRA:
n := state.inst.(*Bra).n; n := st.inst.(*iBra).n;
state.match[2*n] = pos; st.match[2*n] = pos;
s[in] = AddState(s[in], state.inst.Next(), state.match); s[in] = addState(s[in], st.inst.Next(), st.match);
case EBRA: case cEBRA:
n := state.inst.(*Ebra).n; n := st.inst.(*iEbra).n;
state.match[2*n+1] = pos; st.match[2*n+1] = pos;
s[in] = AddState(s[in], state.inst.Next(), state.match); s[in] = addState(s[in], st.inst.Next(), st.match);
case ALT: case cALT:
s[in] = AddState(s[in], state.inst.(*Alt).left, state.match); s[in] = addState(s[in], st.inst.(*iAlt).left, st.match);
// give other branch a copy of this match vector // give other branch a copy of this match vector
s1 := make([]int, 2*(re.nbra+1)); s1 := make([]int, 2*(re.nbra+1));
for i := 0; i < len(s1); i++ { for i := 0; i < len(s1); i++ {
s1[i] = state.match[i] s1[i] = st.match[i]
} }
s[in] = AddState(s[in], state.inst.Next(), s1); s[in] = addState(s[in], st.inst.Next(), s1);
case END: case cEND:
// choose leftmost longest // choose leftmost longest
if !found || // first if !found || // first
state.match[0] < final.match[0] || // leftmost st.match[0] < final.match[0] || // leftmost
(state.match[0] == final.match[0] && pos > final.match[1]) { // longest (st.match[0] == final.match[0] && pos > final.match[1]) { // longest
final = state; final = st;
final.match[1] = pos; final.match[1] = pos;
} }
found = true; found = true;
default: default:
state.inst.Print(); st.inst.Print();
panic("unknown instruction in execute"); panic("unknown instruction in execute");
} }
} }
...@@ -714,17 +714,17 @@ func (re *RE) DoExecute(str string, pos int) []int { ...@@ -714,17 +714,17 @@ func (re *RE) DoExecute(str string, pos int) []int {
} }
func (re *RE) Execute(s string) []int { func (re *regExp) Execute(s string) []int {
return re.DoExecute(s, 0) return re.DoExecute(s, 0)
} }
func (re *RE) Match(s string) bool { func (re *regExp) Match(s string) bool {
return len(re.DoExecute(s, 0)) > 0 return len(re.DoExecute(s, 0)) > 0
} }
func (re *RE) MatchStrings(s string) []string { func (re *regExp) MatchStrings(s string) []string {
r := re.DoExecute(s, 0); r := re.DoExecute(s, 0);
if r == nil { if r == nil {
return nil return nil
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment