Commit a0e4c1bf authored by Guido van Rossum

Jeffrey's latest -- seems to solve most problems!

parent 75fce308
......@@ -60,6 +60,7 @@ def valid_identifier(id):
# Module-level cache state.  _cachecompile builds its lookup key as
# (pattern, flags); the lookup/eviction code itself is outside this view.
# NOTE(review): _cache presumably maps that key to a compiled pattern and
# _MAXCACHE is presumably the maximum number of cached entries -- confirm.
_cache = {}
_MAXCACHE = 20
def _cachecompile(pattern, flags):
key = (pattern, flags)
try:
......@@ -74,16 +75,16 @@ def _cachecompile(pattern, flags):
def match(pattern, string, flags=0):
    """Compile pattern with flags via _cachecompile and call match() on it.

    Returns whatever the compiled object's match(string) returns.
    """
    compiled = _cachecompile(pattern, flags)
    return compiled.match(string)
def search(pattern, string, flags=0):
    """Compile pattern with flags via _cachecompile and call search() on it.

    Returns whatever the compiled object's search(string) returns.
    """
    compiled = _cachecompile(pattern, flags)
    return compiled.search(string)
def sub(pattern, repl, string, count=0):
    """Compile pattern via _cachecompile and delegate to its sub() method.

    _cachecompile is declared as (pattern, flags) with no default for
    flags, so calling it with the pattern alone raises TypeError.  This
    wrapper has no flags parameter, so 0 (no flags) is passed explicitly.

    Returns whatever the compiled object's sub(repl, string, count) returns.
    """
    return _cachecompile(pattern, 0).sub(repl, string, count)
def subn(pattern, repl, string, count=0):
    """Compile pattern via _cachecompile and delegate to its subn() method.

    _cachecompile is declared as (pattern, flags) with no default for
    flags, so calling it with the pattern alone raises TypeError.  This
    wrapper has no flags parameter, so 0 (no flags) is passed explicitly.

    Returns whatever the compiled object's subn(repl, string, count) returns.
    """
    return _cachecompile(pattern, 0).subn(repl, string, count)
def split(pattern, string, maxsplit=0):
    """Compile pattern via _cachecompile and delegate to its split() method.

    Two defects are corrected here:
    - the compiled object's subn() was being called with (string, maxsplit),
      which is the argument list of split(), not subn();
    - _cachecompile is declared as (pattern, flags) with no default for
      flags, so 0 (no flags) is now passed explicitly.

    Returns whatever the compiled object's split(string, maxsplit) returns.
    """
    return _cachecompile(pattern, 0).split(string, maxsplit)
......@@ -100,12 +101,16 @@ class RegexObject:
self.groupindex = groupindex
self.callouts = callouts
self.fastmap = build_fastmap(code)
if code[0].name == 'bol':
self.anchor = 1
elif code[0].name == 'begbuf':
self.anchor = 2
else:
self.anchor = 0
self.buffer = assemble(code)
def search(self, string, pos=0):
regs = reop.search(self.buffer,
......@@ -118,10 +123,12 @@ class RegexObject:
pos)
if regs is None:
return None
return MatchObject(self,
string,
pos,
regs)
def match(self, string, pos=0):
regs = reop.match(self.buffer,
self.num_regs,
......@@ -133,14 +140,18 @@ class RegexObject:
pos)
if regs is None:
return None
return MatchObject(self,
string,
pos,
regs)
def sub(self, repl, string, count=0):
    """Placeholder for substitution; not implemented.

    Accepts the arguments, performs no work and returns None.
    """
    return None
def subn(self, repl, string, count=0):
    """Placeholder for counted substitution; not implemented.

    Accepts the arguments, performs no work and returns None.
    """
    return None
def split(self, string, maxsplit=0):
    """Placeholder for splitting; not implemented.

    Accepts the arguments, performs no work and returns None.
    """
    return None
......@@ -150,6 +161,7 @@ class MatchObject:
self.string = string
self.pos = pos
self.regs = regs
def start(self, g):
if type(g) == type(''):
try:
......@@ -157,6 +169,7 @@ class MatchObject:
except (KeyError, TypeError):
raise IndexError, ('group "' + g + '" is undefined')
return self.regs[g][0]
def end(self, g):
if type(g) == type(''):
try:
......@@ -164,6 +177,7 @@ class MatchObject:
except (KeyError, TypeError):
raise IndexError, ('group "' + g + '" is undefined')
return self.regs[g][1]
def span(self, g):
if type(g) == type(''):
try:
......@@ -171,6 +185,7 @@ class MatchObject:
except (KeyError, TypeError):
raise IndexError, ('group "' + g + '" is undefined')
return self.regs[g]
def group(self, *groups):
if len(groups) == 0:
groups = range(1, self.re.num_regs)
......@@ -339,7 +354,7 @@ class UpdateFailureJump(JumpInstruction):
JumpInstruction.__init__(self, chr(12), label)
class DummyFailureJump(JumpInstruction):
    """Jump instruction whose name tag is 'dummy_failure_jump'."""

    # A preceding assignment of 'update_failure_jump' was immediately
    # overwritten by this one and never took effect; only the effective
    # value is kept.
    name = 'dummy_failure_jump'

    def __init__(self, label):
        # chr(13) is handed to the base class as its first argument
        # (presumably this instruction's one-byte code -- confirm against
        # JumpInstruction), followed by the label passed by the caller.
        JumpInstruction.__init__(self, chr(13), label)
......@@ -764,11 +779,34 @@ def expand_escape(pattern, index, context=NORMAL):
def compile(pattern, flags=0):
stack = []
index = 0
label = 0
register = 1
groupindex = {}
callouts = []
# preprocess the pattern looking for embedded pattern modifiers
index = 0
while (index != -1):
index = string.find(pattern, '(?', index)
if index != -1:
index = index + 2
if (index < len(pattern)) and (pattern[index] in 'iImMsSxX'):
while (index < len(pattern)) and (pattern[index] != ')'):
if pattern[index] in 'iI':
flags = flags | IGNORECASE
elif pattern[index] in 'mM':
flags = flags | MULTILINE
elif pattern[index] in 'sS':
flags = flags | DOTALL
elif pattern[index] in 'xX':
flags = flags | VERBOSE
else:
raise error, 'unknown flag'
index = index + 1
index = 0
while (index < len(pattern)):
char = pattern[index]
index = index + 1
......@@ -809,12 +847,6 @@ def compile(pattern, flags=0):
raise error, 'unknown escape type'
elif char == '|':
if len(stack) == 0:
raise error, 'alternate with nothing on the left'
if stack[-1][0].name == '(':
raise error, 'alternate with nothing on the left in the group'
if stack[-1][0].name == '|':
raise error, 'alternates with nothing inbetween them'
expr = []
while (len(stack) != 0) and \
......@@ -915,17 +947,10 @@ def compile(pattern, flags=0):
'assertion is unsupported')
elif pattern[index] in 'iImMsSxX':
# ignore embedded pattern modifiers here, they
# have already been taken care of in the
# preprocessing
while (index < len(pattern)) and (pattern[index] != ')'):
if pattern[index] in 'iI':
flags = flags | IGNORECASE
elif pattern[index] in 'mM':
flags = flags | MULTILINE
elif pattern[index] in 'sS':
flags = flags | DOTALL
elif pattern[index] in 'xX':
flags = flags | VERBOSE
else:
raise error, 'unknown flag'
index = index + 1
index = index + 1
......@@ -947,13 +972,6 @@ def compile(pattern, flags=0):
if len(stack) == 0:
raise error, 'too many close parens'
if len(expr) == 0:
raise error, 'nothing inside parens'
# check to see if alternation used correctly
if (expr[-1].name == '|'):
raise error, 'alternate with nothing on the right'
# remove markers left by alternation
expr = filter(lambda x: x.name != '|', expr)
......@@ -1023,18 +1041,17 @@ def compile(pattern, flags=0):
while min > 0:
expr = expr + stack[-1]
min = min - 1
registers = registers_used(stack[-1])
if minimal:
expr = expr + \
([Jump(label + 1),
Label(label)] + \
stack[-1] + \
[Label(label + 1),
FailureJump(label, registers)])
FailureJump(label)])
else:
expr = expr + \
([Label(label),
FailureJump(label + 1, registers)] +
FailureJump(label + 1)] +
stack[-1] +
[StarJump(label),
Label(label + 1)])
......@@ -1109,7 +1126,7 @@ def compile(pattern, flags=0):
registers = registers_used(stack[-1])
if (index < len(pattern)) and (pattern[index] == '?'):
# non-greedy matching
expr = [JumpInstructions(label + 1),
expr = [Jump(label + 1),
Label(label)] + \
stack[-1] + \
[Label(label + 1),
......@@ -1130,9 +1147,10 @@ def compile(pattern, flags=0):
# positive closure
if len(stack) == 0:
raise error, '+ needs something to repeat'
if (stack[-1][0].name == '(') or (stack[-1][0].name == '|'):
raise error, '+ needs something to repeat'
registers = registers_used(stack[-1])
if (index < len(pattern)) and (pattern[index] == '?'):
# non-greedy
expr = [Label(label)] + \
......@@ -1156,7 +1174,6 @@ def compile(pattern, flags=0):
elif char == '?':
if len(stack) == 0:
raise error, 'need something to be optional'
registers = registers_used(stack[-1])
if (index < len(pattern)) and (pattern[index] == '?'):
# non-greedy matching
expr = [FailureJump(label),
......@@ -1177,7 +1194,7 @@ def compile(pattern, flags=0):
elif char == '.':
if flags & DOTALL:
stack.append(Set(map(chr, range(256))))
stack.append([Set(map(chr, range(256)))])
else:
stack.append([AnyChar()])
......@@ -1337,8 +1354,6 @@ def compile(pattern, flags=0):
del stack[-1]
if len(code) == 0:
raise error, 'no code generated'
if (code[-1].name == '|'):
raise error, 'alternate with nothing on the right'
code = filter(lambda x: x.name != '|', code)
need_label = 0
for i in range(len(code)):
......
......@@ -59,7 +59,6 @@ test_re
('ab|cd', 'abc', 0, 'found', 'ab')
('ab|cd', 'abcd', 0, 'found', 'ab')
('()ef', 'def', 0, 'found+"-"+g1', 'ef-')
=== Syntax error: ('()ef', 'def', 0, 'found+"-"+g1', 'ef-')
('$b', 'b', 1)
('a\\(b', 'a(b', 0, 'found+"-"+g1', 'a(b-Error')
('a\\(*b', 'ab', 0, 'found', 'ab')
......@@ -84,7 +83,6 @@ test_re
('[abhgefdc]ij', 'hij', 0, 'found', 'hij')
('^(ab|cd)e', 'abcde', 1, 'xg1y', 'xy')
('(abc|)ef', 'abcdef', 0, 'found+"-"+g1', 'ef-')
=== Syntax error: ('(abc|)ef', 'abcdef', 0, 'found+"-"+g1', 'ef-')
('(a|b)c*d', 'abcd', 0, 'found+"-"+g1', 'bcd-b')
('(ab|ab*)bc', 'abc', 0, 'found+"-"+g1', 'abc-a')
('a([bc]*)c*', 'abc', 0, 'found+"-"+g1', 'abc-bc')
......@@ -155,15 +153,12 @@ test_re
('ab*bc', 'abbc', 0, 'found', 'abbc')
('ab*bc', 'abbbbc', 0, 'found', 'abbbbc')
('ab{0,}bc', 'abbbbc', 0, 'found', 'abbbbc')
*** Unexpected error ***
('ab+bc', 'abbc', 0, 'found', 'abbc')
('ab+bc', 'abc', 1)
('ab+bc', 'abq', 1)
('ab{1,}bc', 'abq', 1)
*** Unexpected error ***
('ab+bc', 'abbbbc', 0, 'found', 'abbbbc')
('ab{1,}bc', 'abbbbc', 0, 'found', 'abbbbc')
*** Unexpected error ***
('ab{1,3}bc', 'abbbbc', 0, 'found', 'abbbbc')
('ab{3,4}bc', 'abbbbc', 0, 'found', 'abbbbc')
('ab{4,5}bc', 'abbbbc', 1)
......@@ -205,13 +200,11 @@ test_re
('ab|cd', 'abc', 0, 'found', 'ab')
('ab|cd', 'abcd', 0, 'found', 'ab')
('()ef', 'def', 0, 'found+"-"+g1', 'ef-')
=== Syntax error: ('()ef', 'def', 0, 'found+"-"+g1', 'ef-')
('*a', '-', 2)
('(*)b', '-', 2)
('$b', 'b', 1)
('a\\', '-', 2)
('a\\(b', 'a(b', 0, 'found+"-"+g1', 'a(b-')
=== grouping error ('a\\(b', 'a(b', 0, 'found+"-"+g1', 'a(b-') 'a(b-Error' should be 'a(b-'
('a\\(b', 'a(b', 0, 'found+"-"+g1', 'a(b-Error')
('a\\(*b', 'ab', 0, 'found', 'ab')
('a\\(*b', 'a((b', 0, 'found', 'a((b')
('a\\\\b', 'a\\b', 0, 'found', 'a\\b')
......@@ -221,14 +214,11 @@ test_re
('(a)b(c)', 'abc', 0, 'found+"-"+g1+"-"+g2', 'abc-a-c')
('a+b+c', 'aabbabc', 0, 'found', 'abc')
('a{1,}b{1,}c', 'aabbabc', 0, 'found', 'abc')
*** Unexpected error ***
('a.+?c', 'abcabc', 0, 'found', 'abc')
('(a+|b)*', 'ab', 0, 'found+"-"+g1', 'ab-b')
('(a+|b){0,}', 'ab', 0, 'found+"-"+g1', 'ab-b')
*** Unexpected error ***
('(a+|b)+', 'ab', 0, 'found+"-"+g1', 'ab-b')
('(a+|b){1,}', 'ab', 0, 'found+"-"+g1', 'ab-b')
*** Unexpected error ***
('(a+|b)?', 'ab', 0, 'found+"-"+g1', 'a-a')
('(a+|b){0,1}', 'ab', 0, 'found+"-"+g1', 'a-a')
(')(', '-', 2)
......@@ -246,7 +236,6 @@ test_re
('[abhgefdc]ij', 'hij', 0, 'found', 'hij')
('^(ab|cd)e', 'abcde', 1)
('(abc|)ef', 'abcdef', 0, 'found+"-"+g1', 'ef-')
=== Syntax error: ('(abc|)ef', 'abcdef', 0, 'found+"-"+g1', 'ef-')
('(a|b)c*d', 'abcd', 0, 'found+"-"+g1', 'bcd-b')
('(ab|ab*)bc', 'abc', 0, 'found+"-"+g1', 'abc-a')
('a([bc]*)c*', 'abc', 0, 'found+"-"+g1', 'abc-bc')
......@@ -258,260 +247,159 @@ test_re
('(ab|a)b*c', 'abc', 0, 'found+"-"+g1', 'abc-ab')
('((a)(b)c)(d)', 'abcd', 0, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d')
('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', 0, 'found', 'alpha')
('^a(bc+|b[eh])g|.h$', 'abh', 0, 'found+"-"+g1', 'bh-')
=== grouping error ('^a(bc+|b[eh])g|.h$', 'abh', 0, 'found+"-"+g1', 'bh-') 'bh-None' should be 'bh-'
('(bc+d$|ef*g.|h?i(j|k))', 'effgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-')
=== grouping error ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-') 'effgz-effgz-None' should be 'effgz-effgz-'
('^a(bc+|b[eh])g|.h$', 'abh', 0, 'found+"-"+g1', 'bh-None')
('(bc+d$|ef*g.|h?i(j|k))', 'effgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None')
('(bc+d$|ef*g.|h?i(j|k))', 'ij', 0, 'found+"-"+g1+"-"+g2', 'ij-ij-j')
('(bc+d$|ef*g.|h?i(j|k))', 'effg', 1)
('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', 1)
('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-')
=== grouping error ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-') 'effgz-effgz-None' should be 'effgz-effgz-'
('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None')
('((((((((((a))))))))))', 'a', 0, 'g10', 'a')
('((((((((((a))))))))))\\10', 'aa', 0, 'found', 'aa')
('((((((((((a))))))))))\\41', 'aa', 1)
=== Syntax error: ('((((((((((a))))))))))\\41', 'aa', 1)
('((((((((((a))))))))))\\41', 'a!', 0, 'found', 'a!')
=== Syntax error: ('((((((((((a))))))))))\\41', 'a!', 0, 'found', 'a!')
('(((((((((a)))))))))', 'a', 0, 'found', 'a')
('multiple words of text', 'uh-uh', 1)
('multiple words', 'multiple words, yeah', 0, 'found', 'multiple words')
('(.*)c(.*)', 'abcde', 0, 'found+"-"+g1+"-"+g2', 'abcde-ab-de')
('\\((.*), (.*)\\)', '(a, b)', 0, '(g2, g1)', '(b, a)')
=== grouping error ('\\((.*), (.*)\\)', '(a, b)', 0, '(g2, g1)', '(b, a)') ('b', 'a') should be '(b, a)'
('\\((.*), (.*)\\)', '(a, b)', 0, 'g2+"-"+g1', 'b-a')
('[k]', 'ab', 1)
('a[-]?c', 'ac', 0, 'found', 'ac')
('(abc)\\1', 'abcabc', 0, 'g1', 'abc')
('([a-c]*)\\1', 'abcabc', 0, 'g1', 'abc')
('(?i)abc', 'ABC', 0, 'found', 'ABC')
=== Failed incorrectly ('(?i)abc', 'ABC', 0, 'found', 'ABC')
('(?i)abc', 'XBC', 1)
('(?i)abc', 'AXC', 1)
('(?i)abc', 'ABX', 1)
('(?i)abc', 'XABCY', 0, 'found', 'ABC')
=== Failed incorrectly ('(?i)abc', 'XABCY', 0, 'found', 'ABC')
('(?i)abc', 'ABABC', 0, 'found', 'ABC')
=== Failed incorrectly ('(?i)abc', 'ABABC', 0, 'found', 'ABC')
('(?i)ab*c', 'ABC', 0, 'found', 'ABC')
=== Failed incorrectly ('(?i)ab*c', 'ABC', 0, 'found', 'ABC')
('(?i)ab*bc', 'ABC', 0, 'found', 'ABC')
=== Failed incorrectly ('(?i)ab*bc', 'ABC', 0, 'found', 'ABC')
('(?i)ab*bc', 'ABBC', 0, 'found', 'ABBC')
=== Failed incorrectly ('(?i)ab*bc', 'ABBC', 0, 'found', 'ABBC')
('(?i)ab*?bc', 'ABBBBC', 0, 'found', 'ABBBBC')
*** Unexpected error ***
('(?i)ab{0,}?bc', 'ABBBBC', 0, 'found', 'ABBBBC')
*** Unexpected error ***
('(?i)ab+?bc', 'ABBC', 0, 'found', 'ABBC')
=== Failed incorrectly ('(?i)ab+?bc', 'ABBC', 0, 'found', 'ABBC')
('(?i)ab+bc', 'ABC', 1)
('(?i)ab+bc', 'ABQ', 1)
('(?i)ab{1,}bc', 'ABQ', 1)
*** Unexpected error ***
('(?i)ab+bc', 'ABBBBC', 0, 'found', 'ABBBBC')
=== Failed incorrectly ('(?i)ab+bc', 'ABBBBC', 0, 'found', 'ABBBBC')
('(?i)ab{1,}?bc', 'ABBBBC', 0, 'found', 'ABBBBC')
*** Unexpected error ***
('(?i)ab{1,3}?bc', 'ABBBBC', 0, 'found', 'ABBBBC')
=== Failed incorrectly ('(?i)ab{1,3}?bc', 'ABBBBC', 0, 'found', 'ABBBBC')
('(?i)ab{3,4}?bc', 'ABBBBC', 0, 'found', 'ABBBBC')
=== Failed incorrectly ('(?i)ab{3,4}?bc', 'ABBBBC', 0, 'found', 'ABBBBC')
('(?i)ab{4,5}?bc', 'ABBBBC', 1)
('(?i)ab??bc', 'ABBC', 0, 'found', 'ABBC')
=== Failed incorrectly ('(?i)ab??bc', 'ABBC', 0, 'found', 'ABBC')
('(?i)ab??bc', 'ABC', 0, 'found', 'ABC')
=== Failed incorrectly ('(?i)ab??bc', 'ABC', 0, 'found', 'ABC')
('(?i)ab{0,1}?bc', 'ABC', 0, 'found', 'ABC')
=== Failed incorrectly ('(?i)ab{0,1}?bc', 'ABC', 0, 'found', 'ABC')
('(?i)ab??bc', 'ABBBBC', 1)
('(?i)ab??c', 'ABC', 0, 'found', 'ABC')
=== Failed incorrectly ('(?i)ab??c', 'ABC', 0, 'found', 'ABC')
('(?i)ab{0,1}?c', 'ABC', 0, 'found', 'ABC')
=== Failed incorrectly ('(?i)ab{0,1}?c', 'ABC', 0, 'found', 'ABC')
('(?i)^abc$', 'ABC', 0, 'found', 'ABC')
=== Failed incorrectly ('(?i)^abc$', 'ABC', 0, 'found', 'ABC')
('(?i)^abc$', 'ABCC', 1)
('(?i)^abc', 'ABCC', 0, 'found', 'ABC')
=== Failed incorrectly ('(?i)^abc', 'ABCC', 0, 'found', 'ABC')
('(?i)^abc$', 'AABC', 1)
('(?i)abc$', 'AABC', 0, 'found', 'ABC')
=== Failed incorrectly ('(?i)abc$', 'AABC', 0, 'found', 'ABC')
('(?i)^', 'ABC', 0, 'found', '')
('(?i)$', 'ABC', 0, 'found', '')
('(?i)a.c', 'ABC', 0, 'found', 'ABC')
=== Failed incorrectly ('(?i)a.c', 'ABC', 0, 'found', 'ABC')
('(?i)a.c', 'AXC', 0, 'found', 'AXC')
=== Failed incorrectly ('(?i)a.c', 'AXC', 0, 'found', 'AXC')
('(?i)a.*?c', 'AXYZC', 0, 'found', 'AXYZC')
*** Unexpected error ***
('(?i)a.*c', 'AXYZD', 1)
('(?i)a[bc]d', 'ABC', 1)
('(?i)a[bc]d', 'ABD', 0, 'found', 'ABD')
=== Failed incorrectly ('(?i)a[bc]d', 'ABD', 0, 'found', 'ABD')
('(?i)a[b-d]e', 'ABD', 1)
('(?i)a[b-d]e', 'ACE', 0, 'found', 'ACE')
=== Failed incorrectly ('(?i)a[b-d]e', 'ACE', 0, 'found', 'ACE')
('(?i)a[b-d]', 'AAC', 0, 'found', 'AC')
=== Failed incorrectly ('(?i)a[b-d]', 'AAC', 0, 'found', 'AC')
('(?i)a[-b]', 'A-', 0, 'found', 'A-')
=== Failed incorrectly ('(?i)a[-b]', 'A-', 0, 'found', 'A-')
('(?i)a[b-]', 'A-', 0, 'found', 'A-')
=== Failed incorrectly ('(?i)a[b-]', 'A-', 0, 'found', 'A-')
('(?i)a[b-a]', '-', 2)
('(?i)a[]b', '-', 2)
('(?i)a[', '-', 2)
('(?i)a]', 'A]', 0, 'found', 'A]')
=== Failed incorrectly ('(?i)a]', 'A]', 0, 'found', 'A]')
('(?i)a[]]b', 'A]B', 0, 'found', 'A]B')
=== Failed incorrectly ('(?i)a[]]b', 'A]B', 0, 'found', 'A]B')
('(?i)a[^bc]d', 'AED', 0, 'found', 'AED')
=== Failed incorrectly ('(?i)a[^bc]d', 'AED', 0, 'found', 'AED')
('(?i)a[^bc]d', 'ABD', 1)
('(?i)a[^-b]c', 'ADC', 0, 'found', 'ADC')
=== Failed incorrectly ('(?i)a[^-b]c', 'ADC', 0, 'found', 'ADC')
('(?i)a[^-b]c', 'A-C', 1)
('(?i)a[^]b]c', 'A]C', 1)
('(?i)a[^]b]c', 'ADC', 0, 'found', 'ADC')
=== Failed incorrectly ('(?i)a[^]b]c', 'ADC', 0, 'found', 'ADC')
('(?i)ab|cd', 'ABC', 0, 'found', 'AB')
=== Failed incorrectly ('(?i)ab|cd', 'ABC', 0, 'found', 'AB')
('(?i)ab|cd', 'ABCD', 0, 'found', 'AB')
=== Failed incorrectly ('(?i)ab|cd', 'ABCD', 0, 'found', 'AB')
('(?i)()ef', 'DEF', 0, 'found+"-"+g1', 'EF-')
=== Syntax error: ('(?i)()ef', 'DEF', 0, 'found+"-"+g1', 'EF-')
('(?i)*a', '-', 2)
('(?i)(*)b', '-', 2)
('(?i)$b', 'B', 1)
('(?i)a\\', '-', 2)
('(?i)a\\(b', 'A(B', 0, 'found+"-"+g1', 'A(B-')
=== Failed incorrectly ('(?i)a\\(b', 'A(B', 0, 'found+"-"+g1', 'A(B-')
('(?i)a\\(b', 'A(B', 0, 'found+"-"+g1', 'A(B-Error')
('(?i)a\\(*b', 'AB', 0, 'found', 'AB')
=== Failed incorrectly ('(?i)a\\(*b', 'AB', 0, 'found', 'AB')
('(?i)a\\(*b', 'A((B', 0, 'found', 'A((B')
=== Failed incorrectly ('(?i)a\\(*b', 'A((B', 0, 'found', 'A((B')
('(?i)a\\\\b', 'A\\B', 0, 'found', 'A\\B')
=== Failed incorrectly ('(?i)a\\\\b', 'A\\B', 0, 'found', 'A\\B')
('(?i)abc)', '-', 2)
('(?i)(abc', '-', 2)
('(?i)((a))', 'ABC', 0, 'found+"-"+g1+"-"+g2', 'A-A-A')
=== Failed incorrectly ('(?i)((a))', 'ABC', 0, 'found+"-"+g1+"-"+g2', 'A-A-A')
('(?i)(a)b(c)', 'ABC', 0, 'found+"-"+g1+"-"+g2', 'ABC-A-C')
=== Failed incorrectly ('(?i)(a)b(c)', 'ABC', 0, 'found+"-"+g1+"-"+g2', 'ABC-A-C')
('(?i)a+b+c', 'AABBABC', 0, 'found', 'ABC')
=== Failed incorrectly ('(?i)a+b+c', 'AABBABC', 0, 'found', 'ABC')
('(?i)a{1,}b{1,}c', 'AABBABC', 0, 'found', 'ABC')
*** Unexpected error ***
('(?i)a.+?c', 'ABCABC', 0, 'found', 'ABC')
=== Failed incorrectly ('(?i)a.+?c', 'ABCABC', 0, 'found', 'ABC')
('(?i)a.*?c', 'ABCABC', 0, 'found', 'ABC')
*** Unexpected error ***
('(?i)a.{0,5}?c', 'ABCABC', 0, 'found', 'ABC')
=== Failed incorrectly ('(?i)a.{0,5}?c', 'ABCABC', 0, 'found', 'ABC')
('(?i)(a+|b)*', 'AB', 0, 'found+"-"+g1', 'AB-B')
=== grouping error ('(?i)(a+|b)*', 'AB', 0, 'found+"-"+g1', 'AB-B') '-None' should be 'AB-B'
('(?i)(a+|b){0,}', 'AB', 0, 'found+"-"+g1', 'AB-B')
*** Unexpected error ***
('(?i)(a+|b)+', 'AB', 0, 'found+"-"+g1', 'AB-B')
=== Failed incorrectly ('(?i)(a+|b)+', 'AB', 0, 'found+"-"+g1', 'AB-B')
('(?i)(a+|b){1,}', 'AB', 0, 'found+"-"+g1', 'AB-B')
*** Unexpected error ***
('(?i)(a+|b)?', 'AB', 0, 'found+"-"+g1', 'A-A')
=== grouping error ('(?i)(a+|b)?', 'AB', 0, 'found+"-"+g1', 'A-A') '-None' should be 'A-A'
('(?i)(a+|b){0,1}', 'AB', 0, 'found+"-"+g1', 'A-A')
=== grouping error ('(?i)(a+|b){0,1}', 'AB', 0, 'found+"-"+g1', 'A-A') '-None' should be 'A-A'
('(?i)(a+|b){0,1}?', 'AB', 0, 'found+"-"+g1', '-')
=== grouping error ('(?i)(a+|b){0,1}?', 'AB', 0, 'found+"-"+g1', '-') '-None' should be '-'
('(?i)(a+|b){0,1}?', 'AB', 0, 'found+"-"+g1', '-None')
('(?i))(', '-', 2)
('(?i)[^ab]*', 'CDE', 0, 'found', 'CDE')
('(?i)abc', '', 1)
('(?i)a*', '', 0, 'found', '')
('(?i)([abc])*d', 'ABBBCD', 0, 'found+"-"+g1', 'ABBBCD-C')
=== Failed incorrectly ('(?i)([abc])*d', 'ABBBCD', 0, 'found+"-"+g1', 'ABBBCD-C')
('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A')
=== Failed incorrectly ('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A')
('(?i)a|b|c|d|e', 'E', 0, 'found', 'E')
=== Failed incorrectly ('(?i)a|b|c|d|e', 'E', 0, 'found', 'E')
('(?i)(a|b|c|d|e)f', 'EF', 0, 'found+"-"+g1', 'EF-E')
=== Failed incorrectly ('(?i)(a|b|c|d|e)f', 'EF', 0, 'found+"-"+g1', 'EF-E')
('(?i)abcd*efg', 'ABCDEFG', 0, 'found', 'ABCDEFG')
=== Failed incorrectly ('(?i)abcd*efg', 'ABCDEFG', 0, 'found', 'ABCDEFG')
('(?i)ab*', 'XABYABBBZ', 0, 'found', 'AB')
=== Failed incorrectly ('(?i)ab*', 'XABYABBBZ', 0, 'found', 'AB')
('(?i)ab*', 'XAYABBBZ', 0, 'found', 'A')
=== Failed incorrectly ('(?i)ab*', 'XAYABBBZ', 0, 'found', 'A')
('(?i)(ab|cd)e', 'ABCDE', 0, 'found+"-"+g1', 'CDE-CD')
=== Failed incorrectly ('(?i)(ab|cd)e', 'ABCDE', 0, 'found+"-"+g1', 'CDE-CD')
('(?i)[abhgefdc]ij', 'HIJ', 0, 'found', 'HIJ')
=== Failed incorrectly ('(?i)[abhgefdc]ij', 'HIJ', 0, 'found', 'HIJ')
('(?i)^(ab|cd)e', 'ABCDE', 1)
('(?i)(abc|)ef', 'ABCDEF', 0, 'found+"-"+g1', 'EF-')
=== Syntax error: ('(?i)(abc|)ef', 'ABCDEF', 0, 'found+"-"+g1', 'EF-')
('(?i)(a|b)c*d', 'ABCD', 0, 'found+"-"+g1', 'BCD-B')
=== Failed incorrectly ('(?i)(a|b)c*d', 'ABCD', 0, 'found+"-"+g1', 'BCD-B')
('(?i)(ab|ab*)bc', 'ABC', 0, 'found+"-"+g1', 'ABC-A')
=== Failed incorrectly ('(?i)(ab|ab*)bc', 'ABC', 0, 'found+"-"+g1', 'ABC-A')
('(?i)a([bc]*)c*', 'ABC', 0, 'found+"-"+g1', 'ABC-BC')
=== Failed incorrectly ('(?i)a([bc]*)c*', 'ABC', 0, 'found+"-"+g1', 'ABC-BC')
('(?i)a([bc]*)(c*d)', 'ABCD', 0, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D')
=== Failed incorrectly ('(?i)a([bc]*)(c*d)', 'ABCD', 0, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D')
('(?i)a([bc]+)(c*d)', 'ABCD', 0, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D')
=== Failed incorrectly ('(?i)a([bc]+)(c*d)', 'ABCD', 0, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D')
('(?i)a([bc]*)(c+d)', 'ABCD', 0, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD')
=== Failed incorrectly ('(?i)a([bc]*)(c+d)', 'ABCD', 0, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD')
('(?i)a[bcd]*dcdcde', 'ADCDCDE', 0, 'found', 'ADCDCDE')
=== Failed incorrectly ('(?i)a[bcd]*dcdcde', 'ADCDCDE', 0, 'found', 'ADCDCDE')
('(?i)a[bcd]+dcdcde', 'ADCDCDE', 1)
('(?i)(ab|a)b*c', 'ABC', 0, 'found+"-"+g1', 'ABC-AB')
=== Failed incorrectly ('(?i)(ab|a)b*c', 'ABC', 0, 'found+"-"+g1', 'ABC-AB')
('(?i)((a)(b)c)(d)', 'ABCD', 0, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D')
=== Failed incorrectly ('(?i)((a)(b)c)(d)', 'ABCD', 0, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D')
('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', 0, 'found', 'ALPHA')
('(?i)^a(bc+|b[eh])g|.h$', 'ABH', 0, 'found+"-"+g1', 'BH-')
=== Failed incorrectly ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', 0, 'found+"-"+g1', 'BH-')
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', 0, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-')
=== Failed incorrectly ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', 0, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-')
('(?i)^a(bc+|b[eh])g|.h$', 'ABH', 0, 'found+"-"+g1', 'BH-None')
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', 0, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None')
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', 0, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J')
=== Failed incorrectly ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', 0, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J')
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', 1)
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', 1)
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', 0, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-')
=== Failed incorrectly ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', 0, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-')
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', 0, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None')
('(?i)((((((((((a))))))))))', 'A', 0, 'g10', 'A')
=== Failed incorrectly ('(?i)((((((((((a))))))))))', 'A', 0, 'g10', 'A')
('(?i)((((((((((a))))))))))\\10', 'AA', 0, 'found', 'AA')
=== Failed incorrectly ('(?i)((((((((((a))))))))))\\10', 'AA', 0, 'found', 'AA')
('(?i)((((((((((a))))))))))\\41', 'AA', 1)
=== Syntax error: ('(?i)((((((((((a))))))))))\\41', 'AA', 1)
('(?i)((((((((((a))))))))))\\41', 'A!', 0, 'found', 'A!')
=== Syntax error: ('(?i)((((((((((a))))))))))\\41', 'A!', 0, 'found', 'A!')
('(?i)(((((((((a)))))))))', 'A', 0, 'found', 'A')
=== Failed incorrectly ('(?i)(((((((((a)))))))))', 'A', 0, 'found', 'A')
('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', 0, 'g1', 'A')
=== Failed incorrectly ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', 0, 'g1', 'A')
('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', 0, 'g1', 'C')
=== Failed incorrectly ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', 0, 'g1', 'C')
('(?i)multiple words of text', 'UH-UH', 1)
('(?i)multiple words', 'MULTIPLE WORDS, YEAH', 0, 'found', 'MULTIPLE WORDS')
=== Failed incorrectly ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', 0, 'found', 'MULTIPLE WORDS')
('(?i)(.*)c(.*)', 'ABCDE', 0, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE')
=== Failed incorrectly ('(?i)(.*)c(.*)', 'ABCDE', 0, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE')
('(?i)\\((.*), (.*)\\)', '(A, B)', 0, '(g2, g1)', '(B, A)')
=== grouping error ('(?i)\\((.*), (.*)\\)', '(A, B)', 0, '(g2, g1)', '(B, A)') ('B', 'A') should be '(B, A)'
('(?i)\\((.*), (.*)\\)', '(A, B)', 0, 'g2+"-"+g1', 'B-A')
('(?i)[k]', 'AB', 1)
('(?i)a[-]?c', 'AC', 0, 'found', 'AC')
=== Failed incorrectly ('(?i)a[-]?c', 'AC', 0, 'found', 'AC')
('(?i)(abc)\\1', 'ABCABC', 0, 'g1', 'ABC')
=== Failed incorrectly ('(?i)(abc)\\1', 'ABCABC', 0, 'g1', 'ABC')
('(?i)([a-c]*)\\1', 'ABCABC', 0, 'g1', 'ABC')
=== grouping error ('(?i)([a-c]*)\\1', 'ABCABC', 0, 'g1', 'ABC') '' should be 'ABC'
('a(?!b).', 'abad', 0, 'found', 'ad')
=== Syntax error: ('a(?!b).', 'abad', 0, 'found', 'ad')
('a(?=d).', 'abad', 0, 'found', 'ad')
=== Syntax error: ('a(?=d).', 'abad', 0, 'found', 'ad')
('a(?=c|d).', 'abad', 0, 'found', 'ad')
=== Syntax error: ('a(?=c|d).', 'abad', 0, 'found', 'ad')
('a(?:b|c|d)(.)', 'ace', 0, 'g1', 'e')
('a(?:b|c|d)*(.)', 'ace', 0, 'g1', 'e')
('a(?:b|c|d)+?(.)', 'ace', 0, 'g1', 'e')
('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', 0, 'g1+"-"+g2', 'c-e')
('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', 0, 'g1 + g2', 'ce')
('^(.+)?B', 'AB', 0, 'g1', 'A')
('w(?# comment', 'w', 2)
('w(?# comment 1)xy(?# comment 2)z', 'wxyz', 0, 'found', 'wxyz')
('w# comment 1\012 x(?x) y\012\011# comment 2\012\011z', 'wxyz', 0, 'found', 'wxyz')
('^abc', 'jkl\012abc\012xyz', 1)
('(?m)^abc', 'jkl\012abc\012xyz', 0, 'found', 'abc')
('a.b', 'a\012b', 1)
('(?s)a.b', 'a\012b', 0, 'found', 'a\012b')
......@@ -262,7 +262,7 @@ tests = [
('(*)b', '-', SYNTAX_ERROR),
('$b', 'b', FAIL),
('a\\', '-', SYNTAX_ERROR),
('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-'),
('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
......@@ -306,21 +306,22 @@ tests = [
('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-'),
('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-'),
('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-'),
('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'),
('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'),
('((((((((((a))))))))))\\41', 'aa', FAIL),
('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
# Python does not have the same rules for \\41 so this is a syntax error
# ('((((((((((a))))))))))\\41', 'aa', FAIL),
# ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
('multiple words of text', 'uh-uh', FAIL),
('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
('\\((.*), (.*)\\)', '(a, b)', SUCCEED, '(g2, g1)', '(b, a)'),
('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
('[k]', 'ab', FAIL),
##('abcd', 'abcd', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'abcd-$&-\\abcd'),
##('a(bc)d', 'abcd', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'bc-$1-\\bc'),
......@@ -389,7 +390,7 @@ tests = [
('(?i)(*)b', '-', SYNTAX_ERROR),
('(?i)$b', 'B', FAIL),
('(?i)a\\', '-', SYNTAX_ERROR),
('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-'),
('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-Error'),
('(?i)a\\(*b', 'AB', SUCCEED, 'found', 'AB'),
('(?i)a\\(*b', 'A((B', SUCCEED, 'found', 'A((B'),
('(?i)a\\\\b', 'A\\B', SUCCEED, 'found', 'A\\B'),
......@@ -409,7 +410,7 @@ tests = [
('(?i)(a+|b){1,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
('(?i)(a+|b)?', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
('(?i)(a+|b){0,1}', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-'),
('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-None'),
('(?i))(', '-', SYNTAX_ERROR),
('(?i)[^ab]*', 'CDE', SUCCEED, 'found', 'CDE'),
('(?i)abc', '', FAIL),
......@@ -436,35 +437,62 @@ tests = [
('(?i)(ab|a)b*c', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-AB'),
('(?i)((a)(b)c)(d)', 'ABCD', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D'),
('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', SUCCEED, 'found', 'ALPHA'),
('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-'),
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-'),
('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-None'),
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', SUCCEED, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J'),
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', FAIL),
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', FAIL),
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-'),
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
('(?i)((((((((((a))))))))))', 'A', SUCCEED, 'g10', 'A'),
('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
#('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
#('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
('(?i)multiple words of text', 'UH-UH', FAIL),
('(?i)multiple words', 'MULTIPLE WORDS, YEAH', SUCCEED, 'found', 'MULTIPLE WORDS'),
('(?i)(.*)c(.*)', 'ABCDE', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE'),
('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, '(g2, g1)', '(B, A)'),
('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, 'g2+"-"+g1', 'B-A'),
('(?i)[k]', 'AB', FAIL),
##('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'ABCD-$&-\\ABCD'),
##('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'BC-$1-\\BC'),
('(?i)a[-]?c', 'AC', SUCCEED, 'found', 'AC'),
('(?i)(abc)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
('(?i)([a-c]*)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
('a(?!b).', 'abad', SUCCEED, 'found', 'ad'),
('a(?=d).', 'abad', SUCCEED, 'found', 'ad'),
('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'),
# these zero-width assertions are not supported
#('a(?!b).', 'abad', SUCCEED, 'found', 'ad'),
#('a(?=d).', 'abad', SUCCEED, 'found', 'ad'),
#('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'),
('a(?:b|c|d)(.)', 'ace', SUCCEED, 'g1', 'e'),
('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'),
('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'),
('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1+"-"+g2', 'c-e'),
('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'),
('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
# Comments using the (?#...) syntax
('w(?# comment', 'w', SYNTAX_ERROR),
('w(?# comment 1)xy(?# comment 2)z', 'wxyz', SUCCEED, 'found', 'wxyz'),
# Comments using the x embedded pattern modifier (in an unusual place too)
("""w# comment 1
x(?x) y
# comment 2
z""", 'wxyz', SUCCEED, 'found', 'wxyz'),
# using the m embedded pattern modifier
('^abc', """jkl
abc
xyz""", FAIL),
('(?m)^abc', """jkl
abc
xyz""", SUCCEED, 'found', 'abc'),
# using the s embedded pattern modifier
('a.b', 'a\nb', FAIL),
('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
]
#!/usr/local/bin/python
# -*- mode: python -*-
# $Id$
from test_support import verbose
import re
import reop
import sys, os, string, traceback
from re_tests import *
......@@ -7,6 +12,7 @@ if verbose: print 'Running re_tests test suite'
for t in tests:
print t
sys.stdout.flush()
pattern=s=outcome=repl=expected=None
if len(t)==5:
pattern, s, outcome, repl, expected = t
......@@ -21,6 +27,8 @@ for t in tests:
if outcome==SYNTAX_ERROR: pass # Expected a syntax error
else:
print '=== Syntax error:', t
except KeyboardInterrupt:
raise KeyboardInterrupt
except:
print '*** Unexpected error ***'
if verbose:
......@@ -28,7 +36,7 @@ for t in tests:
else:
try:
result=obj.search(s)
except regex.error, msg:
except (re.error, reop.error), msg:
print '=== Unexpected exception', t, repr(msg)
if outcome==SYNTAX_ERROR:
# This should have been a syntax error; forget it.
......@@ -41,22 +49,26 @@ for t in tests:
# Matched, as expected, so now we compute the
# result string and compare it to our expected result.
start, end = result.span(0)
vardict={'found': result.group(0), 'groups': result.group()}
vardict={'found': result.group(0),
'groups': result.group(),
'flags': result.re.flags}
for i in range(1, 100):
try:
gi = result.group(i)
# Special hack because else the string concat fails:
if gi is None: gi = "None"
if gi is None:
gi = "None"
except IndexError:
gi = "Error"
vardict['g%d' % i] = gi
for i in result.re.groupindex.keys():
try:
gi = result.group(i)
if gi is None:
gi = "None"
except IndexError:
pass
else:
vardict[i] = str(gi)
gi = "Error"
vardict[i] = gi
repl=eval(repl, vardict)
if repl!=expected:
print '=== grouping error', t,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment