Commit a0e4c1bf authored by Guido van Rossum

Jeffrey's latest -- seems to solve most problems!

parent 75fce308
...@@ -60,6 +60,7 @@ def valid_identifier(id): ...@@ -60,6 +60,7 @@ def valid_identifier(id):
_cache = {} _cache = {}
_MAXCACHE = 20 _MAXCACHE = 20
def _cachecompile(pattern, flags): def _cachecompile(pattern, flags):
key = (pattern, flags) key = (pattern, flags)
try: try:
...@@ -74,16 +75,16 @@ def _cachecompile(pattern, flags): ...@@ -74,16 +75,16 @@ def _cachecompile(pattern, flags):
def match(pattern, string, flags=0): def match(pattern, string, flags=0):
return _cachecompile(pattern, flags).match(string) return _cachecompile(pattern, flags).match(string)
def search(pattern, string, flags=0): def search(pattern, string, flags=0):
return _cachecompile(pattern, flags).search(string) return _cachecompile(pattern, flags).search(string)
def sub(pattern, repl, string, count=0): def sub(pattern, repl, string, count=0):
return _cachecompile(pattern).sub(repl, string, count) return _cachecompile(pattern).sub(repl, string, count)
def subn(pattern, repl, string, count=0): def subn(pattern, repl, string, count=0):
return _cachecompile(pattern).subn(repl, string, count) return _cachecompile(pattern).subn(repl, string, count)
def split(pattern, string, maxsplit=0): def split(pattern, string, maxsplit=0):
return _cachecompile(pattern).subn(string, maxsplit) return _cachecompile(pattern).subn(string, maxsplit)
...@@ -100,12 +101,16 @@ class RegexObject: ...@@ -100,12 +101,16 @@ class RegexObject:
self.groupindex = groupindex self.groupindex = groupindex
self.callouts = callouts self.callouts = callouts
self.fastmap = build_fastmap(code) self.fastmap = build_fastmap(code)
if code[0].name == 'bol': if code[0].name == 'bol':
self.anchor = 1 self.anchor = 1
elif code[0].name == 'begbuf': elif code[0].name == 'begbuf':
self.anchor = 2 self.anchor = 2
else: else:
self.anchor = 0 self.anchor = 0
self.buffer = assemble(code) self.buffer = assemble(code)
def search(self, string, pos=0): def search(self, string, pos=0):
regs = reop.search(self.buffer, regs = reop.search(self.buffer,
...@@ -118,10 +123,12 @@ class RegexObject: ...@@ -118,10 +123,12 @@ class RegexObject:
pos) pos)
if regs is None: if regs is None:
return None return None
return MatchObject(self, return MatchObject(self,
string, string,
pos, pos,
regs) regs)
def match(self, string, pos=0): def match(self, string, pos=0):
regs = reop.match(self.buffer, regs = reop.match(self.buffer,
self.num_regs, self.num_regs,
...@@ -133,14 +140,18 @@ class RegexObject: ...@@ -133,14 +140,18 @@ class RegexObject:
pos) pos)
if regs is None: if regs is None:
return None return None
return MatchObject(self, return MatchObject(self,
string, string,
pos, pos,
regs) regs)
def sub(self, repl, string, count=0): def sub(self, repl, string, count=0):
pass pass
def subn(self, repl, string, count=0): def subn(self, repl, string, count=0):
pass pass
def split(self, string, maxsplit=0): def split(self, string, maxsplit=0):
pass pass
...@@ -150,6 +161,7 @@ class MatchObject: ...@@ -150,6 +161,7 @@ class MatchObject:
self.string = string self.string = string
self.pos = pos self.pos = pos
self.regs = regs self.regs = regs
def start(self, g): def start(self, g):
if type(g) == type(''): if type(g) == type(''):
try: try:
...@@ -157,6 +169,7 @@ class MatchObject: ...@@ -157,6 +169,7 @@ class MatchObject:
except (KeyError, TypeError): except (KeyError, TypeError):
raise IndexError, ('group "' + g + '" is undefined') raise IndexError, ('group "' + g + '" is undefined')
return self.regs[g][0] return self.regs[g][0]
def end(self, g): def end(self, g):
if type(g) == type(''): if type(g) == type(''):
try: try:
...@@ -164,6 +177,7 @@ class MatchObject: ...@@ -164,6 +177,7 @@ class MatchObject:
except (KeyError, TypeError): except (KeyError, TypeError):
raise IndexError, ('group "' + g + '" is undefined') raise IndexError, ('group "' + g + '" is undefined')
return self.regs[g][1] return self.regs[g][1]
def span(self, g): def span(self, g):
if type(g) == type(''): if type(g) == type(''):
try: try:
...@@ -171,6 +185,7 @@ class MatchObject: ...@@ -171,6 +185,7 @@ class MatchObject:
except (KeyError, TypeError): except (KeyError, TypeError):
raise IndexError, ('group "' + g + '" is undefined') raise IndexError, ('group "' + g + '" is undefined')
return self.regs[g] return self.regs[g]
def group(self, *groups): def group(self, *groups):
if len(groups) == 0: if len(groups) == 0:
groups = range(1, self.re.num_regs) groups = range(1, self.re.num_regs)
...@@ -339,7 +354,7 @@ class UpdateFailureJump(JumpInstruction): ...@@ -339,7 +354,7 @@ class UpdateFailureJump(JumpInstruction):
JumpInstruction.__init__(self, chr(12), label) JumpInstruction.__init__(self, chr(12), label)
class DummyFailureJump(JumpInstruction): class DummyFailureJump(JumpInstruction):
name = 'update_failure_jump' name = 'dummy_failure_jump'
def __init__(self, label): def __init__(self, label):
JumpInstruction.__init__(self, chr(13), label) JumpInstruction.__init__(self, chr(13), label)
...@@ -764,11 +779,34 @@ def expand_escape(pattern, index, context=NORMAL): ...@@ -764,11 +779,34 @@ def expand_escape(pattern, index, context=NORMAL):
def compile(pattern, flags=0): def compile(pattern, flags=0):
stack = [] stack = []
index = 0
label = 0 label = 0
register = 1 register = 1
groupindex = {} groupindex = {}
callouts = [] callouts = []
# preprocess the pattern looking for embedded pattern modifiers
index = 0
while (index != -1):
index = string.find(pattern, '(?', index)
if index != -1:
index = index + 2
if (index < len(pattern)) and (pattern[index] in 'iImMsSxX'):
while (index < len(pattern)) and (pattern[index] != ')'):
if pattern[index] in 'iI':
flags = flags | IGNORECASE
elif pattern[index] in 'mM':
flags = flags | MULTILINE
elif pattern[index] in 'sS':
flags = flags | DOTALL
elif pattern[index] in 'xX':
flags = flags | VERBOSE
else:
raise error, 'unknown flag'
index = index + 1
index = 0
while (index < len(pattern)): while (index < len(pattern)):
char = pattern[index] char = pattern[index]
index = index + 1 index = index + 1
...@@ -809,12 +847,6 @@ def compile(pattern, flags=0): ...@@ -809,12 +847,6 @@ def compile(pattern, flags=0):
raise error, 'unknown escape type' raise error, 'unknown escape type'
elif char == '|': elif char == '|':
if len(stack) == 0:
raise error, 'alternate with nothing on the left'
if stack[-1][0].name == '(':
raise error, 'alternate with nothing on the left in the group'
if stack[-1][0].name == '|':
raise error, 'alternates with nothing inbetween them'
expr = [] expr = []
while (len(stack) != 0) and \ while (len(stack) != 0) and \
...@@ -915,17 +947,10 @@ def compile(pattern, flags=0): ...@@ -915,17 +947,10 @@ def compile(pattern, flags=0):
'assertion is unsupported') 'assertion is unsupported')
elif pattern[index] in 'iImMsSxX': elif pattern[index] in 'iImMsSxX':
# ignore embedded pattern modifiers here, they
# have already been taken care of in the
# preprocessing
while (index < len(pattern)) and (pattern[index] != ')'): while (index < len(pattern)) and (pattern[index] != ')'):
if pattern[index] in 'iI':
flags = flags | IGNORECASE
elif pattern[index] in 'mM':
flags = flags | MULTILINE
elif pattern[index] in 'sS':
flags = flags | DOTALL
elif pattern[index] in 'xX':
flags = flags | VERBOSE
else:
raise error, 'unknown flag'
index = index + 1 index = index + 1
index = index + 1 index = index + 1
...@@ -947,13 +972,6 @@ def compile(pattern, flags=0): ...@@ -947,13 +972,6 @@ def compile(pattern, flags=0):
if len(stack) == 0: if len(stack) == 0:
raise error, 'too many close parens' raise error, 'too many close parens'
if len(expr) == 0:
raise error, 'nothing inside parens'
# check to see if alternation used correctly
if (expr[-1].name == '|'):
raise error, 'alternate with nothing on the right'
# remove markers left by alternation # remove markers left by alternation
expr = filter(lambda x: x.name != '|', expr) expr = filter(lambda x: x.name != '|', expr)
...@@ -1023,18 +1041,17 @@ def compile(pattern, flags=0): ...@@ -1023,18 +1041,17 @@ def compile(pattern, flags=0):
while min > 0: while min > 0:
expr = expr + stack[-1] expr = expr + stack[-1]
min = min - 1 min = min - 1
registers = registers_used(stack[-1])
if minimal: if minimal:
expr = expr + \ expr = expr + \
([Jump(label + 1), ([Jump(label + 1),
Label(label)] + \ Label(label)] + \
stack[-1] + \ stack[-1] + \
[Label(label + 1), [Label(label + 1),
FailureJump(label, registers)]) FailureJump(label)])
else: else:
expr = expr + \ expr = expr + \
([Label(label), ([Label(label),
FailureJump(label + 1, registers)] + FailureJump(label + 1)] +
stack[-1] + stack[-1] +
[StarJump(label), [StarJump(label),
Label(label + 1)]) Label(label + 1)])
...@@ -1109,7 +1126,7 @@ def compile(pattern, flags=0): ...@@ -1109,7 +1126,7 @@ def compile(pattern, flags=0):
registers = registers_used(stack[-1]) registers = registers_used(stack[-1])
if (index < len(pattern)) and (pattern[index] == '?'): if (index < len(pattern)) and (pattern[index] == '?'):
# non-greedy matching # non-greedy matching
expr = [JumpInstructions(label + 1), expr = [Jump(label + 1),
Label(label)] + \ Label(label)] + \
stack[-1] + \ stack[-1] + \
[Label(label + 1), [Label(label + 1),
...@@ -1130,9 +1147,10 @@ def compile(pattern, flags=0): ...@@ -1130,9 +1147,10 @@ def compile(pattern, flags=0):
# positive closure # positive closure
if len(stack) == 0: if len(stack) == 0:
raise error, '+ needs something to repeat' raise error, '+ needs something to repeat'
if (stack[-1][0].name == '(') or (stack[-1][0].name == '|'): if (stack[-1][0].name == '(') or (stack[-1][0].name == '|'):
raise error, '+ needs something to repeat' raise error, '+ needs something to repeat'
registers = registers_used(stack[-1])
if (index < len(pattern)) and (pattern[index] == '?'): if (index < len(pattern)) and (pattern[index] == '?'):
# non-greedy # non-greedy
expr = [Label(label)] + \ expr = [Label(label)] + \
...@@ -1156,7 +1174,6 @@ def compile(pattern, flags=0): ...@@ -1156,7 +1174,6 @@ def compile(pattern, flags=0):
elif char == '?': elif char == '?':
if len(stack) == 0: if len(stack) == 0:
raise error, 'need something to be optional' raise error, 'need something to be optional'
registers = registers_used(stack[-1])
if (index < len(pattern)) and (pattern[index] == '?'): if (index < len(pattern)) and (pattern[index] == '?'):
# non-greedy matching # non-greedy matching
expr = [FailureJump(label), expr = [FailureJump(label),
...@@ -1177,7 +1194,7 @@ def compile(pattern, flags=0): ...@@ -1177,7 +1194,7 @@ def compile(pattern, flags=0):
elif char == '.': elif char == '.':
if flags & DOTALL: if flags & DOTALL:
stack.append(Set(map(chr, range(256)))) stack.append([Set(map(chr, range(256)))])
else: else:
stack.append([AnyChar()]) stack.append([AnyChar()])
...@@ -1337,8 +1354,6 @@ def compile(pattern, flags=0): ...@@ -1337,8 +1354,6 @@ def compile(pattern, flags=0):
del stack[-1] del stack[-1]
if len(code) == 0: if len(code) == 0:
raise error, 'no code generated' raise error, 'no code generated'
if (code[-1].name == '|'):
raise error, 'alternate with nothing on the right'
code = filter(lambda x: x.name != '|', code) code = filter(lambda x: x.name != '|', code)
need_label = 0 need_label = 0
for i in range(len(code)): for i in range(len(code)):
......
This diff is collapsed.
...@@ -262,7 +262,7 @@ tests = [ ...@@ -262,7 +262,7 @@ tests = [
('(*)b', '-', SYNTAX_ERROR), ('(*)b', '-', SYNTAX_ERROR),
('$b', 'b', FAIL), ('$b', 'b', FAIL),
('a\\', '-', SYNTAX_ERROR), ('a\\', '-', SYNTAX_ERROR),
('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-'), ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
('a\\(*b', 'ab', SUCCEED, 'found', 'ab'), ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'), ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'), ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
...@@ -306,21 +306,22 @@ tests = [ ...@@ -306,21 +306,22 @@ tests = [
('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'), ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'), ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'), ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-'), ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-'), ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'), ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL), ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL), ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-'), ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'), ('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'),
('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'), ('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'),
('((((((((((a))))))))))\\41', 'aa', FAIL), # Python does not have the same rules for \\41 so this is a syntax error
('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'), # ('((((((((((a))))))))))\\41', 'aa', FAIL),
# ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'), ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
('multiple words of text', 'uh-uh', FAIL), ('multiple words of text', 'uh-uh', FAIL),
('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'), ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'), ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
('\\((.*), (.*)\\)', '(a, b)', SUCCEED, '(g2, g1)', '(b, a)'), ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
('[k]', 'ab', FAIL), ('[k]', 'ab', FAIL),
##('abcd', 'abcd', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'abcd-$&-\\abcd'), ##('abcd', 'abcd', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'abcd-$&-\\abcd'),
##('a(bc)d', 'abcd', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'bc-$1-\\bc'), ##('a(bc)d', 'abcd', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'bc-$1-\\bc'),
...@@ -389,7 +390,7 @@ tests = [ ...@@ -389,7 +390,7 @@ tests = [
('(?i)(*)b', '-', SYNTAX_ERROR), ('(?i)(*)b', '-', SYNTAX_ERROR),
('(?i)$b', 'B', FAIL), ('(?i)$b', 'B', FAIL),
('(?i)a\\', '-', SYNTAX_ERROR), ('(?i)a\\', '-', SYNTAX_ERROR),
('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-'), ('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-Error'),
('(?i)a\\(*b', 'AB', SUCCEED, 'found', 'AB'), ('(?i)a\\(*b', 'AB', SUCCEED, 'found', 'AB'),
('(?i)a\\(*b', 'A((B', SUCCEED, 'found', 'A((B'), ('(?i)a\\(*b', 'A((B', SUCCEED, 'found', 'A((B'),
('(?i)a\\\\b', 'A\\B', SUCCEED, 'found', 'A\\B'), ('(?i)a\\\\b', 'A\\B', SUCCEED, 'found', 'A\\B'),
...@@ -409,7 +410,7 @@ tests = [ ...@@ -409,7 +410,7 @@ tests = [
('(?i)(a+|b){1,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'), ('(?i)(a+|b){1,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
('(?i)(a+|b)?', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'), ('(?i)(a+|b)?', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
('(?i)(a+|b){0,1}', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'), ('(?i)(a+|b){0,1}', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-'), ('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-None'),
('(?i))(', '-', SYNTAX_ERROR), ('(?i))(', '-', SYNTAX_ERROR),
('(?i)[^ab]*', 'CDE', SUCCEED, 'found', 'CDE'), ('(?i)[^ab]*', 'CDE', SUCCEED, 'found', 'CDE'),
('(?i)abc', '', FAIL), ('(?i)abc', '', FAIL),
...@@ -436,35 +437,62 @@ tests = [ ...@@ -436,35 +437,62 @@ tests = [
('(?i)(ab|a)b*c', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-AB'), ('(?i)(ab|a)b*c', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-AB'),
('(?i)((a)(b)c)(d)', 'ABCD', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D'), ('(?i)((a)(b)c)(d)', 'ABCD', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D'),
('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', SUCCEED, 'found', 'ALPHA'), ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', SUCCEED, 'found', 'ALPHA'),
('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-'), ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-None'),
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-'), ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', SUCCEED, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J'), ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', SUCCEED, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J'),
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', FAIL), ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', FAIL),
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', FAIL), ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', FAIL),
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-'), ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
('(?i)((((((((((a))))))))))', 'A', SUCCEED, 'g10', 'A'), ('(?i)((((((((((a))))))))))', 'A', SUCCEED, 'g10', 'A'),
('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'), ('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
('(?i)((((((((((a))))))))))\\41', 'AA', FAIL), #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'), #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'), ('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'), ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'), ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
('(?i)multiple words of text', 'UH-UH', FAIL), ('(?i)multiple words of text', 'UH-UH', FAIL),
('(?i)multiple words', 'MULTIPLE WORDS, YEAH', SUCCEED, 'found', 'MULTIPLE WORDS'), ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', SUCCEED, 'found', 'MULTIPLE WORDS'),
('(?i)(.*)c(.*)', 'ABCDE', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE'), ('(?i)(.*)c(.*)', 'ABCDE', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE'),
('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, '(g2, g1)', '(B, A)'), ('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, 'g2+"-"+g1', 'B-A'),
('(?i)[k]', 'AB', FAIL), ('(?i)[k]', 'AB', FAIL),
##('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'ABCD-$&-\\ABCD'), ##('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'ABCD-$&-\\ABCD'),
##('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'BC-$1-\\BC'), ##('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'BC-$1-\\BC'),
('(?i)a[-]?c', 'AC', SUCCEED, 'found', 'AC'), ('(?i)a[-]?c', 'AC', SUCCEED, 'found', 'AC'),
('(?i)(abc)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'), ('(?i)(abc)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
('(?i)([a-c]*)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'), ('(?i)([a-c]*)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
('a(?!b).', 'abad', SUCCEED, 'found', 'ad'), # these zero-width assertions are not supported
('a(?=d).', 'abad', SUCCEED, 'found', 'ad'), #('a(?!b).', 'abad', SUCCEED, 'found', 'ad'),
('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'), #('a(?=d).', 'abad', SUCCEED, 'found', 'ad'),
#('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'),
('a(?:b|c|d)(.)', 'ace', SUCCEED, 'g1', 'e'), ('a(?:b|c|d)(.)', 'ace', SUCCEED, 'g1', 'e'),
('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'), ('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'),
('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'), ('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'),
('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1+"-"+g2', 'c-e'), ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'),
('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'), ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
# Comments using the (?#...) syntax
('w(?# comment', 'w', SYNTAX_ERROR),
('w(?# comment 1)xy(?# comment 2)z', 'wxyz', SUCCEED, 'found', 'wxyz'),
# Comments using the x embedded pattern modifier (in an unusual place too)
("""w# comment 1
x(?x) y
# comment 2
z""", 'wxyz', SUCCEED, 'found', 'wxyz'),
# using the m embedded pattern modifier
('^abc', """jkl
abc
xyz""", FAIL),
('(?m)^abc', """jkl
abc
xyz""", SUCCEED, 'found', 'abc'),
# using the s embedded pattern modifier
('a.b', 'a\nb', FAIL),
('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
] ]
#!/usr/local/bin/python
# -*- mode: python -*-
# $Id$
from test_support import verbose from test_support import verbose
import re import re
import reop
import sys, os, string, traceback import sys, os, string, traceback
from re_tests import * from re_tests import *
...@@ -7,6 +12,7 @@ if verbose: print 'Running re_tests test suite' ...@@ -7,6 +12,7 @@ if verbose: print 'Running re_tests test suite'
for t in tests: for t in tests:
print t print t
sys.stdout.flush()
pattern=s=outcome=repl=expected=None pattern=s=outcome=repl=expected=None
if len(t)==5: if len(t)==5:
pattern, s, outcome, repl, expected = t pattern, s, outcome, repl, expected = t
...@@ -21,6 +27,8 @@ for t in tests: ...@@ -21,6 +27,8 @@ for t in tests:
if outcome==SYNTAX_ERROR: pass # Expected a syntax error if outcome==SYNTAX_ERROR: pass # Expected a syntax error
else: else:
print '=== Syntax error:', t print '=== Syntax error:', t
except KeyboardInterrupt:
raise KeyboardInterrupt
except: except:
print '*** Unexpected error ***' print '*** Unexpected error ***'
if verbose: if verbose:
...@@ -28,7 +36,7 @@ for t in tests: ...@@ -28,7 +36,7 @@ for t in tests:
else: else:
try: try:
result=obj.search(s) result=obj.search(s)
except regex.error, msg: except (re.error, reop.error), msg:
print '=== Unexpected exception', t, repr(msg) print '=== Unexpected exception', t, repr(msg)
if outcome==SYNTAX_ERROR: if outcome==SYNTAX_ERROR:
# This should have been a syntax error; forget it. # This should have been a syntax error; forget it.
...@@ -41,22 +49,26 @@ for t in tests: ...@@ -41,22 +49,26 @@ for t in tests:
# Matched, as expected, so now we compute the # Matched, as expected, so now we compute the
# result string and compare it to our expected result. # result string and compare it to our expected result.
start, end = result.span(0) start, end = result.span(0)
vardict={'found': result.group(0), 'groups': result.group()} vardict={'found': result.group(0),
'groups': result.group(),
'flags': result.re.flags}
for i in range(1, 100): for i in range(1, 100):
try: try:
gi = result.group(i) gi = result.group(i)
# Special hack because else the string concat fails: # Special hack because else the string concat fails:
if gi is None: gi = "None" if gi is None:
gi = "None"
except IndexError: except IndexError:
gi = "Error" gi = "Error"
vardict['g%d' % i] = gi vardict['g%d' % i] = gi
for i in result.re.groupindex.keys(): for i in result.re.groupindex.keys():
try: try:
gi = result.group(i) gi = result.group(i)
if gi is None:
gi = "None"
except IndexError: except IndexError:
pass gi = "Error"
else: vardict[i] = gi
vardict[i] = str(gi)
repl=eval(repl, vardict) repl=eval(repl, vardict)
if repl!=expected: if repl!=expected:
print '=== grouping error', t, print '=== grouping error', t,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment