Commit 8f417748 authored by Andrew M. Kuchling

This patch looks large, but it just deletes the ^M characters and
untabifies the files.  No actual code changes were made.
parent bd83b7ee
...@@ -26,7 +26,7 @@ from sre_constants import * ...@@ -26,7 +26,7 @@ from sre_constants import *
# Find an array type code whose item size matches the engine's code size.
# array.itemsize is the number of bytes used by one item, which is the same
# quantity the length of a serialized one-element array would give.
for WORDSIZE in "BHil":
    if array.array(WORDSIZE).itemsize == _sre.getcodesize():
        break
else:
    # reached only when no candidate type code triggered the "break"
    raise RuntimeError("cannot find a useable array type")
...@@ -34,18 +34,18 @@ else: ...@@ -34,18 +34,18 @@ else:
class Code:
    """Growable buffer of integer code words.

    Behaves like a small list (len, indexing, item assignment, append) and
    can be packed into a flat string of WORDSIZE-typed items with todata().
    """

    def __init__(self):
        self.data = []

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]

    def __setitem__(self, index, code):
        self.data[index] = code

    def append(self, code):
        self.data.append(code)

    def todata(self):
        """Return the buffer packed as an array of WORDSIZE items."""
        packed = array.array(WORDSIZE, self.data)
        # tobytes() is the current spelling; tostring() is kept as a
        # fallback for interpreters that predate it.
        if hasattr(packed, "tobytes"):
            return packed.tobytes()
        return packed.tostring()
def _lower(literal): def _lower(literal):
# return _sre._lower(literal) # FIXME # return _sre._lower(literal) # FIXME
...@@ -54,122 +54,122 @@ def _lower(literal): ...@@ -54,122 +54,122 @@ def _lower(literal):
def _compile(code, pattern, flags):
    """Append the code words for *pattern* to *code*.

    code -- buffer supporting len(), item assignment and append()
    pattern -- iterable of (op, av) pairs in intermediate form
    flags -- container of flag letters; only "i" and "s" are consulted here

    Forward references (skip/jump offsets) are emitted as a 0 placeholder
    and patched once the length of the guarded code is known.

    Raises SyntaxError when asked to repeat an item whose minimum width is
    zero, and ValueError for operators this compiler does not handle.
    """
    append = code.append
    for op, av in pattern:
        if op is ANY:
            if "s" in flags:
                append(CODES[op]) # any character at all!
            else:
                # without "s", "." is emitted as "not character code 10"
                append(CODES[NOT_LITERAL])
                append(10)
        elif op in (SUCCESS, FAILURE):
            append(CODES[op])
        elif op is AT:
            append(CODES[op])
            append(POSITIONS[av])
        elif op is BRANCH:
            append(CODES[op])
            tails = []
            for alternative in av[1]:
                skip = len(code); append(0)
                _compile(code, alternative, flags)
                append(CODES[JUMP])
                tails.append(len(code)); append(0)
                code[skip] = len(code) - skip
            append(0) # end of branch
            # every alternative's JUMP lands just past the branch
            for tail in tails:
                code[tail] = len(code) - tail
        elif op is CALL:
            append(CODES[op])
            skip = len(code); append(0)
            _compile(code, av, flags)
            append(CODES[SUCCESS])
            code[skip] = len(code) - skip
        elif op is CATEGORY: # not used by current parser
            append(CODES[op])
            append(CATEGORIES[av])
        elif op is GROUP:
            if "i" in flags:
                append(CODES[MAP_IGNORE[op]])
            else:
                append(CODES[op])
            append(av)
        elif op is IN:
            if "i" in flags:
                append(CODES[MAP_IGNORE[op]])
                def fixup(literal):
                    return ord(_lower(literal))
            else:
                append(CODES[op])
                fixup = ord
            skip = len(code); append(0)
            for set_op, set_av in av:
                append(CODES[set_op])
                if set_op is NEGATE:
                    pass
                elif set_op is LITERAL:
                    append(fixup(set_av))
                elif set_op is RANGE:
                    append(fixup(set_av[0]))
                    append(fixup(set_av[1]))
                elif set_op is CATEGORY:
                    append(CATEGORIES[set_av])
                else:
                    raise ValueError("unsupported set operator")
            append(CODES[FAILURE])
            code[skip] = len(code) - skip
        elif op in (LITERAL, NOT_LITERAL):
            if "i" in flags:
                append(CODES[MAP_IGNORE[op]])
                append(ord(_lower(av)))
            else:
                append(CODES[op])
                append(ord(av))
        elif op is MARK:
            append(CODES[op])
            append(av)
        elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
            lo, hi = av[2].getwidth()
            if lo == 0:
                raise SyntaxError("cannot repeat zero-width items")
            # pick the opening opcode and the opcode that closes the body
            if lo == hi == 1 and op is MAX_REPEAT:
                # greedy repeat of a single-character item
                head, terminator = MAX_REPEAT_ONE, SUCCESS
            elif op is MIN_REPEAT:
                head, terminator = op, MIN_UNTIL
            else:
                # FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
                head, terminator = op, MAX_UNTIL
            append(CODES[head])
            skip = len(code); append(0)
            append(av[0])
            append(av[1])
            _compile(code, av[2], flags)
            append(CODES[terminator])
            code[skip] = len(code) - skip
        elif op is SUBPATTERN:
            # only the group body is compiled here; no MARK codes are
            # emitted for av[0] by this branch
            _compile(code, av[1], flags)
        else:
            raise ValueError("unsupported operand type", op)
def compile(p, flags=()): def compile(p, flags=()):
# convert pattern list to internal format # convert pattern list to internal format
if type(p) is type(""): if type(p) is type(""):
import sre_parse import sre_parse
pattern = p pattern = p
p = sre_parse.parse(p) p = sre_parse.parse(p)
else: else:
pattern = None pattern = None
# print p.getwidth() # print p.getwidth()
# print p # print p
code = Code() code = Code()
...@@ -178,10 +178,10 @@ def compile(p, flags=()): ...@@ -178,10 +178,10 @@ def compile(p, flags=()):
# print list(code.data) # print list(code.data)
data = code.todata() data = code.todata()
if 0: # debugging if 0: # debugging
print print
print "-" * 68 print "-" * 68
import sre_disasm import sre_disasm
sre_disasm.disasm(data) sre_disasm.disasm(data)
print "-" * 68 print "-" * 68
# print len(data), p.pattern.groups, len(p.pattern.groupdict) # print len(data), p.pattern.groups, len(p.pattern.groupdict)
return _sre.compile(pattern, data, p.pattern.groups-1, p.pattern.groupdict) return _sre.compile(pattern, data, p.pattern.groups-1, p.pattern.groupdict)
...@@ -126,6 +126,6 @@ if __name__ == "__main__": ...@@ -126,6 +126,6 @@ if __name__ == "__main__":
f = open("sre_constants.h", "w") f = open("sre_constants.h", "w")
f.write("/* generated by sre_constants.py */\n") f.write("/* generated by sre_constants.py */\n")
for k, v in items: for k, v in items:
f.write("#define SRE_OP_" + string.upper(k) + " " + str(v) + "\n") f.write("#define SRE_OP_" + string.upper(k) + " " + str(v) + "\n")
f.close() f.close()
print "done" print "done"
...@@ -55,168 +55,168 @@ CATEGORIES = { ...@@ -55,168 +55,168 @@ CATEGORIES = {
class Pattern:
    """State shared while parsing one pattern.

    flags -- list of flag values recorded through setflag(), no duplicates
    groups -- id that the next call to getgroup() will hand out (starts at 1)
    groupdict -- maps group names to their ids
    """
    # FIXME: <fl> rename class, and store flags in here too!

    def __init__(self):
        self.flags = []
        self.groups = 1
        self.groupdict = {}

    def getgroup(self, name=None):
        """Allocate and return the next group id; register *name* if given."""
        gid = self.groups
        self.groups = gid + 1
        if name:
            self.groupdict[name] = gid
        return gid

    def setflag(self, flag):
        """Record *flag* unless it is already present."""
        if flag not in self.flags:
            self.flags.append(flag)
class SubPattern:
    """A subpattern in intermediate form: a list of (op, av) items.

    pattern -- the shared state object this subpattern belongs to
    data -- the list of items (a fresh list when none, or an empty one, is given)
    flags -- list of flags toggled through set() / reset()
    width -- cached result of getwidth(), or None when not yet computed
    """

    def __init__(self, pattern, data=None):
        self.pattern = pattern
        if not data:
            data = []
        self.data = data
        self.flags = []
        self.width = None

    def __repr__(self):
        return repr(self.data)

    def __len__(self):
        return len(self.data)

    def __delitem__(self, index):
        del self.data[index]

    def __getitem__(self, index):
        # Interpreters that no longer call __getslice__ deliver slices here
        # as slice objects; return a SubPattern for them as well so that
        # sp[i:j] yields the same type everywhere.
        if isinstance(index, slice):
            return SubPattern(self.pattern, self.data[index])
        return self.data[index]

    def __setitem__(self, index, code):
        self.data[index] = code

    def __getslice__(self, start, stop):
        return SubPattern(self.pattern, self.data[start:stop])

    def insert(self, index, code):
        self.data.insert(index, code)

    def append(self, code):
        self.data.append(code)

    def getwidth(self):
        """Return (min, max) width of this subpattern, clamped to max int.

        The result is cached in self.width.  For a BRANCH the minimum is
        the smallest minimum of its alternatives and the maximum is the
        largest maximum of its alternatives.
        """
        if self.width is not None:
            return self.width
        # sys.maxint where it exists, sys.maxsize otherwise
        limit = getattr(sys, "maxint", None)
        if limit is None:
            limit = sys.maxsize
        # plain ints promote automatically on overflow, so the running
        # totals may exceed the limit until they are clamped below
        lo = hi = 0
        for op, av in self.data:
            if op is BRANCH:
                shortest = limit
                longest = 0
                for alternative in av[1]:
                    i, j = alternative.getwidth()
                    shortest = min(shortest, i)
                    longest = max(longest, j)
                lo = lo + shortest
                hi = hi + longest
            elif op is CALL:
                i, j = av.getwidth()
                lo = lo + i
                hi = hi + j
            elif op is SUBPATTERN:
                i, j = av[1].getwidth()
                lo = lo + i
                hi = hi + j
            elif op in (MIN_REPEAT, MAX_REPEAT):
                # av is (min count, max count, repeated item)
                i, j = av[2].getwidth()
                lo = lo + i * av[0]
                hi = hi + j * av[1]
            elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY):
                lo = lo + 1
                hi = hi + 1
            elif op == SUCCESS:
                break
        self.width = int(min(lo, limit)), int(min(hi, limit))
        return self.width

    def set(self, flag):
        """Add *flag* to self.flags unless already present."""
        if not flag in self.flags:
            self.flags.append(flag)

    def reset(self, flag):
        """Remove *flag* from self.flags if present."""
        if flag in self.flags:
            self.flags.remove(flag)
class Tokenizer:
    """Split a pattern string into tokens with one token of lookahead.

    A token is a single character, or a backslash escape: the backslash,
    the following character and, for "\\x" or a leading digit, the run of
    hexadecimal or decimal digits after it.  self.next always holds the
    upcoming token, or None once the input is exhausted.
    """

    def __init__(self, string):
        self.string = list(string)
        self.next = self.__next()

    def __next(self):
        # Remove the next token from self.string and return it (None at end).
        if not self.string:
            return None
        char = self.string[0]
        if char[0] == "\\":
            try:
                c = self.string[1]
            except IndexError:
                raise SyntaxError("bogus escape")
            char = char + c
            if c == "x":
                # hexadecimal constant
                allowed = HEXDIGITS
            elif c in string.digits:
                # decimal (or octal) number
                # FIXME: if larger than current number of
                # groups, interpret as an octal number
                allowed = string.digits
            else:
                allowed = None
            if allowed is not None:
                # take digits until a non-digit or the end of the input
                index = 2
                while index < len(self.string) and self.string[index] in allowed:
                    char = char + self.string[index]
                    index = index + 1
        del self.string[0:len(char)]
        return char

    def match(self, char):
        """Consume the next token if it equals *char*; return 1 or 0."""
        if char == self.next:
            self.next = self.__next()
            return 1
        return 0

    def match_set(self, set):
        """Consume the next token if it is in *set*; return 1 or 0.

        At end of input (next token is None) nothing can match, so 0 is
        returned without testing membership; "None in <string>" would
        raise TypeError.
        """
        if self.next is not None and self.next in set:
            self.next = self.__next()
            return 1
        return 0

    def get(self):
        """Return the next token (None at end of input) and advance."""
        this = self.next
        self.next = self.__next()
        return this
def _fixescape(escape, character_class=0):
    """Convert a backslash escape token to a (type, value) pair.

    escape -- the token text, including the leading backslash
    character_class -- true when the escape appears inside a character
        class; the ESCAPES table is then consulted before CATEGORIES
        (the other way round outside) and numeric escapes are never
        treated as group references

    Raises SyntaxError ("bogus escape: ...") when the token cannot be
    interpreted.
    """
    if character_class:
        # inside a character class, we'll look in the character
        # escapes dictionary first
        tables = (ESCAPES, CATEGORIES)
    else:
        tables = (CATEGORIES, ESCAPES)
    for table in tables:
        code = table.get(escape)
        if code:
            return code
    if not character_class:
        try:
            group = int(escape[1:])
            # FIXME: only valid if group <= current number of groups
            return GROUP, group
        except ValueError:
            pass
    try:
        if escape[1:2] == "x":
            # hexadecimal: only the last two digits are significant.
            # *escape* itself is left untouched so that a malformed token
            # is reported in full by the error below.
            digits = escape[2:]
            return LITERAL, chr(int(digits[-2:], 16) & 0xff)
        elif escape[1:2] in string.digits:
            return LITERAL, chr(int(escape[1:], 8) & 0xff)
        elif len(escape) == 2:
            return LITERAL, escape[1]
    except ValueError:
        pass
    raise SyntaxError("bogus escape: %s" % repr(escape))
def _branch(subpattern, items): def _branch(subpattern, items):
...@@ -226,35 +226,35 @@ def _branch(subpattern, items): ...@@ -226,35 +226,35 @@ def _branch(subpattern, items):
# check if all items share a common prefix # check if all items share a common prefix
while 1: while 1:
prefix = None prefix = None
for item in items: for item in items:
if not item: if not item:
break break
if prefix is None: if prefix is None:
prefix = item[0] prefix = item[0]
elif item[0] != prefix: elif item[0] != prefix:
break break
else: else:
# all subitems start with a common "prefix". # all subitems start with a common "prefix".
# move it out of the branch # move it out of the branch
for item in items: for item in items:
del item[0] del item[0]
subpattern.append(prefix) subpattern.append(prefix)
continue # check next one continue # check next one
break break
# check if the branch can be replaced by a character set # check if the branch can be replaced by a character set
for item in items: for item in items:
if len(item) != 1 or item[0][0] != LITERAL: if len(item) != 1 or item[0][0] != LITERAL:
break break
else: else:
# we can store this as a character set instead of a # we can store this as a character set instead of a
# branch (FIXME: use a range if possible) # branch (FIXME: use a range if possible)
set = [] set = []
for item in items: for item in items:
set.append(item[0]) set.append(item[0])
subpattern.append((IN, set)) subpattern.append((IN, set))
return return
subpattern.append((BRANCH, (None, items))) subpattern.append((BRANCH, (None, items)))
...@@ -268,178 +268,178 @@ def _parse(source, pattern, flags=()): ...@@ -268,178 +268,178 @@ def _parse(source, pattern, flags=()):
while 1: while 1:
if source.next in ("|", ")"): if source.next in ("|", ")"):
break # end of subpattern break # end of subpattern
this = source.get() this = source.get()
if this is None: if this is None:
break # end of pattern break # end of pattern
if this and this[0] not in SPECIAL_CHARS: if this and this[0] not in SPECIAL_CHARS:
subpattern.append((LITERAL, this)) subpattern.append((LITERAL, this))
elif this == "[": elif this == "[":
# character set # character set
set = [] set = []
## if source.match(":"): ## if source.match(":"):
## pass # handle character classes ## pass # handle character classes
if source.match("^"): if source.match("^"):
set.append((NEGATE, None)) set.append((NEGATE, None))
# check remaining characters # check remaining characters
start = set[:] start = set[:]
while 1: while 1:
this = source.get() this = source.get()
if this == "]" and set != start: if this == "]" and set != start:
break break
elif this and this[0] == "\\": elif this and this[0] == "\\":
code1 = _fixescape(this, 1) code1 = _fixescape(this, 1)
elif this: elif this:
code1 = LITERAL, this code1 = LITERAL, this
else: else:
raise SyntaxError, "unexpected end of regular expression" raise SyntaxError, "unexpected end of regular expression"
if source.match("-"): if source.match("-"):
# potential range # potential range
this = source.get() this = source.get()
if this == "]": if this == "]":
set.append(code1) set.append(code1)
set.append((LITERAL, "-")) set.append((LITERAL, "-"))
break break
else: else:
if this[0] == "\\": if this[0] == "\\":
code2 = _fixescape(this, 1) code2 = _fixescape(this, 1)
else: else:
code2 = LITERAL, this code2 = LITERAL, this
if code1[0] != LITERAL or code2[0] != LITERAL: if code1[0] != LITERAL or code2[0] != LITERAL:
raise SyntaxError, "illegal range" raise SyntaxError, "illegal range"
if len(code1[1]) != 1 or len(code2[1]) != 1: if len(code1[1]) != 1 or len(code2[1]) != 1:
raise SyntaxError, "illegal range" raise SyntaxError, "illegal range"
set.append((RANGE, (code1[1], code2[1]))) set.append((RANGE, (code1[1], code2[1])))
else: else:
if code1[0] is IN: if code1[0] is IN:
code1 = code1[1][0] code1 = code1[1][0]
set.append(code1) set.append(code1)
# FIXME: <fl> move set optimization to support function # FIXME: <fl> move set optimization to support function
if len(set)==1 and set[0][0] is LITERAL: if len(set)==1 and set[0][0] is LITERAL:
subpattern.append(set[0]) # optimization subpattern.append(set[0]) # optimization
elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL: elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
subpattern.append((NOT_LITERAL, set[1][1])) # optimization subpattern.append((NOT_LITERAL, set[1][1])) # optimization
else: else:
# FIXME: <fl> add charmap optimization # FIXME: <fl> add charmap optimization
subpattern.append((IN, set)) subpattern.append((IN, set))
elif this and this[0] in REPEAT_CHARS: elif this and this[0] in REPEAT_CHARS:
# repeat previous item # repeat previous item
if this == "?": if this == "?":
min, max = 0, 1 min, max = 0, 1
elif this == "*": elif this == "*":
min, max = 0, sys.maxint min, max = 0, sys.maxint
elif this == "+": elif this == "+":
min, max = 1, sys.maxint min, max = 1, sys.maxint
elif this == "{": elif this == "{":
min, max = 0, sys.maxint min, max = 0, sys.maxint
lo = hi = "" lo = hi = ""
while source.next in string.digits: while source.next in string.digits:
lo = lo + source.get() lo = lo + source.get()
if source.match(","): if source.match(","):
while source.next in string.digits: while source.next in string.digits:
hi = hi + source.get() hi = hi + source.get()
else: else:
hi = lo hi = lo
if not source.match("}"): if not source.match("}"):
raise SyntaxError, "bogus range" raise SyntaxError, "bogus range"
if lo: if lo:
min = int(lo) min = int(lo)
if hi: if hi:
max = int(hi) max = int(hi)
# FIXME: <fl> check that hi >= lo! # FIXME: <fl> check that hi >= lo!
else: else:
raise SyntaxError, "not supported" raise SyntaxError, "not supported"
# figure out which item to repeat # figure out which item to repeat
# FIXME: should back up to the right mark, right? # FIXME: should back up to the right mark, right?
if subpattern: if subpattern:
index = len(subpattern)-1 index = len(subpattern)-1
while subpattern[index][0] is MARK: while subpattern[index][0] is MARK:
index = index - 1 index = index - 1
item = subpattern[index:index+1] item = subpattern[index:index+1]
else: else:
raise SyntaxError, "nothing to repeat" raise SyntaxError, "nothing to repeat"
if source.match("?"): if source.match("?"):
subpattern[index] = (MIN_REPEAT, (min, max, item)) subpattern[index] = (MIN_REPEAT, (min, max, item))
else: else:
subpattern[index] = (MAX_REPEAT, (min, max, item)) subpattern[index] = (MAX_REPEAT, (min, max, item))
elif this == ".": elif this == ".":
subpattern.append((ANY, None)) subpattern.append((ANY, None))
elif this == "(": elif this == "(":
group = 1 group = 1
name = None name = None
if source.match("?"): if source.match("?"):
group = 0 group = 0
# options # options
if source.match("P"): if source.match("P"):
# named group: skip forward to end of name # named group: skip forward to end of name
if source.match("<"): if source.match("<"):
name = "" name = ""
while 1: while 1:
char = source.get() char = source.get()
if char in (">", None): if char in (">", None):
break break
name = name + char name = name + char
group = 1 group = 1
elif source.match(":"): elif source.match(":"):
# non-capturing group # non-capturing group
group = 2 group = 2
elif source.match_set("iI"): elif source.match_set("iI"):
pattern.setflag("i") pattern.setflag("i")
elif source.match_set("lL"): elif source.match_set("lL"):
pattern.setflag("l") pattern.setflag("l")
elif source.match_set("mM"): elif source.match_set("mM"):
pattern.setflag("m") pattern.setflag("m")
elif source.match_set("sS"): elif source.match_set("sS"):
pattern.setflag("s") pattern.setflag("s")
elif source.match_set("xX"): elif source.match_set("xX"):
pattern.setflag("x") pattern.setflag("x")
if group: if group:
# parse group contents # parse group contents
b = [] b = []
if group == 2: if group == 2:
# anonymous group # anonymous group
group = None group = None
else: else:
group = pattern.getgroup(name) group = pattern.getgroup(name)
if group: if group:
subpattern.append((MARK, (group-1)*2)) subpattern.append((MARK, (group-1)*2))
while 1: while 1:
p = _parse(source, pattern, flags) p = _parse(source, pattern, flags)
if source.match(")"): if source.match(")"):
if b: if b:
b.append(p) b.append(p)
_branch(subpattern, b) _branch(subpattern, b)
else: else:
subpattern.append((SUBPATTERN, (group, p))) subpattern.append((SUBPATTERN, (group, p)))
break break
elif source.match("|"): elif source.match("|"):
b.append(p) b.append(p)
else: else:
raise SyntaxError, "group not properly closed" raise SyntaxError, "group not properly closed"
if group: if group:
subpattern.append((MARK, (group-1)*2+1)) subpattern.append((MARK, (group-1)*2+1))
else: else:
# FIXME: should this really be a while loop? # FIXME: should this really be a while loop?
while source.get() not in (")", None): while source.get() not in (")", None):
pass pass
elif this == "^": elif this == "^":
subpattern.append((AT, AT_BEGINNING)) subpattern.append((AT, AT_BEGINNING))
elif this == "$": elif this == "$":
subpattern.append((AT, AT_END)) subpattern.append((AT, AT_END))
elif this and this[0] == "\\": elif this and this[0] == "\\":
code =_fixescape(this) code =_fixescape(this)
subpattern.append(code) subpattern.append(code)
else: else:
raise SyntaxError, "parser error" raise SyntaxError, "parser error"
return subpattern return subpattern
...@@ -448,20 +448,20 @@ def parse(source, flags=()): ...@@ -448,20 +448,20 @@ def parse(source, flags=()):
g = Pattern() g = Pattern()
b = [] b = []
while 1: while 1:
p = _parse(s, g, flags) p = _parse(s, g, flags)
tail = s.get() tail = s.get()
if tail == "|": if tail == "|":
b.append(p) b.append(p)
elif tail == ")": elif tail == ")":
raise SyntaxError, "unbalanced parenthesis" raise SyntaxError, "unbalanced parenthesis"
elif tail is None: elif tail is None:
if b: if b:
b.append(p) b.append(p)
p = SubPattern(g) p = SubPattern(g)
_branch(p, b) _branch(p, b)
break break
else: else:
raise SyntaxError, "bogus characters at end of regular expression" raise SyntaxError, "bogus characters at end of regular expression"
return p return p
if __name__ == "__main__": if __name__ == "__main__":
...@@ -469,23 +469,23 @@ if __name__ == "__main__": ...@@ -469,23 +469,23 @@ if __name__ == "__main__":
from testpatterns import PATTERNS from testpatterns import PATTERNS
a = b = c = 0 a = b = c = 0
for pattern, flags in PATTERNS: for pattern, flags in PATTERNS:
if flags: if flags:
continue continue
print "-"*68 print "-"*68
try: try:
p = parse(pattern) p = parse(pattern)
print repr(pattern), "->" print repr(pattern), "->"
pprint(p.data) pprint(p.data)
import sre_compile import sre_compile
try: try:
code = sre_compile.compile(p) code = sre_compile.compile(p)
c = c + 1 c = c + 1
except: except:
pass pass
a = a + 1 a = a + 1
except SyntaxError, v: except SyntaxError, v:
print "**", repr(pattern), v print "**", repr(pattern), v
b = b + 1 b = b + 1
print "-"*68 print "-"*68
print a, "of", b, "patterns successfully parsed" print a, "of", b, "patterns successfully parsed"
print c, "of", b, "patterns successfully compiled" print c, "of", b, "patterns successfully compiled"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment