Commit b81e70eb authored by Guido van Rossum

Fredrik Lundh: new snapshot. Mostly reindented.

This one should work with unicode expressions, and compile
a bit more silently.
parent 5de435a2
......@@ -164,7 +164,7 @@ def _compile(code, pattern, flags):
def compile(p, flags=()):
# convert pattern list to internal format
if type(p) is type(""):
if type(p) in (type(""), type(u"")):
import sre_parse
pattern = p
p = sre_parse.parse(p)
......
......@@ -26,8 +26,11 @@ from sre_constants import *
SPECIAL_CHARS = ".\\[{()*+?^$|"
REPEAT_CHARS = "*+?{"
OCTDIGITS = "01234567"
HEXDIGITS = "0123456789abcdefABCDEF"
# FIXME: string-in-tuple tests may explode if char is unicode :-(
DIGITS = tuple(string.digits)
OCTDIGITS = tuple("01234567")
HEXDIGITS = tuple("0123456789abcdefABCDEF")
ESCAPES = {
"\\a": (LITERAL, chr(7)),
......@@ -65,7 +68,7 @@ class Pattern:
self.groupdict[name] = gid
return gid
def setflag(self, flag):
if flag not in self.flags:
if flag in self.flags:
self.flags.append(flag)
class SubPattern:
......@@ -153,16 +156,16 @@ class Tokenizer:
# hexadecimal constant
for i in xrange(2, sys.maxint):
c = self.string[i]
if c not in HEXDIGITS:
if str(c) not in HEXDIGITS:
break
char = char + c
elif c in string.digits:
elif str(c) in DIGITS:
# decimal (or octal) number
for i in xrange(2, sys.maxint):
c = self.string[i]
# FIXME: if larger than current number of
# groups, interpret as an octal number
if c not in string.digits:
if str(c) not in DIGITS:
break
char = char + c
except IndexError:
......@@ -175,7 +178,7 @@ class Tokenizer:
return 1
return 0
def match_set(self, set):
if self.next in set:
if self.next and self.next in set:
self.next = self.__next()
return 1
return 0
......@@ -210,9 +213,9 @@ def _fixescape(escape, character_class=0):
try:
if escape[1:2] == "x":
escape = escape[2:]
return LITERAL, chr(string.atoi(escape[-2:], 16) & 0xff)
elif escape[1:2] in string.digits:
return LITERAL, chr(string.atoi(escape[1:], 8) & 0xff)
return LITERAL, chr(int(escape[-2:], 16) & 0xff)
elif str(escape[1:2]) in DIGITS:
return LITERAL, chr(int(escape[1:], 8) & 0xff)
elif len(escape) == 2:
return LITERAL, escape[1]
except ValueError:
......@@ -268,7 +271,7 @@ def _parse(source, pattern, flags=()):
while 1:
if source.next in ("|", ")"):
if str(source.next) in ("|", ")"):
break # end of subpattern
this = source.get()
if this is None:
......@@ -338,10 +341,10 @@ def _parse(source, pattern, flags=()):
elif this == "{":
min, max = 0, sys.maxint
lo = hi = ""
while source.next in string.digits:
while str(source.next) in DIGITS:
lo = lo + source.get()
if source.match(","):
while source.next in string.digits:
while str(source.next) in DIGITS:
hi = hi + source.get()
else:
hi = lo
......@@ -381,7 +384,7 @@ def _parse(source, pattern, flags=()):
name = ""
while 1:
char = source.get()
if char in (">", None):
if char is None or char == ">":
break
name = name + char
group = 1
......@@ -425,8 +428,10 @@ def _parse(source, pattern, flags=()):
subpattern.append((MARK, (group-1)*2+1))
else:
# FIXME: should this really be a while loop?
while source.get() not in (")", None):
pass
while 1:
char = source.get()
if char is None or char == ")":
break
elif this == "^":
subpattern.append((AT, AT_BEGINNING))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment