Commit b81e70eb authored by Guido van Rossum

Fredrik Lundh: new snapshot. Mostly reindented.

This one should work with unicode expressions, and compile
a bit more silently.
parent 5de435a2
...@@ -164,7 +164,7 @@ def _compile(code, pattern, flags): ...@@ -164,7 +164,7 @@ def _compile(code, pattern, flags):
def compile(p, flags=()): def compile(p, flags=()):
# convert pattern list to internal format # convert pattern list to internal format
if type(p) is type(""): if type(p) in (type(""), type(u"")):
import sre_parse import sre_parse
pattern = p pattern = p
p = sre_parse.parse(p) p = sre_parse.parse(p)
......
...@@ -26,8 +26,11 @@ from sre_constants import * ...@@ -26,8 +26,11 @@ from sre_constants import *
SPECIAL_CHARS = ".\\[{()*+?^$|" SPECIAL_CHARS = ".\\[{()*+?^$|"
REPEAT_CHARS = "*+?{" REPEAT_CHARS = "*+?{"
OCTDIGITS = "01234567" # FIXME: string in tuple tests may explode if char is unicode :-(
HEXDIGITS = "0123456789abcdefABCDEF" DIGITS = tuple(string.digits)
OCTDIGITS = tuple("01234567")
HEXDIGITS = tuple("0123456789abcdefABCDEF")
ESCAPES = { ESCAPES = {
"\\a": (LITERAL, chr(7)), "\\a": (LITERAL, chr(7)),
...@@ -65,7 +68,7 @@ class Pattern: ...@@ -65,7 +68,7 @@ class Pattern:
self.groupdict[name] = gid self.groupdict[name] = gid
return gid return gid
def setflag(self, flag): def setflag(self, flag):
if flag not in self.flags: if flag in self.flags:
self.flags.append(flag) self.flags.append(flag)
class SubPattern: class SubPattern:
...@@ -153,16 +156,16 @@ class Tokenizer: ...@@ -153,16 +156,16 @@ class Tokenizer:
# hexadecimal constant # hexadecimal constant
for i in xrange(2, sys.maxint): for i in xrange(2, sys.maxint):
c = self.string[i] c = self.string[i]
if c not in HEXDIGITS: if str(c) not in HEXDIGITS:
break break
char = char + c char = char + c
elif c in string.digits: elif str(c) in DIGITS:
# decimal (or octal) number # decimal (or octal) number
for i in xrange(2, sys.maxint): for i in xrange(2, sys.maxint):
c = self.string[i] c = self.string[i]
# FIXME: if larger than current number of # FIXME: if larger than current number of
# groups, interpret as an octal number # groups, interpret as an octal number
if c not in string.digits: if str(c) not in DIGITS:
break break
char = char + c char = char + c
except IndexError: except IndexError:
...@@ -175,7 +178,7 @@ class Tokenizer: ...@@ -175,7 +178,7 @@ class Tokenizer:
return 1 return 1
return 0 return 0
def match_set(self, set): def match_set(self, set):
if self.next in set: if self.next and self.next in set:
self.next = self.__next() self.next = self.__next()
return 1 return 1
return 0 return 0
...@@ -210,9 +213,9 @@ def _fixescape(escape, character_class=0): ...@@ -210,9 +213,9 @@ def _fixescape(escape, character_class=0):
try: try:
if escape[1:2] == "x": if escape[1:2] == "x":
escape = escape[2:] escape = escape[2:]
return LITERAL, chr(string.atoi(escape[-2:], 16) & 0xff) return LITERAL, chr(int(escape[-2:], 16) & 0xff)
elif escape[1:2] in string.digits: elif str(escape[1:2]) in DIGITS:
return LITERAL, chr(string.atoi(escape[1:], 8) & 0xff) return LITERAL, chr(int(escape[1:], 8) & 0xff)
elif len(escape) == 2: elif len(escape) == 2:
return LITERAL, escape[1] return LITERAL, escape[1]
except ValueError: except ValueError:
...@@ -268,7 +271,7 @@ def _parse(source, pattern, flags=()): ...@@ -268,7 +271,7 @@ def _parse(source, pattern, flags=()):
while 1: while 1:
if source.next in ("|", ")"): if str(source.next) in ("|", ")"):
break # end of subpattern break # end of subpattern
this = source.get() this = source.get()
if this is None: if this is None:
...@@ -338,10 +341,10 @@ def _parse(source, pattern, flags=()): ...@@ -338,10 +341,10 @@ def _parse(source, pattern, flags=()):
elif this == "{": elif this == "{":
min, max = 0, sys.maxint min, max = 0, sys.maxint
lo = hi = "" lo = hi = ""
while source.next in string.digits: while str(source.next) in DIGITS:
lo = lo + source.get() lo = lo + source.get()
if source.match(","): if source.match(","):
while source.next in string.digits: while str(source.next) in DIGITS:
hi = hi + source.get() hi = hi + source.get()
else: else:
hi = lo hi = lo
...@@ -381,7 +384,7 @@ def _parse(source, pattern, flags=()): ...@@ -381,7 +384,7 @@ def _parse(source, pattern, flags=()):
name = "" name = ""
while 1: while 1:
char = source.get() char = source.get()
if char in (">", None): if char is None or char == ">":
break break
name = name + char name = name + char
group = 1 group = 1
...@@ -425,8 +428,10 @@ def _parse(source, pattern, flags=()): ...@@ -425,8 +428,10 @@ def _parse(source, pattern, flags=()):
subpattern.append((MARK, (group-1)*2+1)) subpattern.append((MARK, (group-1)*2+1))
else: else:
# FIXME: should this really be a while loop? # FIXME: should this really be a while loop?
while source.get() not in (")", None): while 1:
pass char = source.get()
if char is None or char == ")":
break
elif this == "^": elif this == "^":
subpattern.append((AT, AT_BEGINNING)) subpattern.append((AT, AT_BEGINNING))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment