Commit c7f7d389 authored by Serhiy Storchaka

Issue #22434: Constants in sre_constants are now named constants (enum-like).

parent bf764a19
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
import _sre import _sre
import sre_parse import sre_parse
from sre_constants import * from sre_constants import *
from _sre import MAXREPEAT
assert _sre.MAGIC == MAGIC, "SRE module mismatch" assert _sre.MAGIC == MAGIC, "SRE module mismatch"
...@@ -38,65 +37,65 @@ def _compile(code, pattern, flags): ...@@ -38,65 +37,65 @@ def _compile(code, pattern, flags):
for op, av in pattern: for op, av in pattern:
if op in LITERAL_CODES: if op in LITERAL_CODES:
if flags & SRE_FLAG_IGNORECASE: if flags & SRE_FLAG_IGNORECASE:
emit(OPCODES[OP_IGNORE[op]]) emit(OP_IGNORE[op])
emit(_sre.getlower(av, flags)) emit(_sre.getlower(av, flags))
else: else:
emit(OPCODES[op]) emit(op)
emit(av) emit(av)
elif op is IN: elif op is IN:
if flags & SRE_FLAG_IGNORECASE: if flags & SRE_FLAG_IGNORECASE:
emit(OPCODES[OP_IGNORE[op]]) emit(OP_IGNORE[op])
def fixup(literal, flags=flags): def fixup(literal, flags=flags):
return _sre.getlower(literal, flags) return _sre.getlower(literal, flags)
else: else:
emit(OPCODES[op]) emit(op)
fixup = None fixup = None
skip = _len(code); emit(0) skip = _len(code); emit(0)
_compile_charset(av, flags, code, fixup) _compile_charset(av, flags, code, fixup)
code[skip] = _len(code) - skip code[skip] = _len(code) - skip
elif op is ANY: elif op is ANY:
if flags & SRE_FLAG_DOTALL: if flags & SRE_FLAG_DOTALL:
emit(OPCODES[ANY_ALL]) emit(ANY_ALL)
else: else:
emit(OPCODES[ANY]) emit(ANY)
elif op in REPEATING_CODES: elif op in REPEATING_CODES:
if flags & SRE_FLAG_TEMPLATE: if flags & SRE_FLAG_TEMPLATE:
raise error("internal: unsupported template operator") raise error("internal: unsupported template operator")
elif _simple(av) and op is not REPEAT: elif _simple(av) and op is not REPEAT:
if op is MAX_REPEAT: if op is MAX_REPEAT:
emit(OPCODES[REPEAT_ONE]) emit(REPEAT_ONE)
else: else:
emit(OPCODES[MIN_REPEAT_ONE]) emit(MIN_REPEAT_ONE)
skip = _len(code); emit(0) skip = _len(code); emit(0)
emit(av[0]) emit(av[0])
emit(av[1]) emit(av[1])
_compile(code, av[2], flags) _compile(code, av[2], flags)
emit(OPCODES[SUCCESS]) emit(SUCCESS)
code[skip] = _len(code) - skip code[skip] = _len(code) - skip
else: else:
emit(OPCODES[REPEAT]) emit(REPEAT)
skip = _len(code); emit(0) skip = _len(code); emit(0)
emit(av[0]) emit(av[0])
emit(av[1]) emit(av[1])
_compile(code, av[2], flags) _compile(code, av[2], flags)
code[skip] = _len(code) - skip code[skip] = _len(code) - skip
if op is MAX_REPEAT: if op is MAX_REPEAT:
emit(OPCODES[MAX_UNTIL]) emit(MAX_UNTIL)
else: else:
emit(OPCODES[MIN_UNTIL]) emit(MIN_UNTIL)
elif op is SUBPATTERN: elif op is SUBPATTERN:
if av[0]: if av[0]:
emit(OPCODES[MARK]) emit(MARK)
emit((av[0]-1)*2) emit((av[0]-1)*2)
# _compile_info(code, av[1], flags) # _compile_info(code, av[1], flags)
_compile(code, av[1], flags) _compile(code, av[1], flags)
if av[0]: if av[0]:
emit(OPCODES[MARK]) emit(MARK)
emit((av[0]-1)*2+1) emit((av[0]-1)*2+1)
elif op in SUCCESS_CODES: elif op in SUCCESS_CODES:
emit(OPCODES[op]) emit(op)
elif op in ASSERT_CODES: elif op in ASSERT_CODES:
emit(OPCODES[op]) emit(op)
skip = _len(code); emit(0) skip = _len(code); emit(0)
if av[0] >= 0: if av[0] >= 0:
emit(0) # look ahead emit(0) # look ahead
...@@ -106,57 +105,57 @@ def _compile(code, pattern, flags): ...@@ -106,57 +105,57 @@ def _compile(code, pattern, flags):
raise error("look-behind requires fixed-width pattern") raise error("look-behind requires fixed-width pattern")
emit(lo) # look behind emit(lo) # look behind
_compile(code, av[1], flags) _compile(code, av[1], flags)
emit(OPCODES[SUCCESS]) emit(SUCCESS)
code[skip] = _len(code) - skip code[skip] = _len(code) - skip
elif op is CALL: elif op is CALL:
emit(OPCODES[op]) emit(op)
skip = _len(code); emit(0) skip = _len(code); emit(0)
_compile(code, av, flags) _compile(code, av, flags)
emit(OPCODES[SUCCESS]) emit(SUCCESS)
code[skip] = _len(code) - skip code[skip] = _len(code) - skip
elif op is AT: elif op is AT:
emit(OPCODES[op]) emit(op)
if flags & SRE_FLAG_MULTILINE: if flags & SRE_FLAG_MULTILINE:
av = AT_MULTILINE.get(av, av) av = AT_MULTILINE.get(av, av)
if flags & SRE_FLAG_LOCALE: if flags & SRE_FLAG_LOCALE:
av = AT_LOCALE.get(av, av) av = AT_LOCALE.get(av, av)
elif flags & SRE_FLAG_UNICODE: elif flags & SRE_FLAG_UNICODE:
av = AT_UNICODE.get(av, av) av = AT_UNICODE.get(av, av)
emit(ATCODES[av]) emit(av)
elif op is BRANCH: elif op is BRANCH:
emit(OPCODES[op]) emit(op)
tail = [] tail = []
tailappend = tail.append tailappend = tail.append
for av in av[1]: for av in av[1]:
skip = _len(code); emit(0) skip = _len(code); emit(0)
# _compile_info(code, av, flags) # _compile_info(code, av, flags)
_compile(code, av, flags) _compile(code, av, flags)
emit(OPCODES[JUMP]) emit(JUMP)
tailappend(_len(code)); emit(0) tailappend(_len(code)); emit(0)
code[skip] = _len(code) - skip code[skip] = _len(code) - skip
emit(0) # end of branch emit(0) # end of branch
for tail in tail: for tail in tail:
code[tail] = _len(code) - tail code[tail] = _len(code) - tail
elif op is CATEGORY: elif op is CATEGORY:
emit(OPCODES[op]) emit(op)
if flags & SRE_FLAG_LOCALE: if flags & SRE_FLAG_LOCALE:
av = CH_LOCALE[av] av = CH_LOCALE[av]
elif flags & SRE_FLAG_UNICODE: elif flags & SRE_FLAG_UNICODE:
av = CH_UNICODE[av] av = CH_UNICODE[av]
emit(CHCODES[av]) emit(av)
elif op is GROUPREF: elif op is GROUPREF:
if flags & SRE_FLAG_IGNORECASE: if flags & SRE_FLAG_IGNORECASE:
emit(OPCODES[OP_IGNORE[op]]) emit(OP_IGNORE[op])
else: else:
emit(OPCODES[op]) emit(op)
emit(av-1) emit(av-1)
elif op is GROUPREF_EXISTS: elif op is GROUPREF_EXISTS:
emit(OPCODES[op]) emit(op)
emit(av[0]-1) emit(av[0]-1)
skipyes = _len(code); emit(0) skipyes = _len(code); emit(0)
_compile(code, av[1], flags) _compile(code, av[1], flags)
if av[2]: if av[2]:
emit(OPCODES[JUMP]) emit(JUMP)
skipno = _len(code); emit(0) skipno = _len(code); emit(0)
code[skipyes] = _len(code) - skipyes + 1 code[skipyes] = _len(code) - skipyes + 1
_compile(code, av[2], flags) _compile(code, av[2], flags)
...@@ -170,7 +169,7 @@ def _compile_charset(charset, flags, code, fixup=None): ...@@ -170,7 +169,7 @@ def _compile_charset(charset, flags, code, fixup=None):
# compile charset subprogram # compile charset subprogram
emit = code.append emit = code.append
for op, av in _optimize_charset(charset, fixup): for op, av in _optimize_charset(charset, fixup):
emit(OPCODES[op]) emit(op)
if op is NEGATE: if op is NEGATE:
pass pass
elif op is LITERAL: elif op is LITERAL:
...@@ -184,14 +183,14 @@ def _compile_charset(charset, flags, code, fixup=None): ...@@ -184,14 +183,14 @@ def _compile_charset(charset, flags, code, fixup=None):
code.extend(av) code.extend(av)
elif op is CATEGORY: elif op is CATEGORY:
if flags & SRE_FLAG_LOCALE: if flags & SRE_FLAG_LOCALE:
emit(CHCODES[CH_LOCALE[av]]) emit(CH_LOCALE[av])
elif flags & SRE_FLAG_UNICODE: elif flags & SRE_FLAG_UNICODE:
emit(CHCODES[CH_UNICODE[av]]) emit(CH_UNICODE[av])
else: else:
emit(CHCODES[av]) emit(av)
else: else:
raise error("internal: unsupported set operator") raise error("internal: unsupported set operator")
emit(OPCODES[FAILURE]) emit(FAILURE)
def _optimize_charset(charset, fixup): def _optimize_charset(charset, fixup):
# internal: optimize character set # internal: optimize character set
...@@ -414,7 +413,7 @@ def _compile_info(code, pattern, flags): ...@@ -414,7 +413,7 @@ def _compile_info(code, pattern, flags):
## print "*** CHARSET", charset ## print "*** CHARSET", charset
# add an info block # add an info block
emit = code.append emit = code.append
emit(OPCODES[INFO]) emit(INFO)
skip = len(code); emit(0) skip = len(code); emit(0)
# literal flag # literal flag
mask = 0 mask = 0
...@@ -460,7 +459,7 @@ def _code(p, flags): ...@@ -460,7 +459,7 @@ def _code(p, flags):
# compile the pattern # compile the pattern
_compile(code, p.data, flags) _compile(code, p.data, flags)
code.append(OPCODES[SUCCESS]) code.append(SUCCESS)
return code return code
...@@ -475,7 +474,7 @@ def compile(p, flags=0): ...@@ -475,7 +474,7 @@ def compile(p, flags=0):
code = _code(p, flags) code = _code(p, flags)
# print code # print(code)
# map in either direction # map in either direction
groupindex = p.pattern.groupdict groupindex = p.pattern.groupdict
......
...@@ -23,138 +23,81 @@ from _sre import MAXREPEAT, MAXGROUPS ...@@ -23,138 +23,81 @@ from _sre import MAXREPEAT, MAXGROUPS
class error(Exception): class error(Exception):
pass pass
# operators
FAILURE = "failure" class _NamedIntConstant(int):
SUCCESS = "success" def __new__(cls, value, name):
self = super(_NamedIntConstant, cls).__new__(cls, value)
ANY = "any" self.name = name
ANY_ALL = "any_all" return self
ASSERT = "assert"
ASSERT_NOT = "assert_not" def __str__(self):
AT = "at" return self.name
BIGCHARSET = "bigcharset"
BRANCH = "branch" __repr__ = __str__
CALL = "call"
CATEGORY = "category" MAXREPEAT = _NamedIntConstant(MAXREPEAT, 'MAXREPEAT')
CHARSET = "charset"
GROUPREF = "groupref" def _makecodes(names):
GROUPREF_IGNORE = "groupref_ignore" names = names.strip().split()
GROUPREF_EXISTS = "groupref_exists" items = [_NamedIntConstant(i, name) for i, name in enumerate(names)]
IN = "in" globals().update({item.name: item for item in items})
IN_IGNORE = "in_ignore" return items
INFO = "info"
JUMP = "jump" # operators
LITERAL = "literal" # failure=0 success=1 (just because it looks better that way :-)
LITERAL_IGNORE = "literal_ignore" OPCODES = _makecodes("""
MARK = "mark" FAILURE SUCCESS
MAX_REPEAT = "max_repeat"
MAX_UNTIL = "max_until" ANY ANY_ALL
MIN_REPEAT = "min_repeat" ASSERT ASSERT_NOT
MIN_UNTIL = "min_until" AT
NEGATE = "negate" BRANCH
NOT_LITERAL = "not_literal" CALL
NOT_LITERAL_IGNORE = "not_literal_ignore" CATEGORY
RANGE = "range" CHARSET BIGCHARSET
RANGE_IGNORE = "range_ignore" GROUPREF GROUPREF_EXISTS GROUPREF_IGNORE
REPEAT = "repeat" IN IN_IGNORE
REPEAT_ONE = "repeat_one" INFO
SUBPATTERN = "subpattern" JUMP
MIN_REPEAT_ONE = "min_repeat_one" LITERAL LITERAL_IGNORE
MARK
MAX_UNTIL
MIN_UNTIL
NOT_LITERAL NOT_LITERAL_IGNORE
NEGATE
RANGE
REPEAT
REPEAT_ONE
SUBPATTERN
MIN_REPEAT_ONE
RANGE_IGNORE
MIN_REPEAT MAX_REPEAT
""")
del OPCODES[-2:] # remove MIN_REPEAT and MAX_REPEAT
# positions # positions
AT_BEGINNING = "at_beginning" ATCODES = _makecodes("""
AT_BEGINNING_LINE = "at_beginning_line" AT_BEGINNING AT_BEGINNING_LINE AT_BEGINNING_STRING
AT_BEGINNING_STRING = "at_beginning_string" AT_BOUNDARY AT_NON_BOUNDARY
AT_BOUNDARY = "at_boundary" AT_END AT_END_LINE AT_END_STRING
AT_NON_BOUNDARY = "at_non_boundary" AT_LOC_BOUNDARY AT_LOC_NON_BOUNDARY
AT_END = "at_end" AT_UNI_BOUNDARY AT_UNI_NON_BOUNDARY
AT_END_LINE = "at_end_line" """)
AT_END_STRING = "at_end_string"
AT_LOC_BOUNDARY = "at_loc_boundary"
AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
AT_UNI_BOUNDARY = "at_uni_boundary"
AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"
# categories # categories
CATEGORY_DIGIT = "category_digit" CHCODES = _makecodes("""
CATEGORY_NOT_DIGIT = "category_not_digit" CATEGORY_DIGIT CATEGORY_NOT_DIGIT
CATEGORY_SPACE = "category_space" CATEGORY_SPACE CATEGORY_NOT_SPACE
CATEGORY_NOT_SPACE = "category_not_space" CATEGORY_WORD CATEGORY_NOT_WORD
CATEGORY_WORD = "category_word" CATEGORY_LINEBREAK CATEGORY_NOT_LINEBREAK
CATEGORY_NOT_WORD = "category_not_word" CATEGORY_LOC_WORD CATEGORY_LOC_NOT_WORD
CATEGORY_LINEBREAK = "category_linebreak" CATEGORY_UNI_DIGIT CATEGORY_UNI_NOT_DIGIT
CATEGORY_NOT_LINEBREAK = "category_not_linebreak" CATEGORY_UNI_SPACE CATEGORY_UNI_NOT_SPACE
CATEGORY_LOC_WORD = "category_loc_word" CATEGORY_UNI_WORD CATEGORY_UNI_NOT_WORD
CATEGORY_LOC_NOT_WORD = "category_loc_not_word" CATEGORY_UNI_LINEBREAK CATEGORY_UNI_NOT_LINEBREAK
CATEGORY_UNI_DIGIT = "category_uni_digit" """)
CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
CATEGORY_UNI_SPACE = "category_uni_space"
CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
CATEGORY_UNI_WORD = "category_uni_word"
CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"
OPCODES = [
# failure=0 success=1 (just because it looks better that way :-)
FAILURE, SUCCESS,
ANY, ANY_ALL,
ASSERT, ASSERT_NOT,
AT,
BRANCH,
CALL,
CATEGORY,
CHARSET, BIGCHARSET,
GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
IN, IN_IGNORE,
INFO,
JUMP,
LITERAL, LITERAL_IGNORE,
MARK,
MAX_UNTIL,
MIN_UNTIL,
NOT_LITERAL, NOT_LITERAL_IGNORE,
NEGATE,
RANGE,
REPEAT,
REPEAT_ONE,
SUBPATTERN,
MIN_REPEAT_ONE,
RANGE_IGNORE,
]
ATCODES = [
AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
AT_UNI_NON_BOUNDARY
]
CHCODES = [
CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
CATEGORY_UNI_NOT_LINEBREAK
]
def makedict(list):
    """Map each item of *list* to its zero-based position.

    The returned dict is used to translate symbolic names into numeric
    codes.  If an item occurs more than once, the index of its last
    occurrence is kept.

    The parameter name shadows the ``list`` builtin; it is kept unchanged
    so existing callers are not affected.
    """
    return {item: index for index, item in enumerate(list)}
OPCODES = makedict(OPCODES)
ATCODES = makedict(ATCODES)
CHCODES = makedict(CHCODES)
# replacement operations for "ignore case" mode # replacement operations for "ignore case" mode
OP_IGNORE = { OP_IGNORE = {
...@@ -220,9 +163,9 @@ SRE_INFO_CHARSET = 4 # pattern starts with character from given set ...@@ -220,9 +163,9 @@ SRE_INFO_CHARSET = 4 # pattern starts with character from given set
if __name__ == "__main__": if __name__ == "__main__":
def dump(f, d, prefix): def dump(f, d, prefix):
items = sorted(d.items(), key=lambda a: a[1]) items = sorted(d)
for k, v in items: for item in items:
f.write("#define %s_%s %s\n" % (prefix, k.upper(), v)) f.write("#define %s_%s %d\n" % (prefix, item, item))
f = open("sre_constants.h", "w") f = open("sre_constants.h", "w")
f.write("""\ f.write("""\
/* /*
......
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
# XXX: show string offset and offending character for all errors # XXX: show string offset and offending character for all errors
from sre_constants import * from sre_constants import *
from _sre import MAXREPEAT
SPECIAL_CHARS = ".\\[{()*+?^$|" SPECIAL_CHARS = ".\\[{()*+?^$|"
REPEAT_CHARS = "*+?{" REPEAT_CHARS = "*+?{"
...@@ -103,24 +102,24 @@ class SubPattern: ...@@ -103,24 +102,24 @@ class SubPattern:
nl = True nl = True
seqtypes = (tuple, list) seqtypes = (tuple, list)
for op, av in self.data: for op, av in self.data:
print(level*" " + op, end='') print(level*" " + str(op), end='')
if op == IN: if op == IN:
# member sublanguage # member sublanguage
print() print()
for op, a in av: for op, a in av:
print((level+1)*" " + op, a) print((level+1)*" " + str(op), a)
elif op == BRANCH: elif op == BRANCH:
print() print()
for i, a in enumerate(av[1]): for i, a in enumerate(av[1]):
if i: if i:
print(level*" " + "or") print(level*" " + "OR")
a.dump(level+1) a.dump(level+1)
elif op == GROUPREF_EXISTS: elif op == GROUPREF_EXISTS:
condgroup, item_yes, item_no = av condgroup, item_yes, item_no = av
print('', condgroup) print('', condgroup)
item_yes.dump(level+1) item_yes.dump(level+1)
if item_no: if item_no:
print(level*" " + "else") print(level*" " + "ELSE")
item_no.dump(level+1) item_no.dump(level+1)
elif isinstance(av, seqtypes): elif isinstance(av, seqtypes):
nl = False nl = False
......
...@@ -1285,22 +1285,22 @@ class ReTests(unittest.TestCase): ...@@ -1285,22 +1285,22 @@ class ReTests(unittest.TestCase):
with captured_stdout() as out: with captured_stdout() as out:
re.compile(pat, re.DEBUG) re.compile(pat, re.DEBUG)
dump = '''\ dump = '''\
subpattern 1 SUBPATTERN 1
literal 46 LITERAL 46
subpattern None SUBPATTERN None
branch BRANCH
in IN
literal 99 LITERAL 99
literal 104 LITERAL 104
or OR
literal 112 LITERAL 112
literal 121 LITERAL 121
subpattern None SUBPATTERN None
groupref_exists 1 GROUPREF_EXISTS 1
at at_end AT AT_END
else ELSE
literal 58 LITERAL 58
literal 32 LITERAL 32
''' '''
self.assertEqual(out.getvalue(), dump) self.assertEqual(out.getvalue(), dump)
# Debug output is output again even a second time (bypassing # Debug output is output again even a second time (bypassing
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment