Commit 37b10a7d authored by Raymond Hettinger

Simple optimizations:

* pre-build a single identity function for the fixup function
* pre-build membership tests in dictionaries instead of in-line tuples
* assign len() to a local variable
* assign append() methods to a local variable
* use xrange() instead of range()
* replace "x<<1" with "x+x"
parent a2d958da
...@@ -21,11 +21,25 @@ if _sre.CODESIZE == 2: ...@@ -21,11 +21,25 @@ if _sre.CODESIZE == 2:
else: else:
MAXCODE = 0xFFFFFFFFL MAXCODE = 0xFFFFFFFFL
def _identityfunction(x):
return x
# use xrange if available
try:
xrange
except NameError:
xrange = range
def _compile(code, pattern, flags): def _compile(code, pattern, flags):
# internal: compile a (sub)pattern # internal: compile a (sub)pattern
emit = code.append emit = code.append
_len = len
LITERAL_CODES = {LITERAL:1, NOT_LITERAL:1}
REPEATING_CODES = {REPEAT:1, MIN_REPEAT:1, MAX_REPEAT:1}
SUCCESS_CODES = {SUCCESS:1, FAILURE:1}
ASSERT_CODES = {ASSERT:1, ASSERT_NOT:1}
for op, av in pattern: for op, av in pattern:
if op in (LITERAL, NOT_LITERAL): if op in LITERAL_CODES:
if flags & SRE_FLAG_IGNORECASE: if flags & SRE_FLAG_IGNORECASE:
emit(OPCODES[OP_IGNORE[op]]) emit(OPCODES[OP_IGNORE[op]])
emit(_sre.getlower(av, flags)) emit(_sre.getlower(av, flags))
...@@ -39,44 +53,44 @@ def _compile(code, pattern, flags): ...@@ -39,44 +53,44 @@ def _compile(code, pattern, flags):
return _sre.getlower(literal, flags) return _sre.getlower(literal, flags)
else: else:
emit(OPCODES[op]) emit(OPCODES[op])
fixup = lambda x: x fixup = _identityfunction
skip = len(code); emit(0) skip = _len(code); emit(0)
_compile_charset(av, flags, code, fixup) _compile_charset(av, flags, code, fixup)
code[skip] = len(code) - skip code[skip] = _len(code) - skip
elif op is ANY: elif op is ANY:
if flags & SRE_FLAG_DOTALL: if flags & SRE_FLAG_DOTALL:
emit(OPCODES[ANY_ALL]) emit(OPCODES[ANY_ALL])
else: else:
emit(OPCODES[ANY]) emit(OPCODES[ANY])
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT): elif op in REPEATING_CODES:
if flags & SRE_FLAG_TEMPLATE: if flags & SRE_FLAG_TEMPLATE:
raise error, "internal: unsupported template operator" raise error, "internal: unsupported template operator"
emit(OPCODES[REPEAT]) emit(OPCODES[REPEAT])
skip = len(code); emit(0) skip = _len(code); emit(0)
emit(av[0]) emit(av[0])
emit(av[1]) emit(av[1])
_compile(code, av[2], flags) _compile(code, av[2], flags)
emit(OPCODES[SUCCESS]) emit(OPCODES[SUCCESS])
code[skip] = len(code) - skip code[skip] = _len(code) - skip
elif _simple(av) and op != REPEAT: elif _simple(av) and op is not REPEAT:
if op == MAX_REPEAT: if op is MAX_REPEAT:
emit(OPCODES[REPEAT_ONE]) emit(OPCODES[REPEAT_ONE])
else: else:
emit(OPCODES[MIN_REPEAT_ONE]) emit(OPCODES[MIN_REPEAT_ONE])
skip = len(code); emit(0) skip = _len(code); emit(0)
emit(av[0]) emit(av[0])
emit(av[1]) emit(av[1])
_compile(code, av[2], flags) _compile(code, av[2], flags)
emit(OPCODES[SUCCESS]) emit(OPCODES[SUCCESS])
code[skip] = len(code) - skip code[skip] = _len(code) - skip
else: else:
emit(OPCODES[REPEAT]) emit(OPCODES[REPEAT])
skip = len(code); emit(0) skip = _len(code); emit(0)
emit(av[0]) emit(av[0])
emit(av[1]) emit(av[1])
_compile(code, av[2], flags) _compile(code, av[2], flags)
code[skip] = len(code) - skip code[skip] = _len(code) - skip
if op == MAX_REPEAT: if op is MAX_REPEAT:
emit(OPCODES[MAX_UNTIL]) emit(OPCODES[MAX_UNTIL])
else: else:
emit(OPCODES[MIN_UNTIL]) emit(OPCODES[MIN_UNTIL])
...@@ -89,11 +103,11 @@ def _compile(code, pattern, flags): ...@@ -89,11 +103,11 @@ def _compile(code, pattern, flags):
if av[0]: if av[0]:
emit(OPCODES[MARK]) emit(OPCODES[MARK])
emit((av[0]-1)*2+1) emit((av[0]-1)*2+1)
elif op in (SUCCESS, FAILURE): elif op in SUCCESS_CODES:
emit(OPCODES[op]) emit(OPCODES[op])
elif op in (ASSERT, ASSERT_NOT): elif op in ASSERT_CODES:
emit(OPCODES[op]) emit(OPCODES[op])
skip = len(code); emit(0) skip = _len(code); emit(0)
if av[0] >= 0: if av[0] >= 0:
emit(0) # look ahead emit(0) # look ahead
else: else:
...@@ -103,13 +117,13 @@ def _compile(code, pattern, flags): ...@@ -103,13 +117,13 @@ def _compile(code, pattern, flags):
emit(lo) # look behind emit(lo) # look behind
_compile(code, av[1], flags) _compile(code, av[1], flags)
emit(OPCODES[SUCCESS]) emit(OPCODES[SUCCESS])
code[skip] = len(code) - skip code[skip] = _len(code) - skip
elif op is CALL: elif op is CALL:
emit(OPCODES[op]) emit(OPCODES[op])
skip = len(code); emit(0) skip = _len(code); emit(0)
_compile(code, av, flags) _compile(code, av, flags)
emit(OPCODES[SUCCESS]) emit(OPCODES[SUCCESS])
code[skip] = len(code) - skip code[skip] = _len(code) - skip
elif op is AT: elif op is AT:
emit(OPCODES[op]) emit(OPCODES[op])
if flags & SRE_FLAG_MULTILINE: if flags & SRE_FLAG_MULTILINE:
...@@ -122,16 +136,17 @@ def _compile(code, pattern, flags): ...@@ -122,16 +136,17 @@ def _compile(code, pattern, flags):
elif op is BRANCH: elif op is BRANCH:
emit(OPCODES[op]) emit(OPCODES[op])
tail = [] tail = []
tailappend = tail.append
for av in av[1]: for av in av[1]:
skip = len(code); emit(0) skip = _len(code); emit(0)
# _compile_info(code, av, flags) # _compile_info(code, av, flags)
_compile(code, av, flags) _compile(code, av, flags)
emit(OPCODES[JUMP]) emit(OPCODES[JUMP])
tail.append(len(code)); emit(0) tailappend(_len(code)); emit(0)
code[skip] = len(code) - skip code[skip] = _len(code) - skip
emit(0) # end of branch emit(0) # end of branch
for tail in tail: for tail in tail:
code[tail] = len(code) - tail code[tail] = _len(code) - tail
elif op is CATEGORY: elif op is CATEGORY:
emit(OPCODES[op]) emit(OPCODES[op])
if flags & SRE_FLAG_LOCALE: if flags & SRE_FLAG_LOCALE:
...@@ -148,16 +163,16 @@ def _compile(code, pattern, flags): ...@@ -148,16 +163,16 @@ def _compile(code, pattern, flags):
elif op is GROUPREF_EXISTS: elif op is GROUPREF_EXISTS:
emit(OPCODES[op]) emit(OPCODES[op])
emit((av[0]-1)*2) emit((av[0]-1)*2)
skipyes = len(code); emit(0) skipyes = _len(code); emit(0)
_compile(code, av[1], flags) _compile(code, av[1], flags)
if av[2]: if av[2]:
emit(OPCODES[JUMP]) emit(OPCODES[JUMP])
skipno = len(code); emit(0) skipno = _len(code); emit(0)
code[skipyes] = len(code) - skipyes + 1 code[skipyes] = _len(code) - skipyes + 1
_compile(code, av[2], flags) _compile(code, av[2], flags)
code[skipno] = len(code) - skipno code[skipno] = _len(code) - skipno
else: else:
code[skipyes] = len(code) - skipyes + 1 code[skipyes] = _len(code) - skipyes + 1
else: else:
raise ValueError, ("unsupported operand type", op) raise ValueError, ("unsupported operand type", op)
...@@ -165,7 +180,7 @@ def _compile_charset(charset, flags, code, fixup=None): ...@@ -165,7 +180,7 @@ def _compile_charset(charset, flags, code, fixup=None):
# compile charset subprogram # compile charset subprogram
emit = code.append emit = code.append
if fixup is None: if fixup is None:
fixup = lambda x: x fixup = _identityfunction
for op, av in _optimize_charset(charset, fixup): for op, av in _optimize_charset(charset, fixup):
emit(OPCODES[op]) emit(OPCODES[op])
if op is NEGATE: if op is NEGATE:
...@@ -193,11 +208,12 @@ def _compile_charset(charset, flags, code, fixup=None): ...@@ -193,11 +208,12 @@ def _compile_charset(charset, flags, code, fixup=None):
def _optimize_charset(charset, fixup): def _optimize_charset(charset, fixup):
# internal: optimize character set # internal: optimize character set
out = [] out = []
outappend = out.append
charmap = [False]*256 charmap = [False]*256
try: try:
for op, av in charset: for op, av in charset:
if op is NEGATE: if op is NEGATE:
out.append((op, av)) outappend((op, av))
elif op is LITERAL: elif op is LITERAL:
charmap[fixup(av)] = True charmap[fixup(av)] = True
elif op is RANGE: elif op is RANGE:
...@@ -212,35 +228,37 @@ def _optimize_charset(charset, fixup): ...@@ -212,35 +228,37 @@ def _optimize_charset(charset, fixup):
# compress character map # compress character map
i = p = n = 0 i = p = n = 0
runs = [] runs = []
runsappend = runs.append
for c in charmap: for c in charmap:
if c: if c:
if n == 0: if n == 0:
p = i p = i
n = n + 1 n = n + 1
elif n: elif n:
runs.append((p, n)) runsappend((p, n))
n = 0 n = 0
i = i + 1 i = i + 1
if n: if n:
runs.append((p, n)) runsappend((p, n))
if len(runs) <= 2: if len(runs) <= 2:
# use literal/range # use literal/range
for p, n in runs: for p, n in runs:
if n == 1: if n == 1:
out.append((LITERAL, p)) outappend((LITERAL, p))
else: else:
out.append((RANGE, (p, p+n-1))) outappend((RANGE, (p, p+n-1)))
if len(out) < len(charset): if len(out) < len(charset):
return out return out
else: else:
# use bitmap # use bitmap
data = _mk_bitmap(charmap) data = _mk_bitmap(charmap)
out.append((CHARSET, data)) outappend((CHARSET, data))
return out return out
return charset return charset
def _mk_bitmap(bits): def _mk_bitmap(bits):
data = [] data = []
dataappend = data.append
if _sre.CODESIZE == 2: if _sre.CODESIZE == 2:
start = (1, 0) start = (1, 0)
else: else:
...@@ -249,9 +267,9 @@ def _mk_bitmap(bits): ...@@ -249,9 +267,9 @@ def _mk_bitmap(bits):
for c in bits: for c in bits:
if c: if c:
v = v + m v = v + m
m = m << 1 m = m + m
if m > MAXCODE: if m > MAXCODE:
data.append(v) dataappend(v)
m, v = start m, v = start
return data return data
...@@ -295,7 +313,7 @@ def _optimize_unicode(charset, fixup): ...@@ -295,7 +313,7 @@ def _optimize_unicode(charset, fixup):
elif op is LITERAL: elif op is LITERAL:
charmap[fixup(av)] = True charmap[fixup(av)] = True
elif op is RANGE: elif op is RANGE:
for i in range(fixup(av[0]), fixup(av[1])+1): for i in xrange(fixup(av[0]), fixup(av[1])+1):
charmap[i] = True charmap[i] = True
elif op is CATEGORY: elif op is CATEGORY:
# XXX: could expand category # XXX: could expand category
...@@ -307,13 +325,13 @@ def _optimize_unicode(charset, fixup): ...@@ -307,13 +325,13 @@ def _optimize_unicode(charset, fixup):
if sys.maxunicode != 65535: if sys.maxunicode != 65535:
# XXX: negation does not work with big charsets # XXX: negation does not work with big charsets
return charset return charset
for i in range(65536): for i in xrange(65536):
charmap[i] = not charmap[i] charmap[i] = not charmap[i]
comps = {} comps = {}
mapping = [0]*256 mapping = [0]*256
block = 0 block = 0
data = [] data = []
for i in range(256): for i in xrange(256):
chunk = tuple(charmap[i*256:(i+1)*256]) chunk = tuple(charmap[i*256:(i+1)*256])
new = comps.setdefault(chunk, block) new = comps.setdefault(chunk, block)
mapping[i] = new mapping[i] = new
...@@ -348,19 +366,21 @@ def _compile_info(code, pattern, flags): ...@@ -348,19 +366,21 @@ def _compile_info(code, pattern, flags):
return # not worth it return # not worth it
# look for a literal prefix # look for a literal prefix
prefix = [] prefix = []
prefixappend = prefix.append
prefix_skip = 0 prefix_skip = 0
charset = [] # not used charset = [] # not used
charsetappend = charset.append
if not (flags & SRE_FLAG_IGNORECASE): if not (flags & SRE_FLAG_IGNORECASE):
# look for literal prefix # look for literal prefix
for op, av in pattern.data: for op, av in pattern.data:
if op is LITERAL: if op is LITERAL:
if len(prefix) == prefix_skip: if len(prefix) == prefix_skip:
prefix_skip = prefix_skip + 1 prefix_skip = prefix_skip + 1
prefix.append(av) prefixappend(av)
elif op is SUBPATTERN and len(av[1]) == 1: elif op is SUBPATTERN and len(av[1]) == 1:
op, av = av[1][0] op, av = av[1][0]
if op is LITERAL: if op is LITERAL:
prefix.append(av) prefixappend(av)
else: else:
break break
else: else:
...@@ -371,27 +391,29 @@ def _compile_info(code, pattern, flags): ...@@ -371,27 +391,29 @@ def _compile_info(code, pattern, flags):
if op is SUBPATTERN and av[1]: if op is SUBPATTERN and av[1]:
op, av = av[1][0] op, av = av[1][0]
if op is LITERAL: if op is LITERAL:
charset.append((op, av)) charsetappend((op, av))
elif op is BRANCH: elif op is BRANCH:
c = [] c = []
cappend = c.append
for p in av[1]: for p in av[1]:
if not p: if not p:
break break
op, av = p[0] op, av = p[0]
if op is LITERAL: if op is LITERAL:
c.append((op, av)) cappend((op, av))
else: else:
break break
else: else:
charset = c charset = c
elif op is BRANCH: elif op is BRANCH:
c = [] c = []
cappend = c.append
for p in av[1]: for p in av[1]:
if not p: if not p:
break break
op, av = p[0] op, av = p[0]
if op is LITERAL: if op is LITERAL:
c.append((op, av)) cappend((op, av))
else: else:
break break
else: else:
...@@ -432,7 +454,7 @@ def _compile_info(code, pattern, flags): ...@@ -432,7 +454,7 @@ def _compile_info(code, pattern, flags):
code.extend(prefix) code.extend(prefix)
# generate overlap table # generate overlap table
table = [-1] + ([0]*len(prefix)) table = [-1] + ([0]*len(prefix))
for i in range(len(prefix)): for i in xrange(len(prefix)):
table[i+1] = table[i]+1 table[i+1] = table[i]+1
while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]: while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
table[i+1] = table[table[i+1]-1]+1 table[i+1] = table[table[i+1]-1]+1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment