Commit 821a9d14, authored by Serhiy Storchaka, committed by GitHub

bpo-30340: Enhanced regular expressions optimization. (#1542)

This increased the performance of matching some patterns by up to 25 times.
parent cbddf58c
......@@ -20,6 +20,7 @@ _LITERAL_CODES = {LITERAL, NOT_LITERAL}
# Named groupings of opcodes, so that related opcodes can be tested with a
# single set-membership check.
# The three repetition opcodes.
_REPEATING_CODES = {REPEAT, MIN_REPEAT, MAX_REPEAT}
# The SUCCESS and FAILURE opcodes.
_SUCCESS_CODES = {SUCCESS, FAILURE}
# The positive and negative assertion opcodes.
_ASSERT_CODES = {ASSERT, ASSERT_NOT}
# The literal opcodes plus ANY and IN; _simple() accepts an item whose
# opcode is in this set as a "simple" operator.
_UNIT_CODES = _LITERAL_CODES | {ANY, IN}
# Sets of lowercase characters which have the same uppercase.
_equivalences = (
......@@ -125,7 +126,7 @@ def _compile(code, pattern, flags):
elif op in REPEATING_CODES:
if flags & SRE_FLAG_TEMPLATE:
raise error("internal: unsupported template operator %r" % (op,))
elif _simple(av) and op is not REPEAT:
if _simple(av[2]):
if op is MAX_REPEAT:
emit(REPEAT_ONE)
else:
......@@ -404,10 +405,14 @@ def _bytes_to_codes(b):
assert len(a) * a.itemsize == len(b)
return a.tolist()
def _simple(av):
    """Check whether *av* describes a "simple" operator.

    The repeated body is taken from ``av[2]``.  It counts as simple when
    its ``getwidth()`` reports a minimum and maximum width that are both
    exactly 1, and its first item is not a SUBPATTERN.
    """
    body = av[2]
    min_width, max_width = body.getwidth()
    # A body that is not exactly one unit wide can never be simple.
    if not (min_width == max_width == 1):
        return False
    # The opcode of the first item must not be a group.
    first_op = body[0][0]
    return first_op != SUBPATTERN
def _simple(p):
    """Check whether subpattern *p* is a "simple" operator.

    *p* is simple when it holds exactly one ``(op, av)`` item and that
    item is either

    * a unit operator (``op`` is a member of ``_UNIT_CODES``), or
    * a SUBPATTERN whose first argument is None and whose last argument
      is itself a simple subpattern (checked recursively).

    NOTE(review): a None first argument presumably means the group has
    no group number (non-capturing) -- confirm against the parser.
    """
    if len(p) == 1:
        opcode, arg = p[0]
        if opcode is not SUBPATTERN:
            return opcode in _UNIT_CODES
        # Look through the group and judge what it wraps.
        return arg[0] is None and _simple(arg[-1])
    # Zero or several items can never form a single simple operator.
    return False
def _generate_overlap_table(prefix):
"""
......
This diff is collapsed.
......@@ -1695,20 +1695,18 @@ class ReTests(unittest.TestCase):
dump = '''\
SUBPATTERN 1 0 0
LITERAL 46
SUBPATTERN None 0 0
BRANCH
IN
LITERAL 99
LITERAL 104
OR
LITERAL 112
LITERAL 121
SUBPATTERN None 0 0
GROUPREF_EXISTS 1
AT AT_END
ELSE
LITERAL 58
LITERAL 32
BRANCH
IN
LITERAL 99
LITERAL 104
OR
LITERAL 112
LITERAL 121
GROUPREF_EXISTS 1
AT AT_END
ELSE
LITERAL 58
LITERAL 32
'''
self.assertEqual(out.getvalue(), dump)
# Debug output is output again even a second time (bypassing
......
......@@ -326,6 +326,9 @@ Library
- bpo-30048: Fixed a bug where ``Task.cancel()`` could be ignored when the
task is running a coroutine and the coroutine returns without any further
``await``.
- bpo-30340: Enhanced regular expressions optimization. This increased
the performance of matching some patterns by up to 25 times.
- bpo-30298: Weaken the condition of deprecation warnings for inline modifiers.
Several consecutive inline modifiers are now allowed at the start of the
pattern (e.g. ``'(?i)(?s)...'``). In verbose mode whitespaces and comments
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment