Commit a3369a52 authored by Serhiy Storchaka's avatar Serhiy Storchaka

Issues #814253, #9179: Warnings now are raised when group references and

conditional group references are used in lookbehind assertions in regular
expressions.
parent a1543cdc
......@@ -281,7 +281,9 @@ The special characters are:
assertion`. ``(?<=abc)def`` will find a match in ``abcdef``, since the
lookbehind will back up 3 characters and check if the contained pattern matches.
The contained pattern must only match strings of some fixed length, meaning that
``abc`` or ``a|b`` are allowed, but ``a*`` and ``a{3,4}`` are not. Note that
``abc`` or ``a|b`` are allowed, but ``a*`` and ``a{3,4}`` are not. Group
references are not supported even if they match strings of some fixed length.
Note that
patterns which start with positive lookbehind assertions will not match at the
beginning of the string being searched; you will most likely want to use the
:func:`search` function rather than the :func:`match` function:
......@@ -301,7 +303,8 @@ The special characters are:
Matches if the current position in the string is not preceded by a match for
``...``. This is called a :dfn:`negative lookbehind assertion`. Similar to
positive lookbehind assertions, the contained pattern must only match strings of
some fixed length. Patterns which start with negative lookbehind assertions may
some fixed length and shouldn't contain group references.
Patterns which start with negative lookbehind assertions may
match at the beginning of the string being searched.
``(?(id/name)yes-pattern|no-pattern)``
......
......@@ -69,6 +69,8 @@ class Pattern:
self.open = []
self.groups = 1
self.groupdict = {}
self.lookbehind = 0
def opengroup(self, name=None):
gid = self.groups
self.groups = gid + 1
......@@ -352,6 +354,11 @@ def _escape(source, escape, state):
if group < state.groups:
if not state.checkgroup(group):
raise error("cannot refer to open group")
if state.lookbehind:
import warnings
warnings.warn('group references in lookbehind '
'assertions are not supported',
RuntimeWarning)
return GROUPREF, group
raise ValueError
if len(escape) == 2:
......@@ -630,6 +637,11 @@ def _parse(source, state):
if gid is None:
msg = "unknown group name: {0!r}".format(name)
raise error(msg)
if state.lookbehind:
import warnings
warnings.warn('group references in lookbehind '
'assertions are not supported',
RuntimeWarning)
subpatternappend((GROUPREF, gid))
continue
else:
......@@ -658,7 +670,10 @@ def _parse(source, state):
raise error("syntax error")
dir = -1 # lookbehind
char = sourceget()
state.lookbehind += 1
p = _parse_sub(source, state)
if dir < 0:
state.lookbehind -= 1
if not sourcematch(")"):
raise error("unbalanced parenthesis")
if char == "=":
......@@ -689,6 +704,11 @@ def _parse(source, state):
condgroup = int(condname)
except ValueError:
raise error("bad character in group name")
if state.lookbehind:
import warnings
warnings.warn('group references in lookbehind '
'assertions are not supported',
RuntimeWarning)
else:
# flags
if not source.next in FLAGS:
......
......@@ -557,7 +557,7 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
"a\n\nb")
def test_non_consuming(self):
def test_lookahead(self):
self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
......@@ -571,6 +571,37 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
# Group reference.
self.assertTrue(re.match(r'(a)b(?=\1)a', 'aba'))
self.assertIsNone(re.match(r'(a)b(?=\1)c', 'abac'))
# Named group reference.
self.assertTrue(re.match(r'(?P<g>a)b(?=(?P=g))a', 'aba'))
self.assertIsNone(re.match(r'(?P<g>a)b(?=(?P=g))c', 'abac'))
# Conditional group reference.
self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(2)c|x))c', 'abc'))
self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(1)b|x))c', 'abc'))
self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(1)c|x))c', 'abc'))
# Group used before defined.
self.assertTrue(re.match(r'(a)b(?=(?(2)x|c))(c)', 'abc'))
self.assertIsNone(re.match(r'(a)b(?=(?(2)b|x))(c)', 'abc'))
self.assertTrue(re.match(r'(a)b(?=(?(1)c|x))(c)', 'abc'))
def test_lookbehind(self):
self.assertTrue(re.match(r'ab(?<=b)c', 'abc'))
self.assertIsNone(re.match(r'ab(?<=c)c', 'abc'))
self.assertIsNone(re.match(r'ab(?<!b)c', 'abc'))
self.assertTrue(re.match(r'ab(?<!c)c', 'abc'))
# Group reference.
self.assertWarns(RuntimeWarning, re.compile, r'(a)a(?<=\1)c')
# Named group reference.
self.assertWarns(RuntimeWarning, re.compile, r'(?P<g>a)a(?<=(?P=g))c')
# Conditional group reference.
self.assertWarns(RuntimeWarning, re.compile, r'(a)b(?<=(?(1)b|x))c')
# Group used before defined.
self.assertWarns(RuntimeWarning, re.compile, r'(a)b(?<=(?(2)b|x))(c)')
def test_ignore_case(self):
self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
......
......@@ -13,6 +13,10 @@ Core and Builtins
Library
-------
- Issues #814253, #9179: Warnings now are raised when group references and
conditional group references are used in lookbehind assertions in regular
expressions.
- Issue #23215: Multibyte codecs with custom error handlers that ignores errors
consumed too much memory and raised SystemError or MemoryError.
Original patch by Aleksi Torhamo.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment