Commit 305ccbe2 authored by Serhiy Storchaka, committed by GitHub

bpo-30298: Weaken the condition of deprecation warnings for inline modifiers. (#1490)

Several consecutive inline modifiers are now allowed at the start of the
pattern (e.g. '(?i)(?s)...').  In verbose mode, whitespace and comments
are now allowed before and between inline modifiers (e.g.
'(?x) (?i) (?s)...').
parent 211a392c
...@@ -412,7 +412,7 @@ def _parse_sub(source, state, verbose, nested=True): ...@@ -412,7 +412,7 @@ def _parse_sub(source, state, verbose, nested=True):
sourcematch = source.match sourcematch = source.match
start = source.tell() start = source.tell()
while True: while True:
itemsappend(_parse(source, state, verbose)) itemsappend(_parse(source, state, verbose, not nested and not items))
if not sourcematch("|"): if not sourcematch("|"):
break break
...@@ -466,7 +466,7 @@ def _parse_sub_cond(source, state, condgroup, verbose): ...@@ -466,7 +466,7 @@ def _parse_sub_cond(source, state, condgroup, verbose):
subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no))) subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
return subpattern return subpattern
def _parse(source, state, verbose): def _parse(source, state, verbose, first=False):
# parse a simple pattern # parse a simple pattern
subpattern = SubPattern(state) subpattern = SubPattern(state)
...@@ -730,10 +730,9 @@ def _parse(source, state, verbose): ...@@ -730,10 +730,9 @@ def _parse(source, state, verbose):
state.checklookbehindgroup(condgroup, source) state.checklookbehindgroup(condgroup, source)
elif char in FLAGS or char == "-": elif char in FLAGS or char == "-":
# flags # flags
pos = source.pos
flags = _parse_flags(source, state, char) flags = _parse_flags(source, state, char)
if flags is None: # global flags if flags is None: # global flags
if pos != 3: # "(?x" if not first or subpattern:
import warnings import warnings
warnings.warn( warnings.warn(
'Flags not at the start of the expression %s%s' % ( 'Flags not at the start of the expression %s%s' % (
...@@ -742,6 +741,8 @@ def _parse(source, state, verbose): ...@@ -742,6 +741,8 @@ def _parse(source, state, verbose):
), ),
DeprecationWarning, stacklevel=7 DeprecationWarning, stacklevel=7
) )
if (state.flags & SRE_FLAG_VERBOSE) and not verbose:
raise Verbose
continue continue
add_flags, del_flags = flags add_flags, del_flags = flags
group = None group = None
...@@ -795,9 +796,6 @@ def _parse_flags(source, state, char): ...@@ -795,9 +796,6 @@ def _parse_flags(source, state, char):
msg = "unknown flag" if char.isalpha() else "missing -, : or )" msg = "unknown flag" if char.isalpha() else "missing -, : or )"
raise source.error(msg, len(char)) raise source.error(msg, len(char))
if char == ")": if char == ")":
if ((add_flags & SRE_FLAG_VERBOSE) and
not (state.flags & SRE_FLAG_VERBOSE)):
raise Verbose
state.flags |= add_flags state.flags |= add_flags
return None return None
if add_flags & GLOBAL_FLAGS: if add_flags & GLOBAL_FLAGS:
......
...@@ -1325,32 +1325,43 @@ class ReTests(unittest.TestCase): ...@@ -1325,32 +1325,43 @@ class ReTests(unittest.TestCase):
upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below
lower_char = '\u1ea1' # Latin Small Letter A with Dot Below lower_char = '\u1ea1' # Latin Small Letter A with Dot Below
p = re.compile(upper_char, re.I | re.U) p = re.compile('.' + upper_char, re.I | re.S)
q = p.match(lower_char) q = p.match('\n' + lower_char)
self.assertTrue(q) self.assertTrue(q)
p = re.compile(lower_char, re.I | re.U) p = re.compile('.' + lower_char, re.I | re.S)
q = p.match(upper_char) q = p.match('\n' + upper_char)
self.assertTrue(q) self.assertTrue(q)
p = re.compile('(?i)' + upper_char, re.U) p = re.compile('(?i).' + upper_char, re.S)
q = p.match(lower_char) q = p.match('\n' + lower_char)
self.assertTrue(q) self.assertTrue(q)
p = re.compile('(?i)' + lower_char, re.U) p = re.compile('(?i).' + lower_char, re.S)
q = p.match(upper_char) q = p.match('\n' + upper_char)
self.assertTrue(q) self.assertTrue(q)
p = re.compile('(?iu)' + upper_char) p = re.compile('(?is).' + upper_char)
q = p.match(lower_char) q = p.match('\n' + lower_char)
self.assertTrue(q) self.assertTrue(q)
p = re.compile('(?iu)' + lower_char) p = re.compile('(?is).' + lower_char)
q = p.match(upper_char) q = p.match('\n' + upper_char)
self.assertTrue(q) self.assertTrue(q)
self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char)) p = re.compile('(?s)(?i).' + upper_char)
self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char)) q = p.match('\n' + lower_char)
self.assertTrue(q)
p = re.compile('(?s)(?i).' + lower_char)
q = p.match('\n' + upper_char)
self.assertTrue(q)
self.assertTrue(re.match('(?ix) ' + upper_char, lower_char))
self.assertTrue(re.match('(?ix) ' + lower_char, upper_char))
self.assertTrue(re.match(' (?i) ' + upper_char, lower_char, re.X))
self.assertTrue(re.match('(?x) (?i) ' + upper_char, lower_char))
self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char, re.X))
p = upper_char + '(?i)' p = upper_char + '(?i)'
with self.assertWarns(DeprecationWarning) as warns: with self.assertWarns(DeprecationWarning) as warns:
...@@ -1368,6 +1379,26 @@ class ReTests(unittest.TestCase): ...@@ -1368,6 +1379,26 @@ class ReTests(unittest.TestCase):
'Flags not at the start of the expression %s (truncated)' % p[:20] 'Flags not at the start of the expression %s (truncated)' % p[:20]
) )
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match('(?s).(?i)' + upper_char, '\n' + lower_char))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match('(?i) ' + upper_char + ' (?x)', lower_char))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match('^(?i)' + upper_char, lower_char))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match('$|(?i)' + upper_char, lower_char))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match('(?:(?i)' + upper_char + ')', lower_char))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.fullmatch('(^)?(?(1)(?i)' + upper_char + ')',
lower_char))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.fullmatch('($)?(?(1)|(?i)' + upper_char + ')',
lower_char))
def test_dollar_matches_twice(self): def test_dollar_matches_twice(self):
"$ matches the end of string, and just before the terminating \n" "$ matches the end of string, and just before the terminating \n"
pattern = re.compile('$') pattern = re.compile('$')
......
...@@ -320,6 +320,12 @@ Extension Modules ...@@ -320,6 +320,12 @@ Extension Modules
Library Library
------- -------
- bpo-30298: Weaken the condition of deprecation warnings for inline modifiers.
Several consecutive inline modifiers are now allowed at the start of the
pattern (e.g. ``'(?i)(?s)...'``). In verbose mode, whitespace and comments
are now allowed before and between inline modifiers (e.g.
``'(?x) (?i) (?s)...'``).
- bpo-30285: Optimized case-insensitive matching and searching of regular - bpo-30285: Optimized case-insensitive matching and searching of regular
expressions. expressions.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment