Commit 9bd85b83 authored by Serhiy Storchaka's avatar Serhiy Storchaka

Issue #27030: Unknown escapes consisting of ``'\'`` and an ASCII letter in

regular expressions are now errors.
parent d35bf032
...@@ -317,8 +317,9 @@ The special characters are: ...@@ -317,8 +317,9 @@ The special characters are:
The special sequences consist of ``'\'`` and a character from the list below. The special sequences consist of ``'\'`` and a character from the list below.
If the ordinary character is not on the list, then the resulting RE will match If the ordinary character is not ASCII digit or ASCII letter, then the
the second character. For example, ``\$`` matches the character ``'$'``. resulting RE will match the second character. For example, ``\$`` matches the
character ``'$'``.
``\number`` ``\number``
Matches the contents of the group of the same number. Groups are numbered Matches the contents of the group of the same number. Groups are numbered
...@@ -438,9 +439,8 @@ three digits in length. ...@@ -438,9 +439,8 @@ three digits in length.
.. versionchanged:: 3.3 .. versionchanged:: 3.3
The ``'\u'`` and ``'\U'`` escape sequences have been added. The ``'\u'`` and ``'\U'`` escape sequences have been added.
.. deprecated-removed:: 3.5 3.6 .. versionchanged:: 3.6
Unknown escapes consist of ``'\'`` and ASCII letter now raise a Unknown escapes consisting of ``'\'`` and ASCII letter now are errors.
deprecation warning and will be forbidden in Python 3.6.
.. seealso:: .. seealso::
...@@ -528,11 +528,11 @@ form. ...@@ -528,11 +528,11 @@ form.
current locale. The use of this flag is discouraged as the locale mechanism current locale. The use of this flag is discouraged as the locale mechanism
is very unreliable, and it only handles one "culture" at a time anyway; is very unreliable, and it only handles one "culture" at a time anyway;
you should use Unicode matching instead, which is the default in Python 3 you should use Unicode matching instead, which is the default in Python 3
for Unicode (str) patterns. This flag makes sense only with bytes patterns. for Unicode (str) patterns. This flag can be used only with bytes patterns.
.. deprecated-removed:: 3.5 3.6 .. versionchanged:: 3.6
Deprecated the use of :const:`re.LOCALE` with string patterns or :const:`re.LOCALE` can be used only with bytes patterns and is
:const:`re.ASCII`. not compatible with :const:`re.ASCII`.
.. data:: M .. data:: M
...@@ -738,9 +738,8 @@ form. ...@@ -738,9 +738,8 @@ form.
.. versionchanged:: 3.5 .. versionchanged:: 3.5
Unmatched groups are replaced with an empty string. Unmatched groups are replaced with an empty string.
.. deprecated-removed:: 3.5 3.6 .. versionchanged:: 3.6
Unknown escapes consist of ``'\'`` and ASCII letter now raise a Unknown escapes consisting of ``'\'`` and ASCII letter now are errors.
deprecation warning and will be forbidden in Python 3.6.
.. function:: subn(pattern, repl, string, count=0, flags=0) .. function:: subn(pattern, repl, string, count=0, flags=0)
......
...@@ -282,33 +282,6 @@ class Tokenizer: ...@@ -282,33 +282,6 @@ class Tokenizer:
def error(self, msg, offset=0): def error(self, msg, offset=0):
return error(msg, self.string, self.tell() - offset) return error(msg, self.string, self.tell() - offset)
# The following three functions are not used in this module anymore, but we keep
# them here (with DeprecationWarnings) for backwards compatibility.
def isident(char):
    """Deprecated: tell whether *char* may appear in an identifier.

    Emits a DeprecationWarning on every call, then returns True when
    *char* is the underscore or compares within ``"a".."z"`` or
    ``"A".."Z"``; otherwise returns False.
    """
    import warnings
    warnings.warn('sre_parse.isident() will be removed in 3.5',
                  DeprecationWarning, stacklevel=2)
    # The underscore is the only non-letter accepted.
    if char == "_":
        return True
    lowercase = "a" <= char <= "z"
    return lowercase or "A" <= char <= "Z"
def isdigit(char):
    """Deprecated: tell whether *char* compares within ``"0".."9"``.

    Emits a DeprecationWarning on every call before performing the
    range comparison.
    """
    import warnings
    warnings.warn('sre_parse.isdigit() will be removed in 3.5',
                  DeprecationWarning, stacklevel=2)
    # Check the lower bound first, then the upper bound.
    if not "0" <= char:
        return False
    return char <= "9"
def isname(name):
    """Deprecated: tell whether *name* is a valid group name.

    Emits a DeprecationWarning on every call.  A name is accepted when
    its first character satisfies isident() and every following
    character satisfies isident() or isdigit().  An empty name is never
    valid and yields False instead of failing on ``name[0]``.
    """
    import warnings
    warnings.warn('sre_parse.isname() will be removed in 3.5',
                  DeprecationWarning, stacklevel=2)
    # Guard the empty case first: indexing name[0] would raise IndexError.
    if not name:
        return False
    # check that group name is a valid string
    if not isident(name[0]):
        return False
    for char in name[1:]:
        if not isident(char) and not isdigit(char):
            return False
    return True
def _class_escape(source, escape): def _class_escape(source, escape):
# handle escape code inside character class # handle escape code inside character class
code = ESCAPES.get(escape) code = ESCAPES.get(escape)
...@@ -351,9 +324,7 @@ def _class_escape(source, escape): ...@@ -351,9 +324,7 @@ def _class_escape(source, escape):
raise ValueError raise ValueError
if len(escape) == 2: if len(escape) == 2:
if c in ASCIILETTERS: if c in ASCIILETTERS:
import warnings raise source.error('bad escape %s' % escape, len(escape))
warnings.warn('bad escape %s' % escape,
DeprecationWarning, stacklevel=8)
return LITERAL, ord(escape[1]) return LITERAL, ord(escape[1])
except ValueError: except ValueError:
pass pass
...@@ -418,9 +389,7 @@ def _escape(source, escape, state): ...@@ -418,9 +389,7 @@ def _escape(source, escape, state):
raise source.error("invalid group reference", len(escape)) raise source.error("invalid group reference", len(escape))
if len(escape) == 2: if len(escape) == 2:
if c in ASCIILETTERS: if c in ASCIILETTERS:
import warnings raise source.error("bad escape %s" % escape, len(escape))
warnings.warn('bad escape %s' % escape,
DeprecationWarning, stacklevel=8)
return LITERAL, ord(escape[1]) return LITERAL, ord(escape[1])
except ValueError: except ValueError:
pass pass
...@@ -798,10 +767,7 @@ def fix_flags(src, flags): ...@@ -798,10 +767,7 @@ def fix_flags(src, flags):
# Check and fix flags according to the type of pattern (str or bytes) # Check and fix flags according to the type of pattern (str or bytes)
if isinstance(src, str): if isinstance(src, str):
if flags & SRE_FLAG_LOCALE: if flags & SRE_FLAG_LOCALE:
import warnings raise ValueError("cannot use LOCALE flag with a str pattern")
warnings.warn("LOCALE flag with a str pattern is deprecated. "
"Will be an error in 3.6",
DeprecationWarning, stacklevel=6)
if not flags & SRE_FLAG_ASCII: if not flags & SRE_FLAG_ASCII:
flags |= SRE_FLAG_UNICODE flags |= SRE_FLAG_UNICODE
elif flags & SRE_FLAG_UNICODE: elif flags & SRE_FLAG_UNICODE:
...@@ -810,10 +776,7 @@ def fix_flags(src, flags): ...@@ -810,10 +776,7 @@ def fix_flags(src, flags):
if flags & SRE_FLAG_UNICODE: if flags & SRE_FLAG_UNICODE:
raise ValueError("cannot use UNICODE flag with a bytes pattern") raise ValueError("cannot use UNICODE flag with a bytes pattern")
if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII: if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII:
import warnings raise ValueError("ASCII and LOCALE flags are incompatible")
warnings.warn("ASCII and LOCALE flags are incompatible. "
"Will be an error in 3.6",
DeprecationWarning, stacklevel=6)
return flags return flags
def parse(str, flags=0, pattern=None): def parse(str, flags=0, pattern=None):
...@@ -914,9 +877,7 @@ def parse_template(source, pattern): ...@@ -914,9 +877,7 @@ def parse_template(source, pattern):
this = chr(ESCAPES[this][1]) this = chr(ESCAPES[this][1])
except KeyError: except KeyError:
if c in ASCIILETTERS: if c in ASCIILETTERS:
import warnings raise s.error('bad escape %s' % this, len(this))
warnings.warn('bad escape %s' % this,
DeprecationWarning, stacklevel=4)
lappend(this) lappend(this)
else: else:
lappend(this) lappend(this)
......
...@@ -124,7 +124,7 @@ class ReTests(unittest.TestCase): ...@@ -124,7 +124,7 @@ class ReTests(unittest.TestCase):
(chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8))) (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8)))
for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ': for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
with self.subTest(c): with self.subTest(c):
with self.assertWarns(DeprecationWarning): with self.assertRaises(re.error):
self.assertEqual(re.sub('a', '\\' + c, 'a'), '\\' + c) self.assertEqual(re.sub('a', '\\' + c, 'a'), '\\' + c)
self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest') self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
...@@ -633,14 +633,10 @@ class ReTests(unittest.TestCase): ...@@ -633,14 +633,10 @@ class ReTests(unittest.TestCase):
re.purge() # for warnings re.purge() # for warnings
for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY': for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY':
with self.subTest(c): with self.subTest(c):
with self.assertWarns(DeprecationWarning): self.assertRaises(re.error, re.compile, '\\%c' % c)
self.assertEqual(re.fullmatch('\\%c' % c, c).group(), c)
self.assertIsNone(re.match('\\%c' % c, 'a'))
for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ': for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ':
with self.subTest(c): with self.subTest(c):
with self.assertWarns(DeprecationWarning): self.assertRaises(re.error, re.compile, '[\\%c]' % c)
self.assertEqual(re.fullmatch('[\\%c]' % c, c).group(), c)
self.assertIsNone(re.match('[\\%c]' % c, 'a'))
def test_string_boundaries(self): def test_string_boundaries(self):
# See http://bugs.python.org/issue10713 # See http://bugs.python.org/issue10713
...@@ -993,10 +989,8 @@ class ReTests(unittest.TestCase): ...@@ -993,10 +989,8 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match((r"\x%02x" % i).encode(), bytes([i]))) self.assertTrue(re.match((r"\x%02x" % i).encode(), bytes([i])))
self.assertTrue(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0")) self.assertTrue(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
self.assertTrue(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z")) self.assertTrue(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
with self.assertWarns(DeprecationWarning): self.assertRaises(re.error, re.compile, br"\u1234")
self.assertTrue(re.match(br"\u1234", b'u1234')) self.assertRaises(re.error, re.compile, br"\U00012345")
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match(br"\U00012345", b'U00012345'))
self.assertTrue(re.match(br"\0", b"\000")) self.assertTrue(re.match(br"\0", b"\000"))
self.assertTrue(re.match(br"\08", b"\0008")) self.assertTrue(re.match(br"\08", b"\0008"))
self.assertTrue(re.match(br"\01", b"\001")) self.assertTrue(re.match(br"\01", b"\001"))
...@@ -1018,10 +1012,8 @@ class ReTests(unittest.TestCase): ...@@ -1018,10 +1012,8 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match((r"[\x%02x]" % i).encode(), bytes([i]))) self.assertTrue(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
self.assertTrue(re.match((r"[\x%02x0]" % i).encode(), bytes([i]))) self.assertTrue(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i]))) self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
with self.assertWarns(DeprecationWarning): self.assertRaises(re.error, re.compile, br"[\u1234]")
self.assertTrue(re.match(br"[\u1234]", b'u')) self.assertRaises(re.error, re.compile, br"[\U00012345]")
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match(br"[\U00012345]", b'U'))
self.checkPatternError(br"[\567]", self.checkPatternError(br"[\567]",
r'octal escape value \567 outside of ' r'octal escape value \567 outside of '
r'range 0-0o377', 1) r'range 0-0o377', 1)
...@@ -1363,12 +1355,12 @@ class ReTests(unittest.TestCase): ...@@ -1363,12 +1355,12 @@ class ReTests(unittest.TestCase):
if bletter: if bletter:
self.assertIsNone(pat.match(bletter)) self.assertIsNone(pat.match(bletter))
# Incompatibilities # Incompatibilities
self.assertWarns(DeprecationWarning, re.compile, '', re.LOCALE) self.assertRaises(ValueError, re.compile, '', re.LOCALE)
self.assertWarns(DeprecationWarning, re.compile, '(?L)') self.assertRaises(ValueError, re.compile, '(?L)')
self.assertWarns(DeprecationWarning, re.compile, b'', re.LOCALE | re.ASCII) self.assertRaises(ValueError, re.compile, b'', re.LOCALE | re.ASCII)
self.assertWarns(DeprecationWarning, re.compile, b'(?L)', re.ASCII) self.assertRaises(ValueError, re.compile, b'(?L)', re.ASCII)
self.assertWarns(DeprecationWarning, re.compile, b'(?a)', re.LOCALE) self.assertRaises(ValueError, re.compile, b'(?a)', re.LOCALE)
self.assertWarns(DeprecationWarning, re.compile, b'(?aL)') self.assertRaises(ValueError, re.compile, b'(?aL)')
def test_bug_6509(self): def test_bug_6509(self):
# Replacement strings of both types must parse properly. # Replacement strings of both types must parse properly.
...@@ -1419,13 +1411,6 @@ class ReTests(unittest.TestCase): ...@@ -1419,13 +1411,6 @@ class ReTests(unittest.TestCase):
# Test behaviour when not given a string or pattern as parameter # Test behaviour when not given a string or pattern as parameter
self.assertRaises(TypeError, re.compile, 0) self.assertRaises(TypeError, re.compile, 0)
def test_bug_13899(self):
# Issue #13899: re pattern r"[\A]" should work like "A" but matches
# nothing. Ditto B and Z.
with self.assertWarns(DeprecationWarning):
self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
['A', 'B', '\b', 'C', 'Z'])
@bigmemtest(size=_2G, memuse=1) @bigmemtest(size=_2G, memuse=1)
def test_large_search(self, size): def test_large_search(self, size):
# Issue #10182: indices were 32-bit-truncated. # Issue #10182: indices were 32-bit-truncated.
......
...@@ -38,6 +38,9 @@ Core and Builtins ...@@ -38,6 +38,9 @@ Core and Builtins
Library Library
------- -------
- Issue #27030: Unknown escapes consisting of ``'\'`` and an ASCII letter in
regular expressions are now errors.
- Issue #27186: Add os.PathLike support to DirEntry (part of PEP 519). - Issue #27186: Add os.PathLike support to DirEntry (part of PEP 519).
Initial patch by Jelle Zijlstra. Initial patch by Jelle Zijlstra.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment