Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
9bd85b83
Commit
9bd85b83
authored
Jun 11, 2016
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #27030: Unknown escapes consisting of ``'\'`` and ASCII letter in
regular expressions now are errors.
parent
d35bf032
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
32 additions
and
84 deletions
+32
-84
Doc/library/re.rst
Doc/library/re.rst
+11
-12
Lib/sre_parse.py
Lib/sre_parse.py
+5
-44
Lib/test/test_re.py
Lib/test/test_re.py
+13
-28
Misc/NEWS
Misc/NEWS
+3
-0
No files found.
Doc/library/re.rst
View file @
9bd85b83
...
...
@@ -317,8 +317,9 @@ The special characters are:
The special sequences consist of ``'\'`` and a character from the list below.
If the ordinary character is not on the list, then the resulting RE will match
the second character. For example, ``\$`` matches the character ``'$'``.
If the ordinary character is not an ASCII digit or an ASCII letter, then the
resulting RE will match the second character. For example, ``\$`` matches the
character ``'$'``.
``\number``
Matches the contents of the group of the same number. Groups are numbered
...
...
@@ -438,9 +439,8 @@ three digits in length.
.. versionchanged:: 3.3
The ``'\u'`` and ``'\U'`` escape sequences have been added.
.. deprecated-removed:: 3.5 3.6
Unknown escapes consist of ``'\'`` and ASCII letter now raise a
deprecation warning and will be forbidden in Python 3.6.
.. versionchanged:: 3.6
Unknown escapes consisting of ``'\'`` and an ASCII letter are now errors.
.. seealso::
...
...
@@ -528,11 +528,11 @@ form.
current locale. The use of this flag is discouraged as the locale mechanism
is very unreliable, and it only handles one "culture" at a time anyway;
you should use Unicode matching instead, which is the default in Python 3
for Unicode (str) patterns. This flag
makes sense
only with bytes patterns.
for Unicode (str) patterns. This flag
can be used
only with bytes patterns.
.. deprecated-removed:: 3.5 3.6
Deprecated the use of :const:`re.LOCALE` with string patterns or
:const:`re.ASCII`.
.. versionchanged:: 3.6
:const:`re.LOCALE` can be used only with bytes patterns and is
not compatible with :const:`re.ASCII`.
.. data:: M
...
...
@@ -738,9 +738,8 @@ form.
.. versionchanged:: 3.5
Unmatched groups are replaced with an empty string.
.. deprecated-removed:: 3.5 3.6
Unknown escapes consist of ``'\'`` and ASCII letter now raise a
deprecation warning and will be forbidden in Python 3.6.
.. versionchanged:: 3.6
Unknown escapes consisting of ``'\'`` and an ASCII letter are now errors.
.. function:: subn(pattern, repl, string, count=0, flags=0)
...
...
Lib/sre_parse.py
View file @
9bd85b83
...
...
@@ -282,33 +282,6 @@ class Tokenizer:
def error(self, msg, offset=0):
return error(msg, self.string, self.tell() - offset)
# The following three functions are not used in this module anymore, but we keep
# them here (with DeprecationWarnings) for backwards compatibility.
def isident(char):
import warnings
warnings.warn('sre_parse.isident() will be removed in 3.5',
DeprecationWarning, stacklevel=2)
return "
a
" <= char <= "
z
" or "
A
" <= char <= "
Z
" or char == "
_
"
def isdigit(char):
import warnings
warnings.warn('sre_parse.isdigit() will be removed in 3.5',
DeprecationWarning, stacklevel=2)
return "
0
" <= char <= "
9
"
def isname(name):
import warnings
warnings.warn('sre_parse.isname() will be removed in 3.5',
DeprecationWarning, stacklevel=2)
# check that group name is a valid string
if not isident(name[0]):
return False
for char in name[1:]:
if not isident(char) and not isdigit(char):
return False
return True
def _class_escape(source, escape):
# handle escape code inside character class
code = ESCAPES.get(escape)
...
...
@@ -351,9 +324,7 @@ def _class_escape(source, escape):
raise ValueError
if len(escape) == 2:
if c in ASCIILETTERS:
import warnings
warnings.warn('bad escape %s' % escape,
DeprecationWarning, stacklevel=8)
raise source.error('bad escape %s' % escape, len(escape))
return LITERAL, ord(escape[1])
except ValueError:
pass
...
...
@@ -418,9 +389,7 @@ def _escape(source, escape, state):
raise source.error("
invalid
group
reference
", len(escape))
if len(escape) == 2:
if c in ASCIILETTERS:
import warnings
warnings.warn('bad escape %s' % escape,
DeprecationWarning, stacklevel=8)
raise source.error("
bad
escape
%
s
" % escape, len(escape))
return LITERAL, ord(escape[1])
except ValueError:
pass
...
...
@@ -798,10 +767,7 @@ def fix_flags(src, flags):
# Check and fix flags according to the type of pattern (str or bytes)
if
isinstance
(
src
,
str
):
if
flags
&
SRE_FLAG_LOCALE
:
import
warnings
warnings
.
warn
(
"LOCALE flag with a str pattern is deprecated. "
"Will be an error in 3.6"
,
DeprecationWarning
,
stacklevel
=
6
)
raise
ValueError
(
"cannot use LOCALE flag with a str pattern"
)
if
not
flags
&
SRE_FLAG_ASCII
:
flags
|=
SRE_FLAG_UNICODE
elif
flags
&
SRE_FLAG_UNICODE
:
...
...
@@ -810,10 +776,7 @@ def fix_flags(src, flags):
if
flags
&
SRE_FLAG_UNICODE
:
raise
ValueError
(
"cannot use UNICODE flag with a bytes pattern"
)
if
flags
&
SRE_FLAG_LOCALE
and
flags
&
SRE_FLAG_ASCII
:
import
warnings
warnings
.
warn
(
"ASCII and LOCALE flags are incompatible. "
"Will be an error in 3.6"
,
DeprecationWarning
,
stacklevel
=
6
)
raise
ValueError
(
"ASCII and LOCALE flags are incompatible"
)
return
flags
def
parse
(
str
,
flags
=
0
,
pattern
=
None
):
...
...
@@ -914,9 +877,7 @@ def parse_template(source, pattern):
this
=
chr
(
ESCAPES
[
this
][
1
])
except
KeyError
:
if
c
in
ASCIILETTERS
:
import
warnings
warnings
.
warn
(
'bad escape %s'
%
this
,
DeprecationWarning
,
stacklevel
=
4
)
raise
s
.
error
(
'bad escape %s'
%
this
,
len
(
this
))
lappend
(
this
)
else
:
lappend
(
this
)
...
...
Lib/test/test_re.py
View file @
9bd85b83
...
...
@@ -124,7 +124,7 @@ class ReTests(unittest.TestCase):
(chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8)))
for c in '
cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
':
with self.subTest(c):
with self.assert
Warns(DeprecationWarning
):
with self.assert
Raises(re.error
):
self.assertEqual(re.sub('
a
', '
\\
' + c, '
a
'), '
\\
' + c)
self.assertEqual(re.sub('
^
\
s
*
', '
X
', '
test
'), '
Xtest
')
...
...
@@ -633,14 +633,10 @@ class ReTests(unittest.TestCase):
re.purge() # for warnings
for c in '
ceghijklmopqyzCEFGHIJKLMNOPQRTVXY
':
with self.subTest(c):
with self.assertWarns(DeprecationWarning):
self.assertEqual(re.fullmatch('
\\
%
c
' % c, c).group(), c)
self.assertIsNone(re.match('
\\
%
c
' % c, '
a
'))
self.assertRaises(re.error, re.compile, '
\\
%
c
' % c)
for c in '
ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ
':
with self.subTest(c):
with self.assertWarns(DeprecationWarning):
self.assertEqual(re.fullmatch('
[
\\
%
c
]
' % c, c).group(), c)
self.assertIsNone(re.match('
[
\\
%
c
]
' % c, '
a
'))
self.assertRaises(re.error, re.compile, '
[
\\
%
c
]
' % c)
def test_string_boundaries(self):
# See http://bugs.python.org/issue10713
...
...
@@ -993,10 +989,8 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match((r"
\
x
%
02
x
" % i).encode(), bytes([i])))
self.assertTrue(re.match((r"
\
x
%
02
x0
" % i).encode(), bytes([i])+b"
0
"))
self.assertTrue(re.match((r"
\
x
%
02
xz
" % i).encode(), bytes([i])+b"
z
"))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match(br"
\
u1234
", b'u1234'))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match(br"
\
U00012345
", b'U00012345'))
self.assertRaises(re.error, re.compile, br"
\
u1234
")
self.assertRaises(re.error, re.compile, br"
\
U00012345
")
self.assertTrue(re.match(br"
\
0
", b"
\
000
"))
self.assertTrue(re.match(br"
\
08
", b"
\
0008
"))
self.assertTrue(re.match(br"
\
01
", b"
\
001
"))
...
...
@@ -1018,10 +1012,8 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match((r"
[
\
x
%
02
x
]
" % i).encode(), bytes([i])))
self.assertTrue(re.match((r"
[
\
x
%
02
x0
]
" % i).encode(), bytes([i])))
self.assertTrue(re.match((r"
[
\
x
%
02
xz
]
" % i).encode(), bytes([i])))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match(br"
[
\
u1234
]
", b'u'))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match(br"
[
\
U00012345
]
", b'U'))
self.assertRaises(re.error, re.compile, br"
[
\
u1234
]
")
self.assertRaises(re.error, re.compile, br"
[
\
U00012345
]
")
self.checkPatternError(br"
[
\
567
]
",
r'octal escape value
\
567
outside of '
r'range 0-0o377', 1)
...
...
@@ -1363,12 +1355,12 @@ class ReTests(unittest.TestCase):
if bletter:
self.assertIsNone(pat.match(bletter))
# Incompatibilities
self.assert
Warns(DeprecationWarning
, re.compile, '', re.LOCALE)
self.assert
Warns(DeprecationWarning
, re.compile, '(?L)')
self.assert
Warns(DeprecationWarning
, re.compile, b'', re.LOCALE | re.ASCII)
self.assert
Warns(DeprecationWarning
, re.compile, b'(?L)', re.ASCII)
self.assert
Warns(DeprecationWarning
, re.compile, b'(?a)', re.LOCALE)
self.assert
Warns(DeprecationWarning
, re.compile, b'(?aL)')
self.assert
Raises(ValueError
, re.compile, '', re.LOCALE)
self.assert
Raises(ValueError
, re.compile, '(?L)')
self.assert
Raises(ValueError
, re.compile, b'', re.LOCALE | re.ASCII)
self.assert
Raises(ValueError
, re.compile, b'(?L)', re.ASCII)
self.assert
Raises(ValueError
, re.compile, b'(?a)', re.LOCALE)
self.assert
Raises(ValueError
, re.compile, b'(?aL)')
def test_bug_6509(self):
# Replacement strings of both types must parse properly.
...
...
@@ -1419,13 +1411,6 @@ class ReTests(unittest.TestCase):
# Test behaviour when not given a string or pattern as parameter
self.assertRaises(TypeError, re.compile, 0)
def test_bug_13899(self):
# Issue #13899: re pattern r"
[
\
A
]
" should work like "
A
" but matches
# nothing. Ditto B and Z.
with self.assertWarns(DeprecationWarning):
self.assertEqual(re.findall(r'[
\
A
\
B
\
b
\
C
\
Z]', 'AB
\
b
CZ'),
['A', 'B', '
\
b
', 'C', 'Z'])
@bigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
# Issue #10182: indices were 32-bit-truncated.
...
...
Misc/NEWS
View file @
9bd85b83
...
...
@@ -38,6 +38,9 @@ Core and Builtins
Library
-------
- Issue #27030: Unknown escapes consisting of ``'
\
'`` and ASCII letter in
regular expressions now are errors.
- Issue #27186: Add os.PathLike support to DirEntry (part of PEP 519).
Initial patch by Jelle Zijlstra.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment