Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
5908300e
Commit
5908300e
authored
Apr 13, 2017
by
Serhiy Storchaka
Committed by
GitHub
Apr 13, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
bpo-29995: re.escape() now escapes only special characters. (#1007)
parent
a6e395df
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
40 additions
and
51 deletions
+40
-51
Doc/library/re.rst
Doc/library/re.rst
+7
-3
Doc/tools/susp-ignored.csv
Doc/tools/susp-ignored.csv
+1
-1
Lib/idlelib/idle_test/test_replace.py
Lib/idlelib/idle_test/test_replace.py
+2
-2
Lib/re.py
Lib/re.py
+9
-27
Lib/test/test_re.py
Lib/test/test_re.py
+19
-18
Misc/NEWS
Misc/NEWS
+2
-0
No files found.
Doc/library/re.rst
View file @
5908300e
...
@@ -786,7 +786,7 @@ form.
...
@@ -786,7 +786,7 @@ form.
.. function:: escape(pattern)
.. function:: escape(pattern)
Escape all the characters in *pattern* except ASCII letters, numbers and ``'_'``.
Escape special characters in *pattern*.
This is useful if you want to match an arbitrary literal string that may
This is useful if you want to match an arbitrary literal string that may
have regular expression metacharacters in it. For example::
have regular expression metacharacters in it. For example::
...
@@ -795,15 +795,19 @@ form.
...
@@ -795,15 +795,19 @@ form.
>>> legal_chars = string.ascii_lowercase + string.digits + "!#$%&'*+-.^_`|~:"
>>> legal_chars = string.ascii_lowercase + string.digits + "!#$%&'*+-.^_`|~:"
>>> print('[%s]+' % re.escape(legal_chars))
>>> print('[%s]+' % re.escape(legal_chars))
[abcdefghijklmnopqrstuvwxyz0123456789\!\#\$\%\&\'\*\+\-\.\^_\`\|\~\:]+
[abcdefghijklmnopqrstuvwxyz0123456789!\#\$%&'\*\+\-\.\^_`\|~:]+
>>> operators = ['+', '-', '*', '/', '**']
>>> operators = ['+', '-', '*', '/', '**']
>>> print('|'.join(map(re.escape, sorted(operators, reverse=True))))
>>> print('|'.join(map(re.escape, sorted(operators, reverse=True))))
\/|\-|\+|\*\*|\*
/|\-|\+|\*\*|\*
.. versionchanged:: 3.3
.. versionchanged:: 3.3
The ``'_'`` character is no longer escaped.
The ``'_'`` character is no longer escaped.
.. versionchanged:: 3.7
Only characters that can have special meaning in a regular expression
are escaped.
.. function:: purge()
.. function:: purge()
...
...
Doc/tools/susp-ignored.csv
View file @
5908300e
...
@@ -303,7 +303,7 @@ whatsnew/3.2,,:gz,">>> with tarfile.open(name='myarchive.tar.gz', mode='w:gz') a
...
@@ -303,7 +303,7 @@ whatsnew/3.2,,:gz,">>> with tarfile.open(name='myarchive.tar.gz', mode='w:gz') a
whatsnew/3.2,,:location,zope9-location = ${zope9:location}
whatsnew/3.2,,:location,zope9-location = ${zope9:location}
whatsnew/3.2,,:prefix,zope-conf = ${custom:prefix}/etc/zope.conf
whatsnew/3.2,,:prefix,zope-conf = ${custom:prefix}/etc/zope.conf
library/re,,`,!#$%&'*+-.^_`|~:
library/re,,`,!#$%&'*+-.^_`|~:
library/re,,`,\!\#\$\%\&\'\*\+\-\.\^_\`\|\~\:
library/re,,`,!\#\$%&'\*\+\-\.\^_`\|~:
library/tarfile,,:xz,'x:xz'
library/tarfile,,:xz,'x:xz'
library/xml.etree.elementtree,,:sometag,prefix:sometag
library/xml.etree.elementtree,,:sometag,prefix:sometag
library/xml.etree.elementtree,,:fictional,"<actors xmlns:fictional=""http://characters.example.com"""
library/xml.etree.elementtree,,:fictional,"<actors xmlns:fictional=""http://characters.example.com"""
...
...
Lib/idlelib/idle_test/test_replace.py
View file @
5908300e
...
@@ -221,8 +221,8 @@ class ReplaceDialogTest(unittest.TestCase):
...
@@ -221,8 +221,8 @@ class ReplaceDialogTest(unittest.TestCase):
self.assertIn('Invalid Replace Expression', showerror.message)
self.assertIn('Invalid Replace Expression', showerror.message)
# test access method
# test access method
self.engine.setcookedpat("\'")
self.engine.setcookedpat("?")
equal(pv.get(), "\\'")
equal(pv.get(), "\\?")
def test_replace_backwards(self):
def test_replace_backwards(self):
equal = self.assertEqual
equal = self.assertEqual
...
...
Lib/re.py
View file @
5908300e
...
@@ -241,39 +241,21 @@ def template(pattern, flags=0):
...
@@ -241,39 +241,21 @@ def template(pattern, flags=0):
"Compile a template pattern, returning a pattern object"
"Compile a template pattern, returning a pattern object"
return _compile(pattern, flags|T)
return _compile(pattern, flags|T)
_alphanum_str = frozenset(
# SPECIAL_CHARS
"_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
# closing ')', '}' and ']'
_alphanum_bytes = frozenset(
# '-' (a range in character set)
b"_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
# '#' (comment) and WHITESPACE (ignored) in verbose mode
_special_chars_map = {i: '\\' + chr(i) for i in b'()[]{}?*+-|^$\\.# \t\n\r\v\f'}
def escape(pattern):
def escape(pattern):
"""
"""
Escape all the characters in pattern except ASCII letters, numbers and '_'.
Escape special characters in a string.
"""
"""
if isinstance(pattern, str):
if isinstance(pattern, str):
alphanum = _alphanum_str
return pattern.translate(_special_chars_map)
s = list(pattern)
for i, c in enumerate(pattern):
if c not in alphanum:
if c == "\000":
s[i] = "\\000"
else:
else:
s[i] = "\\" + c
pattern = str(pattern, 'latin1')
return "".join(s)
return pattern.translate(_special_chars_map).encode('latin1')
else:
alphanum = _alphanum_bytes
s = []
esc = ord(b"\\")
for c in pattern:
if c in alphanum:
s.append(c)
else:
if c == 0:
s.extend(b"\\000")
else:
s.append(esc)
s.append(c)
return bytes(s)
# --------------------------------------------------------------------
# --------------------------------------------------------------------
# internals
# internals
...
...
Lib/test/test_re.py
View file @
5908300e
...
@@ -904,7 +904,7 @@ class ReTests(unittest.TestCase):
...
@@ -904,7 +904,7 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.search(r"a\s", "a ").group(0), "a ")
self.assertEqual(re.search(r"a\s", "a ").group(0), "a ")
def assertMatch(self, pattern, text, match=None, span=None,
def assertMatch(self, pattern, text, match=None, span=None,
matcher=re.match):
matcher=re.fullmatch):
if match is None and span is None:
if match is None and span is None:
# the pattern matches the whole text
# the pattern matches the whole text
match = text
match = text
...
@@ -917,37 +917,38 @@ class ReTests(unittest.TestCase):
...
@@ -917,37 +917,38 @@ class ReTests(unittest.TestCase):
self.assertEqual(m.group(), match)
self.assertEqual(m.group(), match)
self.assertEqual(m.span(), span)
self.assertEqual(m.span(), span)
LITERAL_CHARS = string.ascii_letters + string.digits + '!"%&\',/:;<=>@_`~'
def test_re_escape(self):
def test_re_escape(self):
alnum_chars = string.ascii_letters + string.digits + '_'
p = ''.join(chr(i) for i in range(256))
p = ''.join(chr(i) for i in range(256))
for c in p:
for c in p:
if c in alnum_chars:
self.assertEqual(re.escape(c), c)
elif c == '\x00':
self.assertEqual(re.escape(c), '\\000')
else:
self.assertEqual(re.escape(c), '\\' + c)
self.assertMatch(re.escape(c), c)
self.assertMatch(re.escape(c), c)
self.assertMatch('[' + re.escape(c) + ']', c)
self.assertMatch('(?x)' + re.escape(c), c)
self.assertMatch(re.escape(p), p)
self.assertMatch(re.escape(p), p)
for c in '-.]{}':
self.assertEqual(re.escape(c)[:1], '\\')
literal_chars = self.LITERAL_CHARS
self.assertEqual(re.escape(literal_chars), literal_chars)
def test_re_escape_byte(self):
def test_re_escape_bytes(self):
alnum_chars = (string.ascii_letters + string.digits + '_').encode('ascii')
p = bytes(range(256))
p = bytes(range(256))
for i in p:
for i in p:
b = bytes([i])
b = bytes([i])
if b in alnum_chars:
self.assertEqual(re.escape(b), b)
elif i == 0:
self.assertEqual(re.escape(b), b'\\000')
else:
self.assertEqual(re.escape(b), b'\\' + b)
self.assertMatch(re.escape(b), b)
self.assertMatch(re.escape(b), b)
self.assertMatch(b'[' + re.escape(b) + b']', b)
self.assertMatch(b'(?x)' + re.escape(b), b)
self.assertMatch(re.escape(p), p)
self.assertMatch(re.escape(p), p)
for i in b'-.]{}':
b = bytes([i])
self.assertEqual(re.escape(b)[:1], b'\\')
literal_chars = self.LITERAL_CHARS.encode('ascii')
self.assertEqual(re.escape(literal_chars), literal_chars)
def test_re_escape_non_ascii(self):
def test_re_escape_non_ascii(self):
s = 'xxx\u2620\u2620\u2620xxx'
s = 'xxx\u2620\u2620\u2620xxx'
s_escaped = re.escape(s)
s_escaped = re.escape(s)
self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
self.assertEqual(s_escaped, s)
self.assertMatch(s_escaped, s)
self.assertMatch(s_escaped, s)
self.assertMatch('.%s+.' % re.escape('\u2620'), s,
self.assertMatch('.%s+.' % re.escape('\u2620'), s,
'x\u2620\u2620\u2620x', (2, 7), re.search)
'x\u2620\u2620\u2620x', (2, 7), re.search)
...
@@ -955,7 +956,7 @@ class ReTests(unittest.TestCase):
...
@@ -955,7 +956,7 @@ class ReTests(unittest.TestCase):
def test_re_escape_non_ascii_bytes(self):
def test_re_escape_non_ascii_bytes(self):
b = 'y\u2620y\u2620y'.encode('utf-8')
b = 'y\u2620y\u2620y'.encode('utf-8')
b_escaped = re.escape(b)
b_escaped = re.escape(b)
self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
self.assertEqual(b_escaped, b)
self.assertMatch(b_escaped, b)
self.assertMatch(b_escaped, b)
res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
self.assertEqual(len(res), 2)
self.assertEqual(len(res), 2)
...
...
Misc/NEWS
View file @
5908300e
...
@@ -320,6 +320,8 @@ Library
...
@@ -320,6 +320,8 @@ Library
- bpo-29998: Pickling and copying ImportError now preserves name and path
- bpo-29998: Pickling and copying ImportError now preserves name and path
attributes.
attributes.
- bpo-29995: re.escape() now escapes only regex special characters.
- bpo-29962: Add math.remainder operation, implementing remainder
- bpo-29962: Add math.remainder operation, implementing remainder
as specified in IEEE 754.
as specified in IEEE 754.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment