Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
5908300e
Commit
5908300e
authored
Apr 13, 2017
by
Serhiy Storchaka
Committed by
GitHub
Apr 13, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
bpo-29995: re.escape() now escapes only special characters. (#1007)
parent
a6e395df
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
40 additions
and
51 deletions
+40
-51
Doc/library/re.rst
Doc/library/re.rst
+7
-3
Doc/tools/susp-ignored.csv
Doc/tools/susp-ignored.csv
+1
-1
Lib/idlelib/idle_test/test_replace.py
Lib/idlelib/idle_test/test_replace.py
+2
-2
Lib/re.py
Lib/re.py
+9
-27
Lib/test/test_re.py
Lib/test/test_re.py
+19
-18
Misc/NEWS
Misc/NEWS
+2
-0
No files found.
Doc/library/re.rst
View file @
5908300e
...
...
@@ -786,7 +786,7 @@ form.
.. function:: escape(pattern)
Escape
all the characters in *pattern* except ASCII letters, numbers and ``'_'``
.
Escape
special characters in *pattern*
.
This is useful if you want to match an arbitrary literal string that may
have regular expression metacharacters in it. For example::
...
...
@@ -795,15 +795,19 @@ form.
>>> legal_chars = string.ascii_lowercase + string.digits + "!#$%&'*+-.^_`|~:"
>>> print('[%s]+' % re.escape(legal_chars))
[abcdefghijklmnopqrstuvwxyz0123456789
\!\#\$\%\&\'\*\+\-\.\^_\`\|\~\
:]+
[abcdefghijklmnopqrstuvwxyz0123456789
!\#\$%&'\*\+\-\.\^_`\|~
:]+
>>> operators = ['+', '-', '*', '/', '**']
>>> print('|'.join(map(re.escape, sorted(operators, reverse=True))))
\
/|\-|\+|\*\*|\*
/|\-|\+|\*\*|\*
.. versionchanged:: 3.3
The ``'_'`` character is no longer escaped.
.. versionchanged:: 3.7
Only characters that can have special meaning in a regular expression
are escaped.
.. function:: purge()
...
...
Doc/tools/susp-ignored.csv
View file @
5908300e
...
...
@@ -303,7 +303,7 @@ whatsnew/3.2,,:gz,">>> with tarfile.open(name='myarchive.tar.gz', mode='w:gz') a
whatsnew/3.2,,:location,zope9-location = ${zope9:location}
whatsnew/3.2,,:prefix,zope-conf = ${custom:prefix}/etc/zope.conf
library/re,,`,!#$%&'*+-.^_`|~:
library/re,,`,
\!\#\$\%\&\'\*\+\-\.\^_\`\|\~\
:
library/re,,`,
!\#\$%&'\*\+\-\.\^_`\|~
:
library/tarfile,,:xz,'x:xz'
library/xml.etree.elementtree,,:sometag,prefix:sometag
library/xml.etree.elementtree,,:fictional,"<actors xmlns:fictional=""http://characters.example.com"""
...
...
Lib/idlelib/idle_test/test_replace.py
View file @
5908300e
...
...
@@ -221,8 +221,8 @@ class ReplaceDialogTest(unittest.TestCase):
self.assertIn('
Invalid
Replace
Expression
', showerror.message)
# test access method
self.engine.setcookedpat("
\
'
")
equal(pv.get(), "
\
\
'
")
self.engine.setcookedpat("
?
")
equal(pv.get(), "
\
\
?
")
def test_replace_backwards(self):
equal = self.assertEqual
...
...
Lib/re.py
View file @
5908300e
...
...
@@ -241,39 +241,21 @@ def template(pattern, flags=0):
"Compile a template pattern, returning a pattern object"
return
_compile
(
pattern
,
flags
|
T
)
_alphanum_str
=
frozenset
(
"_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890"
)
_alphanum_bytes
=
frozenset
(
b"_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890"
)
# SPECIAL_CHARS
# closing ')', '}' and ']'
# '-' (a range in character set)
# '#' (comment) and WHITESPACE (ignored) in verbose mode
_special_chars_map
=
{
i
:
'
\
\
'
+
chr
(
i
)
for
i
in
b'()[]{}?*+-|^$
\
\
.#
\
t
\
n
\
r
\
v
\
f
'
}
def
escape
(
pattern
):
"""
Escape
all the characters in pattern except ASCII letters, numbers and '_'
.
Escape
special characters in a string
.
"""
if
isinstance
(
pattern
,
str
):
alphanum
=
_alphanum_str
s
=
list
(
pattern
)
for
i
,
c
in
enumerate
(
pattern
):
if
c
not
in
alphanum
:
if
c
==
"
\
000
"
:
s
[
i
]
=
"
\
\
000"
return
pattern
.
translate
(
_special_chars_map
)
else
:
s
[
i
]
=
"
\
\
"
+
c
return
""
.
join
(
s
)
else
:
alphanum
=
_alphanum_bytes
s
=
[]
esc
=
ord
(
b"
\
\
"
)
for
c
in
pattern
:
if
c
in
alphanum
:
s
.
append
(
c
)
else
:
if
c
==
0
:
s
.
extend
(
b"
\
\
000"
)
else
:
s
.
append
(
esc
)
s
.
append
(
c
)
return
bytes
(
s
)
pattern
=
str
(
pattern
,
'latin1'
)
return
pattern
.
translate
(
_special_chars_map
).
encode
(
'latin1'
)
# --------------------------------------------------------------------
# internals
...
...
Lib/test/test_re.py
View file @
5908300e
...
...
@@ -904,7 +904,7 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.search(r"
a
\
s
", "
a
").group(0), "
a
")
def assertMatch(self, pattern, text, match=None, span=None,
matcher=re.match):
matcher=re.
full
match):
if match is None and span is None:
# the pattern matches the whole text
match = text
...
...
@@ -917,37 +917,38 @@ class ReTests(unittest.TestCase):
self.assertEqual(m.group(), match)
self.assertEqual(m.span(), span)
LITERAL_CHARS = string.ascii_letters + string.digits + '!"
%&
\
',/:;<=>@_`~'
def
test_re_escape
(
self
):
alnum_chars = string.ascii_letters + string.digits + '_'
p
=
''
.
join
(
chr
(
i
)
for
i
in
range
(
256
))
for
c
in
p
:
if c in alnum_chars:
self.assertEqual(re.escape(c), c)
elif c == '
\
x00
':
self.assertEqual(re.escape(c), '
\
\
000')
else:
self.assertEqual(re.escape(c), '
\
\
' + c)
self
.
assertMatch
(
re
.
escape
(
c
),
c
)
self
.
assertMatch
(
'['
+
re
.
escape
(
c
)
+
']'
,
c
)
self
.
assertMatch
(
'(?x)'
+
re
.
escape
(
c
),
c
)
self
.
assertMatch
(
re
.
escape
(
p
),
p
)
for
c
in
'-.]{}'
:
self
.
assertEqual
(
re
.
escape
(
c
)[:
1
],
'
\
\
'
)
literal_chars
=
self
.
LITERAL_CHARS
self
.
assertEqual
(
re
.
escape
(
literal_chars
),
literal_chars
)
def test_re_escape_byte(self):
alnum_chars = (string.ascii_letters + string.digits + '_').encode('ascii')
def
test_re_escape_bytes
(
self
):
p
=
bytes
(
range
(
256
))
for
i
in
p
:
b
=
bytes
([
i
])
if b in alnum_chars:
self.assertEqual(re.escape(b), b)
elif i == 0:
self.assertEqual(re.escape(b), b'
\
\
000')
else:
self.assertEqual(re.escape(b), b'
\
\
' + b)
self
.
assertMatch
(
re
.
escape
(
b
),
b
)
self
.
assertMatch
(
b'['
+
re
.
escape
(
b
)
+
b']'
,
b
)
self
.
assertMatch
(
b'(?x)'
+
re
.
escape
(
b
),
b
)
self
.
assertMatch
(
re
.
escape
(
p
),
p
)
for
i
in
b'-.]{}'
:
b
=
bytes
([
i
])
self
.
assertEqual
(
re
.
escape
(
b
)[:
1
],
b'
\
\
'
)
literal_chars
=
self
.
LITERAL_CHARS
.
encode
(
'ascii'
)
self
.
assertEqual
(
re
.
escape
(
literal_chars
),
literal_chars
)
def
test_re_escape_non_ascii
(
self
):
s
=
'xxx
\
u2620
\
u2620
\
u2620
xxx'
s_escaped
=
re
.
escape
(
s
)
self.assertEqual(s_escaped,
'xxx
\
\
\
u2620
\
\
\
u2620
\
\
\
u2620
xxx'
)
self
.
assertEqual
(
s_escaped
,
s
)
self
.
assertMatch
(
s_escaped
,
s
)
self
.
assertMatch
(
'.%s+.'
%
re
.
escape
(
'
\
u2620
'
),
s
,
'x
\
u2620
\
u2620
\
u2620
x'
,
(
2
,
7
),
re
.
search
)
...
...
@@ -955,7 +956,7 @@ class ReTests(unittest.TestCase):
def
test_re_escape_non_ascii_bytes
(
self
):
b
=
'y
\
u2620
y
\
u2620
y'
.
encode
(
'utf-8'
)
b_escaped
=
re
.
escape
(
b
)
self.assertEqual(b_escaped, b
'y
\
\
\
xe2
\
\
\
x98
\
\
\
xa0
y
\
\
\
xe2
\
\
\
x98
\
\
\
xa0
y'
)
self
.
assertEqual
(
b_escaped
,
b
)
self
.
assertMatch
(
b_escaped
,
b
)
res
=
re
.
findall
(
re
.
escape
(
'
\
u2620
'
.
encode
(
'utf-8'
)),
b
)
self
.
assertEqual
(
len
(
res
),
2
)
...
...
Misc/NEWS
View file @
5908300e
...
...
@@ -320,6 +320,8 @@ Library
- bpo-29998: Pickling and copying ImportError now preserves name and path
attributes.
- bpo-29995: re.escape() now escapes only regex special characters.
- bpo-29962: Add math.remainder operation, implementing remainder
as specified in IEEE 754.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment