Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
a01a2ee9
Commit
a01a2ee9
authored
Sep 03, 2004
by
Gustavo Niemeyer
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Applying modified version of patch #1018386, which fixes
some escaping bugs in SRE.
parent
ab9351bf
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
91 additions
and
43 deletions
+91
-43
Doc/lib/libre.tex
Doc/lib/libre.tex
+2
-1
Lib/sre_parse.py
Lib/sre_parse.py
+36
-42
Lib/test/test_re.py
Lib/test/test_re.py
+53
-0
No files found.
Doc/lib/libre.tex
View file @
a01a2ee9
...
...
@@ -387,7 +387,8 @@ also accepted by the regular expression parser:
Octal escapes are included in a limited form: If the first digit is a
0, or if there are three octal digits, it is considered an octal
escape. Otherwise, it is a group reference.
escape. Otherwise, it is a group reference. As for string literals,
octal escapes are always at most three digits in length.
% Note the lack of a period in the section title; it causes problems
...
...
Lib/sre_parse.py
View file @
a01a2ee9
...
...
@@ -217,21 +217,11 @@ def isname(name):
# check that group name is a valid string
if not isident(name[0]):
return False
for char in name:
for char in name
[1:]
:
if not isident(char) and not isdigit(char):
return False
return True
def _group(escape, groups):
# check if the escape string represents a valid group
try:
gid = int(escape[1:])
if gid and gid < groups:
return gid
except ValueError:
pass
return None # not a valid group
def _class_escape(source, escape):
# handle escape code inside character class
code = ESCAPES.get(escape)
...
...
@@ -241,7 +231,8 @@ def _class_escape(source, escape):
if code:
return code
try:
if escape[1:2] == "
x
":
c = escape[1:2]
if c == "
x
":
# hexadecimal escape (exactly two digits)
while source.next in HEXDIGITS and len(escape) < 4:
escape = escape + source.get()
...
...
@@ -249,12 +240,14 @@ def _class_escape(source, escape):
if len(escape) != 2:
raise error, "
bogus
escape
:
%
s
" % repr("
\\
" + escape)
return LITERAL, int(escape, 16) & 0xff
elif
escape[1:2]
in OCTDIGITS:
elif
c
in OCTDIGITS:
# octal escape (up to three digits)
while source.next in OCTDIGITS and len(escape) <
5
:
while source.next in OCTDIGITS and len(escape) <
4
:
escape = escape + source.get()
escape = escape[1:]
return LITERAL, int(escape, 8) & 0xff
elif c in DIGITS:
raise error, "
bogus
escape
:
%
s
" % repr(escape)
if len(escape) == 2:
return LITERAL, ord(escape[1])
except ValueError:
...
...
@@ -270,19 +263,20 @@ def _escape(source, escape, state):
if code:
return code
try:
if escape[1:2] == "
x
":
c = escape[1:2]
if c == "
x
":
# hexadecimal escape
while source.next in HEXDIGITS and len(escape) < 4:
escape = escape + source.get()
if len(escape) != 4:
raise ValueError
return LITERAL, int(escape[2:], 16) & 0xff
elif
escape[1:2]
== "
0
":
elif
c
== "
0
":
# octal escape
while source.next in OCTDIGITS and len(escape) < 4:
escape = escape + source.get()
return LITERAL, int(escape[1:], 8) & 0xff
elif
escape[1:2]
in DIGITS:
elif
c
in DIGITS:
# octal escape *or* decimal group reference (sigh)
if source.next in DIGITS:
escape = escape + source.get()
...
...
@@ -291,9 +285,9 @@ def _escape(source, escape, state):
# got three octal digits; this is an octal escape
escape = escape + source.get()
return LITERAL, int(escape[1:], 8) & 0xff
#
got at least one decimal digit;
this is a group reference
group =
_group(escape, state.groups
)
if group:
#
not an octal escape, so
this is a group reference
group =
int(escape[1:]
)
if group
< state.groups
:
if not state.checkgroup(group):
raise error, "
cannot
refer
to
open
group
"
return GROUPREF, group
...
...
@@ -709,7 +703,8 @@ def parse_template(source, pattern):
break
# end of replacement string
if
this
and
this
[
0
]
==
"
\
\
"
:
# group
if
this
==
"
\
\
g"
:
c
=
this
[
1
:
2
]
if
c
==
"g"
:
name
=
""
if
s
.
match
(
"<"
):
while
1
:
...
...
@@ -723,6 +718,8 @@ def parse_template(source, pattern):
raise
error
,
"bad group name"
try
:
index
=
int
(
name
)
if
index
<
0
:
raise
error
,
"negative group number"
except
ValueError
:
if
not
isname
(
name
):
raise
error
,
"bad character in group name"
...
...
@@ -731,26 +728,23 @@ def parse_template(source, pattern):
except
KeyError
:
raise
IndexError
,
"unknown group name"
a
((
MARK
,
index
))
elif
len
(
this
)
>
1
and
this
[
1
]
in
DIGITS
:
code
=
None
while
1
:
group
=
_group
(
this
,
pattern
.
groups
+
1
)
if
group
:
if
(
s
.
next
not
in
DIGITS
or
not
_group
(
this
+
s
.
next
,
pattern
.
groups
+
1
)):
code
=
MARK
,
group
break
elif
s
.
next
in
OCTDIGITS
:
elif
c
==
"0"
:
if
s
.
next
in
OCTDIGITS
:
this
=
this
+
sget
()
if
s
.
next
in
OCTDIGITS
:
this
=
this
+
sget
()
else
:
break
if
not
code
:
this
=
this
[
1
:]
code
=
LITERAL
,
makechar
(
int
(
this
[
-
6
:],
8
)
&
0xff
)
if
code
[
0
]
is
LITERAL
:
literal
(
code
[
1
])
else
:
a
(
code
)
literal
(
makechar
(
int
(
this
[
1
:],
8
)
&
0xff
))
elif
c
in
DIGITS
:
isoctal
=
False
if
s
.
next
in
DIGITS
:
this
=
this
+
sget
()
if
(
c
in
OCTDIGITS
and
s
.
next
in
OCTDIGITS
and
this
[
2
]
in
OCTDIGITS
):
this
=
this
+
sget
()
isoctal
=
True
literal
(
makechar
(
int
(
this
[
1
:],
8
)
&
0xff
))
if
not
isoctal
:
a
((
MARK
,
int
(
this
[
1
:])))
else
:
try
:
this
=
makechar
(
ESCAPES
[
this
][
1
])
...
...
@@ -782,7 +776,7 @@ def expand_template(template, match):
for
index
,
group
in
groups
:
literals
[
index
]
=
s
=
g
(
group
)
if
s
is
None
:
raise
IndexError
raise
error
,
"unmatched group"
except
IndexError
:
raise
error
,
"
empty group
"
raise
error
,
"
invalid group reference
"
return
sep
.
join
(
literals
)
Lib/test/test_re.py
View file @
a01a2ee9
...
...
@@ -83,6 +83,48 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.sub('
\
r
\
n
', '
\
n
', '
abc
\
r
\
ndef
\
r
\
n
'),
'
abc
\
ndef
\
n
')
def test_sub_template_numeric_escape(self):
    # bug 776311 and friends
    #
    # Checks how re.sub() reads backslash-digit sequences in a
    # replacement template: either as an octal character escape or as a
    # numbered group reference.

    # A leading 0, or three octal digits, gives an octal escape; a
    # following character that cannot extend the escape is kept as a
    # literal.
    self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
    self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
    self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
    self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
    self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
    self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
    self.assertEqual(re.sub('x', r'\117', 'x'), '\117')

    # An octal escape consumes at most three digits; the fourth digit is
    # a literal.  Both assertions spell the same expected string.
    self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
    self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')

    # Escapes starting with 0 may be shorter than three digits.
    self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
    self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
    self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
    self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
    self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')

    # Octal values above 0377 are reduced to their low eight bits.
    self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
    self.assertEqual(re.sub('x', r'\777', 'x'), '\377')

    # The pattern 'x' defines no groups, so every template below that is
    # read as a group reference is expected to raise re.error.
    self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
    self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
    self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'

    # in python2.3 (etc), these loop endlessly in sre_parse.py
    # Each pattern below has eleven groups, so \11 is a valid reference
    # and any character after it is copied through as a literal.
    self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
    self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
                     'xz8')
    self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
                     'xza')
def test_qualified_re_sub(self):
self.assertEqual(re.sub('
a
', 'b', '
aaaaa
'), '
bbbbb
')
self.assertEqual(re.sub('
a
', 'b', '
aaaaa
', 1), '
baaaa
')
...
...
@@ -105,6 +147,7 @@ class ReTests(unittest.TestCase):
self.assertRaises(IndexError, re.sub, '
(
?
P
<
a
>
x
)
', '
\
g
<
ab
>
', '
xx
')
self.assertRaises(re.error, re.sub, '
(
?
P
<
a
>
x
)
|
(
?
P
<
b
>
y
)
', '
\
g
<
b
>
', '
xx
')
self.assertRaises(re.error, re.sub, '
(
?
P
<
a
>
x
)
|
(
?
P
<
b
>
y
)
', '
\\
2
', '
xx
')
self.assertRaises(re.error, re.sub, '
(
?
P
<
a
>
x
)
', '
\
g
<-
1
>
', '
xx
')
def test_re_subn(self):
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('
x
x
', 2))
...
...
@@ -386,6 +429,16 @@ class ReTests(unittest.TestCase):
self.assertNotEqual(re.match(r"
\
x%
02
xz" % i, chr(i)+"z"), None)
self.assertRaises(re.error, re.match, "
\
9
11
", "")
def test_sre_character_class_literals(self):
for i in [0, 8, 16, 32, 64, 127, 128, 255]:
self.assertNotEqual(re.match(r"[
\
%
03
o]" % i, chr(i)), None)
self.assertNotEqual(re.match(r"[
\
%
03
o0]" % i, chr(i)), None)
self.assertNotEqual(re.match(r"[
\
%
03
o8]" % i, chr(i)), None)
self.assertNotEqual(re.match(r"[
\
x%
02
x]" % i, chr(i)), None)
self.assertNotEqual(re.match(r"[
\
x%
02
x0]" % i, chr(i)), None)
self.assertNotEqual(re.match(r"[
\
x%
02
xz]" % i, chr(i)), None)
self.assertRaises(re.error, re.match, "[
\
9
11
]", "")
def test_bug_113254(self):
self.assertEqual(re.match(r'
(
a
)
|
(
b
)
', 'b').start(1), -1)
self.assertEqual(re.match(r'
(
a
)
|
(
b
)
', 'b').end(1), -1)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment