Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
7b3110b7
Commit
7b3110b7
authored
Feb 21, 2015
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issues #814253, #9179: Group references and conditional group references now
work in lookbehind assertions in regular expressions.
parent
0098ad02
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
92 additions
and
12 deletions
+92
-12
Doc/library/re.rst
Doc/library/re.rst
+3
-0
Lib/re.py
Lib/re.py
+3
-2
Lib/sre_parse.py
Lib/sre_parse.py
+42
-9
Lib/test/test_re.py
Lib/test/test_re.py
+41
-1
Misc/NEWS
Misc/NEWS
+3
-0
No files found.
Doc/library/re.rst
View file @
7b3110b7
...
...
@@ -297,6 +297,9 @@ The special characters are:
>>> m.group(0)
'egg'
.. versionchanged:: 3.5
Added support for group references of fixed length.
``(?<!...)``
Matches if the current position in the string is not preceded by a match for
``...``. This is called a :dfn:`negative lookbehind assertion`. Similar to
...
...
Lib/re.py
View file @
7b3110b7
...
...
@@ -351,10 +351,11 @@ class Scanner:
s
=
sre_parse
.
Pattern
()
s
.
flags
=
flags
for
phrase
,
action
in
lexicon
:
gid
=
s
.
opengroup
()
p
.
append
(
sre_parse
.
SubPattern
(
s
,
[
(
SUBPATTERN
,
(
len
(
p
)
+
1
,
sre_parse
.
parse
(
phrase
,
flags
))),
(
SUBPATTERN
,
(
gid
,
sre_parse
.
parse
(
phrase
,
flags
))),
]))
s
.
groups
=
len
(
p
)
+
1
s
.
closegroup
(
gid
,
p
[
-
1
])
p
=
sre_parse
.
SubPattern
(
s
,
[(
BRANCH
,
(
None
,
p
))])
self
.
scanner
=
sre_compile
.
compile
(
p
)
def
scan
(
self
,
string
):
...
...
Lib/sre_parse.py
View file @
7b3110b7
...
...
@@ -68,12 +68,15 @@ class Pattern:
# master pattern object. keeps track of global attributes
def __init__(self):
self.flags = 0
self.open = []
self.groups = 1
self.groupdict = {}
self.subpatterns = [None] # group 0
self.lookbehindgroups = None
@property
def groups(self):
return len(self.subpatterns)
def opengroup(self, name=None):
gid = self.groups
self.
groups = gid + 1
self.
subpatterns.append(None)
if self.groups > MAXGROUPS:
raise error("
groups
number
is
too
large
")
if name is not None:
...
...
@@ -82,12 +85,19 @@ class Pattern:
raise error("
redefinition
of
group
name
%
r
as
group
%
d
;
"
"
was
group
%
d
" % (name, gid, ogid))
self.groupdict[name] = gid
self.open.append(gid)
return gid
def closegroup(self, gid):
self.
open.remove(gid)
def closegroup(self, gid
, p
):
self.
subpatterns[gid] = p
def checkgroup(self, gid):
return gid < self.groups and gid not in self.open
return gid < self.groups and self.subpatterns[gid] is not None
def checklookbehindgroup(self, gid, source):
if self.lookbehindgroups is not None:
if not self.checkgroup(gid):
raise source.error('cannot refer to an open group')
if gid >= self.lookbehindgroups:
raise source.error('cannot refer to group defined in the same '
'lookbehind subpattern')
class SubPattern:
# a subpattern, in intermediate form
...
...
@@ -183,7 +193,21 @@ class SubPattern:
elif op in _UNITCODES:
lo = lo + 1
hi = hi + 1
elif op == SUCCESS:
elif op is GROUPREF:
i, j = self.pattern.subpatterns[av].getwidth()
lo = lo + i
hi = hi + j
elif op is GROUPREF_EXISTS:
i, j = av[1].getwidth()
if av[2] is not None:
l, h = av[2].getwidth()
i = min(i, l)
j = max(j, h)
else:
i = 0
lo = lo + i
hi = hi + j
elif op is SUCCESS:
break
self.width = min(lo, MAXREPEAT - 1), min(hi, MAXREPEAT)
return self.width
...
...
@@ -379,6 +403,7 @@ def _escape(source, escape, state):
if not state.checkgroup(group):
raise source.error("
cannot
refer
to
open
group
",
len(escape))
state.checklookbehindgroup(group, source)
return GROUPREF, group
raise ValueError
if len(escape) == 2:
...
...
@@ -641,6 +666,7 @@ def _parse(source, state):
if
gid
is
None
:
msg
=
"unknown group name: {0!r}"
.
format
(
name
)
raise
source
.
error
(
msg
,
len
(
name
)
+
1
)
state
.
checklookbehindgroup
(
gid
,
source
)
subpatternappend
((
GROUPREF
,
gid
))
continue
else
:
...
...
@@ -668,7 +694,13 @@ def _parse(source, state):
if
char
is
None
or
char
not
in
"=!"
:
raise
source
.
error
(
"syntax error"
)
dir
=
-
1
# lookbehind
lookbehindgroups
=
state
.
lookbehindgroups
if
lookbehindgroups
is
None
:
state
.
lookbehindgroups
=
state
.
groups
p
=
_parse_sub
(
source
,
state
)
if
dir
<
0
:
if
lookbehindgroups
is
None
:
state
.
lookbehindgroups
=
None
if
not
sourcematch
(
")"
):
raise
source
.
error
(
"unbalanced parenthesis"
)
if
char
==
"="
:
...
...
@@ -701,6 +733,7 @@ def _parse(source, state):
if
condgroup
>=
MAXGROUPS
:
raise
source
.
error
(
"the group number is too large"
,
len
(
condname
)
+
1
)
state
.
checklookbehindgroup
(
condgroup
,
source
)
elif
char
in
FLAGS
:
# flags
state
.
flags
|=
FLAGS
[
char
]
...
...
@@ -726,7 +759,7 @@ def _parse(source, state):
if
not
sourcematch
(
")"
):
raise
source
.
error
(
"unbalanced parenthesis"
)
if
group
is
not
None
:
state
.
closegroup
(
group
)
state
.
closegroup
(
group
,
p
)
subpatternappend
((
SUBPATTERN
,
(
group
,
p
)))
else
:
while
True
:
...
...
Lib/test/test_re.py
View file @
7b3110b7
...
...
@@ -604,7 +604,7 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.match("
a
.
*
b", "
a
\
n
\
nb
", re.DOTALL).group(0),
"
a
\
n
\
nb
")
def test_
non_consuming
(self):
def test_
lookahead
(self):
self.assertEqual(re.match("
(
a
(
?
=
\
s
[
^
a
]))
", "
a
b").group(1), "
a
")
self.assertEqual(re.match("
(
a
(
?
=
\
s
[
^
a
]
*
))
", "
a
b").group(1), "
a
")
self.assertEqual(re.match("
(
a
(
?
=
\
s
[
abc
]))
", "
a
b").group(1), "
a
")
...
...
@@ -618,6 +618,46 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.match(r"
(
a
)(
?!
\
s
\
1
)
", "
a
b").group(1), "
a
")
self.assertEqual(re.match(r"
(
a
)(
?!
\
s
(
abc
|
a
))
", "
a
b").group(1), "
a
")
# Group reference.
self.assertTrue(re.match(r'(a)b(?=
\
1
)a', 'aba'))
self.assertIsNone(re.match(r'(a)b(?=
\
1
)c', 'abac'))
# Conditional group reference.
self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(2)c|x))c', 'abc'))
self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(1)b|x))c', 'abc'))
self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(1)c|x))c', 'abc'))
# Group used before defined.
self.assertTrue(re.match(r'(a)b(?=(?(2)x|c))(c)', 'abc'))
self.assertIsNone(re.match(r'(a)b(?=(?(2)b|x))(c)', 'abc'))
self.assertTrue(re.match(r'(a)b(?=(?(1)c|x))(c)', 'abc'))
def test_lookbehind(self):
self.assertTrue(re.match(r'ab(?<=b)c', 'abc'))
self.assertIsNone(re.match(r'ab(?<=c)c', 'abc'))
self.assertIsNone(re.match(r'ab(?<!b)c', 'abc'))
self.assertTrue(re.match(r'ab(?<!c)c', 'abc'))
# Group reference.
self.assertTrue(re.match(r'(a)a(?<=
\
1
)c', 'aac'))
self.assertIsNone(re.match(r'(a)b(?<=
\
1
)a', 'abaa'))
self.assertIsNone(re.match(r'(a)a(?<!
\
1
)c', 'aac'))
self.assertTrue(re.match(r'(a)b(?<!
\
1
)a', 'abaa'))
# Conditional group reference.
self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)x|c))c', 'abc'))
self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)b|x))c', 'abc'))
self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(2)x|b))c', 'abc'))
self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(1)c|x))c', 'abc'))
self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(1)b|x))c', 'abc'))
# Group used before defined.
self.assertRaises(re.error, re.compile, r'(a)b(?<=(?(2)b|x))(c)')
self.assertIsNone(re.match(r'(a)b(?<=(?(1)c|x))(c)', 'abc'))
self.assertTrue(re.match(r'(a)b(?<=(?(1)b|x))(c)', 'abc'))
# Group defined in the same lookbehind pattern
self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)
\
2
)(c)')
self.assertRaises(re.error, re.compile, r'(a)b(?<=(?P<a>.)(?P=a))(c)')
self.assertRaises(re.error, re.compile, r'(a)b(?<=(a)(?(2)b|x))(c)')
self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)(?<=
\
2
))(c)')
def test_ignore_case(self):
self.assertEqual(re.match("
abc
", "
ABC
", re.I).group(0), "
ABC
")
self.assertEqual(re.match(b"
abc
", b"
ABC
", re.I).group(0), b"
ABC
")
...
...
Misc/NEWS
View file @
7b3110b7
...
...
@@ -13,6 +13,9 @@ Core and Builtins
Library
-------
- Issues #814253, #9179: Group references and conditional group references now
work in lookbehind assertions in regular expressions.
- Issue #23215: Multibyte codecs with custom error handlers that ignore errors
consumed too much memory and raised SystemError or MemoryError.
Original patch by Aleksi Torhamo.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment