Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
9baa5b2d
Commit
9baa5b2d
authored
Sep 29, 2014
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #22437: The number of capturing groups in a regular expression is no longer
limited to 100.
parent
c31e6227
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
76 additions
and
27 deletions
+76
-27
Doc/whatsnew/3.5.rst
Doc/whatsnew/3.5.rst
+6
-0
Lib/sre_compile.py
Lib/sre_compile.py
+0
-6
Lib/sre_constants.py
Lib/sre_constants.py
+1
-1
Lib/sre_parse.py
Lib/sre_parse.py
+10
-0
Lib/test/test_re.py
Lib/test/test_re.py
+16
-2
Misc/NEWS
Misc/NEWS
+3
-0
Modules/_sre.c
Modules/_sre.c
+37
-14
Modules/sre.h
Modules/sre.h
+3
-4
No files found.
Doc/whatsnew/3.5.rst
View file @
9baa5b2d
...
...
@@ -217,6 +217,12 @@ os
* :class:`os.stat_result` now has a :attr:`~os.stat_result.st_file_attributes`
attribute on Windows (contributed by Ben Hoyt in :issue:`21719`).
re
--
* The number of capturing groups in a regular expression is no longer limited to 100.
(Contributed by Serhiy Storchaka in :issue:`22437`.)
shutil
------
...
...
Lib/sre_compile.py
View file @
9baa5b2d
...
...
@@ -470,12 +470,6 @@ def compile(p, flags=0):
# print code
# XXX: <fl> get rid of this limitation!
if
p
.
pattern
.
groups
>
100
:
raise
AssertionError
(
"sorry, but this version only supports 100 named groups"
)
# map in either direction
groupindex
=
p
.
pattern
.
groupdict
indexgroup
=
[
None
]
*
p
.
pattern
.
groups
...
...
Lib/sre_constants.py
View file @
9baa5b2d
...
...
@@ -15,7 +15,7 @@
MAGIC
=
20031017
from
_sre
import
MAXREPEAT
from
_sre
import
MAXREPEAT
,
MAXGROUPS
# SRE standard exception (access as sre.error)
# should this really be here?
...
...
Lib/sre_parse.py
View file @
9baa5b2d
...
...
@@ -72,6 +72,8 @@ class Pattern:
def opengroup(self, name=None):
gid = self.groups
self.groups = gid + 1
if self.groups > MAXGROUPS:
raise error("
groups
number
is
too
large
")
if name is not None:
ogid = self.groupdict.get(name, None)
if ogid is not None:
...
...
@@ -695,8 +697,14 @@ def _parse(source, state):
else
:
try
:
condgroup
=
int
(
condname
)
if
condgroup
<
0
:
raise
ValueError
except
ValueError
:
raise
error
(
"bad character in group name"
)
if
not
condgroup
:
raise
error
(
"bad group number"
)
if
condgroup
>=
MAXGROUPS
:
raise
error
(
"the group number is too large"
)
else
:
# flags
if
not
source
.
next
in
FLAGS
:
...
...
@@ -822,6 +830,8 @@ def parse_template(source, pattern):
index
=
int
(
name
)
if
index
<
0
:
raise
error
(
"negative group number"
)
if
index
>=
MAXGROUPS
:
raise
error
(
"the group number is too large"
)
except
ValueError
:
if
not
name
.
isidentifier
():
raise
error
(
"bad character in group name"
)
...
...
Lib/test/test_re.py
View file @
9baa5b2d
...
...
@@ -193,6 +193,7 @@ class ReTests(unittest.TestCase):
def test_symbolic_groups(self):
re.compile('
(
?
P
<
a
>
x
)(
?
P
=
a
)(
?
(
a
)
y
)
')
re.compile('
(
?
P
<
a1
>
x
)(
?
P
=
a1
)(
?
(
a1
)
y
)
')
re.compile('
(
?
P
<
a1
>
x
)
\
1
(
?
(
1
)
y
)
')
self.assertRaises(re.error, re.compile, '
(
?
P
<
a
>
)(
?
P
<
a
>
)
')
self.assertRaises(re.error, re.compile, '
(
?
Px
)
')
self.assertRaises(re.error, re.compile, '
(
?
P
=
)
')
...
...
@@ -212,6 +213,10 @@ class ReTests(unittest.TestCase):
re.compile('
(
?
P
<
µ
>
x
)(
?
P
=
µ
)(
?
(
µ
)
y
)
')
re.compile('
(
?
P
<
𝔘𝔫𝔦𝔠𝔬𝔡𝔢
>
x
)(
?
P
=
𝔘𝔫𝔦𝔠𝔬𝔡𝔢
)(
?
(
𝔘𝔫𝔦𝔠𝔬𝔡𝔢
)
y
)
')
self.assertRaises(re.error, re.compile, '
(
?
P
<
©
>
x
)
')
# Support > 100 groups.
pat = '
|
'.join('
x
(
?
P
<
a
%
d
>%
x
)
y
' % (i, i) for i in range(1, 200 + 1))
pat = '
(
?
:
%
s
)(
?
(
200
)
z
|
t
)
' % pat
self.assertEqual(re.match(pat, '
xc8yz
').span(), (0, 5))
def test_symbolic_refs(self):
self.assertRaises(re.error, re.sub, '
(
?
P
<
a
>
x
)
', '
\
g
<
a
', '
xx
')
...
...
@@ -228,6 +233,9 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.sub('
(
?
P
<
µ
>
x
)
', r'
\
g
<
µ
>
', '
xx
'), '
xx
')
self.assertEqual(re.sub('
(
?
P
<
𝔘𝔫𝔦𝔠𝔬𝔡𝔢
>
x
)
', r'
\
g
<
𝔘𝔫𝔦𝔠𝔬𝔡𝔢
>
', '
xx
'), '
xx
')
self.assertRaises(re.error, re.sub, '
(
?
P
<
a
>
x
)
', r'
\
g
<
©
>
', '
xx
')
# Support > 100 groups.
pat = '
|
'.join('
x
(
?
P
<
a
%
d
>%
x
)
y
' % (i, i) for i in range(1, 200 + 1))
self.assertEqual(re.sub(pat, '
\
g
<
200
>
', '
xc8yzxc8y
'), '
c8zc8
')
def test_re_subn(self):
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('
x
x
', 2))
...
...
@@ -404,6 +412,10 @@ class ReTests(unittest.TestCase):
self.assertIsNone(p.match('
abd
'))
self.assertIsNone(p.match('
ac
'))
# Support > 100 groups.
pat = '
|
'.join('
x
(
?
P
<
a
%
d
>%
x
)
y
' % (i, i) for i in range(1, 200 + 1))
pat = '
(
?
:
%
s
)(
?
(
200
)
z
)
' % pat
self.assertEqual(re.match(pat, '
xc8yz
').span(), (0, 5))
def test_re_groupref(self):
self.assertEqual(re.match(r'
^
(
\
|
)
?
([
^
()]
+
)
\
1
$
', '
|
a
|
').groups(),
...
...
@@ -1070,8 +1082,10 @@ class ReTests(unittest.TestCase):
# a RuntimeError is raised instead of OverflowError.
long_overflow = 2**128
self.assertRaises(TypeError, re.finditer, "
a
", {})
self.assertRaises(OverflowError, _sre.compile, "
abc
", 0, [long_overflow])
self.assertRaises(TypeError, _sre.compile, {}, 0, [])
with self.assertRaises(OverflowError):
_sre.compile("
abc
", 0, [long_overflow], 0, [], [])
with self.assertRaises(TypeError):
_sre.compile({}, 0, [], 0, [], [])
def test_search_dot_unicode(self):
self.assertTrue(re.search("
123.
*-
", '123abc-'))
...
...
Misc/NEWS
View file @
9baa5b2d
...
...
@@ -145,6 +145,9 @@ Core and Builtins
Library
-------
-
Issue
#
22437
:
Number
of
capturing
groups
in
regular
expression
is
no
longer
limited
by
100.
-
Issue
#
17442
:
InteractiveInterpreter
now
displays
the
full
chained
traceback
in
its
showtraceback
method
,
to
match
the
built
in
interactive
interpreter
.
...
...
Modules/_sre.c
View file @
9baa5b2d
...
...
@@ -357,6 +357,11 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
memset
(
state
,
0
,
sizeof
(
SRE_STATE
));
state
->
mark
=
PyMem_New
(
void
*
,
pattern
->
groups
*
2
);
if
(
!
state
->
mark
)
{
PyErr_NoMemory
();
goto
err
;
}
state
->
lastmark
=
-
1
;
state
->
lastindex
=
-
1
;
...
...
@@ -409,6 +414,8 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
return
string
;
err:
PyMem_Del
(
state
->
mark
);
state
->
mark
=
NULL
;
if
(
state
->
buffer
.
buf
)
PyBuffer_Release
(
&
state
->
buffer
);
return
NULL
;
...
...
@@ -421,6 +428,8 @@ state_fini(SRE_STATE* state)
PyBuffer_Release
(
&
state
->
buffer
);
Py_XDECREF
(
state
->
string
);
data_stack_dealloc
(
state
);
PyMem_Del
(
state
->
mark
);
state
->
mark
=
NULL
;
}
/* calculate offset from start of string */
...
...
@@ -560,6 +569,7 @@ pattern_match(PatternObject *self, PyObject *args, PyObject *kwargs)
PyObject
*
pattern
=
NULL
;
SRE_STATE
state
;
Py_ssize_t
status
;
PyObject
*
match
;
if
(
!
PyArg_ParseTupleAndKeywords
(
args
,
kwargs
,
"|Onn$O:match"
,
_keywords
,
...
...
@@ -579,12 +589,14 @@ pattern_match(PatternObject *self, PyObject *args, PyObject *kwargs)
status
=
sre_match
(
&
state
,
PatternObject_GetCode
(
self
),
0
);
TRACE
((
"|%p|%p|END
\n
"
,
PatternObject_GetCode
(
self
),
state
.
ptr
));
if
(
PyErr_Occurred
())
if
(
PyErr_Occurred
())
{
state_fini
(
&
state
);
return
NULL
;
}
match
=
pattern_new_match
(
self
,
&
state
,
status
);
state_fini
(
&
state
);
return
(
PyObject
*
)
pattern_new_match
(
self
,
&
state
,
status
);
return
match
;
}
static
PyObject
*
...
...
@@ -592,6 +604,7 @@ pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw)
{
SRE_STATE
state
;
Py_ssize_t
status
;
PyObject
*
match
;
PyObject
*
string
=
NULL
,
*
string2
=
NULL
;
Py_ssize_t
start
=
0
;
...
...
@@ -616,12 +629,14 @@ pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw)
status
=
sre_match
(
&
state
,
PatternObject_GetCode
(
self
),
1
);
TRACE
((
"|%p|%p|END
\n
"
,
PatternObject_GetCode
(
self
),
state
.
ptr
));
if
(
PyErr_Occurred
())
if
(
PyErr_Occurred
())
{
state_fini
(
&
state
);
return
NULL
;
}
match
=
pattern_new_match
(
self
,
&
state
,
status
);
state_fini
(
&
state
);
return
pattern_new_match
(
self
,
&
state
,
status
);
return
match
;
}
static
PyObject
*
...
...
@@ -629,6 +644,7 @@ pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
{
SRE_STATE
state
;
Py_ssize_t
status
;
PyObject
*
match
;
PyObject
*
string
=
NULL
,
*
string2
=
NULL
;
Py_ssize_t
start
=
0
;
...
...
@@ -652,12 +668,14 @@ pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
TRACE
((
"|%p|%p|END
\n
"
,
PatternObject_GetCode
(
self
),
state
.
ptr
));
state_fini
(
&
state
);
if
(
PyErr_Occurred
())
if
(
PyErr_Occurred
())
{
state_fini
(
&
state
);
return
NULL
;
}
return
pattern_new_match
(
self
,
&
state
,
status
);
match
=
pattern_new_match
(
self
,
&
state
,
status
);
state_fini
(
&
state
);
return
match
;
}
static
PyObject
*
...
...
@@ -1417,7 +1435,7 @@ _compile(PyObject* self_, PyObject* args)
PyObject
*
groupindex
=
NULL
;
PyObject
*
indexgroup
=
NULL
;
if
(
!
PyArg_ParseTuple
(
args
,
"OiO!
|
nOO"
,
&
pattern
,
&
flags
,
if
(
!
PyArg_ParseTuple
(
args
,
"OiO!nOO"
,
&
pattern
,
&
flags
,
&
PyList_Type
,
&
code
,
&
groups
,
&
groupindex
,
&
indexgroup
))
return
NULL
;
...
...
@@ -1933,10 +1951,9 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
static
int
_validate_outer
(
SRE_CODE
*
code
,
SRE_CODE
*
end
,
Py_ssize_t
groups
)
{
if
(
groups
<
0
||
groups
>
100
||
code
>=
end
||
end
[
-
1
]
!=
SRE_OP_SUCCESS
)
if
(
groups
<
0
||
(
size_t
)
groups
>
SRE_MAXGROUPS
||
code
>=
end
||
end
[
-
1
]
!=
SRE_OP_SUCCESS
)
FAIL
;
if
(
groups
==
0
)
/* fix for simplejson */
groups
=
100
;
/* 100 groups should always be safe */
return
_validate_inner
(
code
,
end
-
1
,
groups
);
}
...
...
@@ -2747,6 +2764,12 @@ PyMODINIT_FUNC PyInit__sre(void)
Py_DECREF
(
x
);
}
x
=
PyLong_FromUnsignedLong
(
SRE_MAXGROUPS
);
if
(
x
)
{
PyDict_SetItemString
(
d
,
"MAXGROUPS"
,
x
);
Py_DECREF
(
x
);
}
x
=
PyUnicode_FromString
(
copyright
);
if
(
x
)
{
PyDict_SetItemString
(
d
,
"copyright"
,
x
);
...
...
Modules/sre.h
View file @
9baa5b2d
...
...
@@ -18,8 +18,10 @@
#define SRE_CODE Py_UCS4
#if SIZEOF_SIZE_T > 4
# define SRE_MAXREPEAT (~(SRE_CODE)0)
# define SRE_MAXGROUPS ((~(SRE_CODE)0) / 2)
#else
# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
# define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_SIZE_T / 2)
#endif
typedef
struct
{
...
...
@@ -52,9 +54,6 @@ typedef struct {
typedef
unsigned
int
(
*
SRE_TOLOWER_HOOK
)(
unsigned
int
ch
);
/* FIXME: <fl> shouldn't be a constant, really... */
#define SRE_MARK_SIZE 200
typedef
struct
SRE_REPEAT_T
{
Py_ssize_t
count
;
SRE_CODE
*
pattern
;
/* points to REPEAT operator arguments */
...
...
@@ -76,7 +75,7 @@ typedef struct {
/* registers */
Py_ssize_t
lastindex
;
Py_ssize_t
lastmark
;
void
*
mark
[
SRE_MARK_SIZE
]
;
void
*
*
mark
;
/* dynamically allocated stuff */
char
*
data_stack
;
size_t
data_stack_size
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment