Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Kirill Smelkov
cpython
Commits
9eabac68
Commit
9eabac68
authored
11 years ago
by
Serhiy Storchaka
Browse files
Options
Download
Email Patches
Plain Diff
Issue #18685: Restore re performance to pre-PEP 393 levels.
parent
ffb58e96
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
350 additions
and
384 deletions
+350
-384
Lib/test/test_re.py
Lib/test/test_re.py
+23
-0
Misc/NEWS
Misc/NEWS
+2
-0
Modules/_sre.c
Modules/_sre.c
+322
-378
Modules/sre.h
Modules/sre.h
+3
-6
No files found.
Lib/test/test_re.py
View file @
9eabac68
...
...
@@ -77,6 +77,8 @@ class ReTests(unittest.TestCase):
self
.
assertTypedEqual
(
re
.
sub
(
b
'y'
,
B
(
b
'a'
),
B
(
b
'xyz'
)),
b
'xaz'
)
self
.
assertTypedEqual
(
re
.
sub
(
b
'y'
,
bytearray
(
b
'a'
),
bytearray
(
b
'xyz'
)),
b
'xaz'
)
self
.
assertTypedEqual
(
re
.
sub
(
b
'y'
,
memoryview
(
b
'a'
),
memoryview
(
b
'xyz'
)),
b
'xaz'
)
for
y
in
(
"
\xe0
"
,
"
\u0430
"
,
"
\U0001d49c
"
):
self
.
assertEqual
(
re
.
sub
(
y
,
'a'
,
'x%sz'
%
y
),
'xaz'
)
self
.
assertEqual
(
re
.
sub
(
"(?i)b+"
,
"x"
,
"bbbb BBBB"
),
'x x'
)
self
.
assertEqual
(
re
.
sub
(
r
'\d+'
,
self
.
bump_num
,
'08.2 -2 23x99y'
),
...
...
@@ -250,6 +252,13 @@ class ReTests(unittest.TestCase):
[
b
''
,
b
'a'
,
b
'b'
,
b
'c'
])
self
.
assertTypedEqual
(
re
.
split
(
b
"(:*)"
,
string
),
[
b
''
,
b
':'
,
b
'a'
,
b
':'
,
b
'b'
,
b
'::'
,
b
'c'
])
for
a
,
b
,
c
in
(
"
\xe0\xdf\xe7
"
,
"
\u0430\u0431\u0432
"
,
"
\U0001d49c\U0001d49e\U0001d4b5
"
):
string
=
":%s:%s::%s"
%
(
a
,
b
,
c
)
self
.
assertEqual
(
re
.
split
(
":"
,
string
),
[
''
,
a
,
b
,
''
,
c
])
self
.
assertEqual
(
re
.
split
(
":*"
,
string
),
[
''
,
a
,
b
,
c
])
self
.
assertEqual
(
re
.
split
(
"(:*)"
,
string
),
[
''
,
':'
,
a
,
':'
,
b
,
'::'
,
c
])
self
.
assertEqual
(
re
.
split
(
"(?::*)"
,
":a:b::c"
),
[
''
,
'a'
,
'b'
,
'c'
])
self
.
assertEqual
(
re
.
split
(
"(:)*"
,
":a:b::c"
),
...
...
@@ -287,6 +296,14 @@ class ReTests(unittest.TestCase):
[
b
":"
,
b
"::"
,
b
":::"
])
self
.
assertTypedEqual
(
re
.
findall
(
b
"(:)(:*)"
,
string
),
[(
b
":"
,
b
""
),
(
b
":"
,
b
":"
),
(
b
":"
,
b
"::"
)])
for
x
in
(
"
\xe0
"
,
"
\u0430
"
,
"
\U0001d49c
"
):
xx
=
x
*
2
xxx
=
x
*
3
string
=
"a%sb%sc%sd"
%
(
x
,
xx
,
xxx
)
self
.
assertEqual
(
re
.
findall
(
"%s+"
%
x
,
string
),
[
x
,
xx
,
xxx
])
self
.
assertEqual
(
re
.
findall
(
"(%s+)"
%
x
,
string
),
[
x
,
xx
,
xxx
])
self
.
assertEqual
(
re
.
findall
(
"(%s)(%s*)"
%
(
x
,
x
),
string
),
[(
x
,
""
),
(
x
,
x
),
(
x
,
xx
)])
def
test_bug_117612
(
self
):
self
.
assertEqual
(
re
.
findall
(
r
"(a|(b))"
,
"aba"
),
...
...
@@ -305,6 +322,12 @@ class ReTests(unittest.TestCase):
self
.
assertEqual
(
re
.
match
(
b
'(a)'
,
string
).
group
(
0
),
b
'a'
)
self
.
assertEqual
(
re
.
match
(
b
'(a)'
,
string
).
group
(
1
),
b
'a'
)
self
.
assertEqual
(
re
.
match
(
b
'(a)'
,
string
).
group
(
1
,
1
),
(
b
'a'
,
b
'a'
))
for
a
in
(
"
\xe0
"
,
"
\u0430
"
,
"
\U0001d49c
"
):
self
.
assertEqual
(
re
.
match
(
a
,
a
).
groups
(),
())
self
.
assertEqual
(
re
.
match
(
'(%s)'
%
a
,
a
).
groups
(),
(
a
,))
self
.
assertEqual
(
re
.
match
(
'(%s)'
%
a
,
a
).
group
(
0
),
a
)
self
.
assertEqual
(
re
.
match
(
'(%s)'
%
a
,
a
).
group
(
1
),
a
)
self
.
assertEqual
(
re
.
match
(
'(%s)'
%
a
,
a
).
group
(
1
,
1
),
(
a
,
a
))
pat
=
re
.
compile
(
'((a)|(b))(c)?'
)
self
.
assertEqual
(
pat
.
match
(
'a'
).
groups
(),
(
'a'
,
'a'
,
None
,
None
))
...
...
This diff is collapsed.
Click to expand it.
Misc/NEWS
View file @
9eabac68
...
...
@@ -21,6 +21,8 @@ Core and Builtins
Library
-------
- Issue #18685: Restore re performance to pre-PEP 393 levels.
- Issue #19339: telnetlib module is now using time.monotonic() when available
to compute timeout.
...
...
This diff is collapsed.
Click to expand it.
Modules/_sre.c
View file @
9eabac68
This diff is collapsed.
Click to expand it.
Modules/sre.h
View file @
9eabac68
...
...
@@ -31,9 +31,7 @@ typedef struct {
PyObject
*
pattern
;
/* pattern source (or None) */
int
flags
;
/* flags used when compiling pattern source */
PyObject
*
weakreflist
;
/* List of weak references */
int
logical_charsize
;
/* pattern charsize (or -1) */
int
charsize
;
Py_buffer
view
;
int
isbytes
;
/* pattern type (1 - bytes, 0 - string, -1 - None) */
/* pattern code */
Py_ssize_t
codesize
;
SRE_CODE
code
[
1
];
...
...
@@ -73,9 +71,8 @@ typedef struct {
/* attributes for the match object */
PyObject
*
string
;
Py_ssize_t
pos
,
endpos
;
/* character size */
int
logical_charsize
;
/* kind of thing: 1 - bytes, 2/4 - unicode */
int
charsize
;
int
isbytes
;
int
charsize
;
/* character size */
/* registers */
Py_ssize_t
lastindex
;
Py_ssize_t
lastmark
;
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment