Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
9c15ec1c
Commit
9c15ec1c
authored
Oct 23, 2013
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #19365: Optimized the parsing of long replacement string in re.sub*()
functions.
parent
4d397008
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
33 additions
and
44 deletions
+33
-44
Lib/sre_parse.py
Lib/sre_parse.py
+30
-44
Misc/NEWS
Misc/NEWS
+3
-0
No files found.
Lib/sre_parse.py
View file @
9c15ec1c
...
...
@@ -769,35 +769,33 @@ def parse_template(source, pattern):
# group references
s
=
Tokenizer
(
source
)
sget
=
s
.
get
p
=
[]
a
=
p
.
append
def
literal
(
literal
,
p
=
p
,
pappend
=
a
):
if
p
and
p
[
-
1
][
0
]
is
LITERAL
:
p
[
-
1
]
=
LITERAL
,
p
[
-
1
][
1
]
+
literal
else
:
pappend
((
LITERAL
,
literal
))
sep
=
source
[:
0
]
if
isinstance
(
sep
,
str
):
makechar
=
chr
else
:
makechar
=
chr
while
1
:
groups
=
[]
literals
=
[]
literal
=
[]
lappend
=
literal
.
append
def
addgroup
(
index
):
if
literal
:
literals
.
append
(
''
.
join
(
literal
))
del
literal
[:]
groups
.
append
((
len
(
literals
),
index
))
literals
.
append
(
None
)
while
True
:
this
=
sget
()
if
this
is
None
:
break
# end of replacement string
if
this
and
this
[
0
]
==
"
\
\
"
:
if
this
[
0
]
==
"
\
\
"
:
# group
c
=
this
[
1
:
2
]
c
=
this
[
1
]
if
c
==
"g"
:
name
=
""
if
s
.
match
(
"<"
):
while
1
:
while
True
:
char
=
sget
()
if
char
is
None
:
raise
error
(
"unterminated group name"
)
if
char
==
">"
:
break
name
=
name
+
char
name
+=
char
if
not
name
:
raise
error
(
"missing group name"
)
try
:
...
...
@@ -811,50 +809,38 @@ def parse_template(source, pattern):
index
=
pattern
.
groupindex
[
name
]
except
KeyError
:
raise
IndexError
(
"unknown group name"
)
a
((
MARK
,
index
)
)
a
ddgroup
(
index
)
elif
c
==
"0"
:
if
s
.
next
in
OCTDIGITS
:
this
=
this
+
sget
()
this
+=
sget
()
if
s
.
next
in
OCTDIGITS
:
this
=
this
+
sget
()
l
iteral
(
makecha
r
(
int
(
this
[
1
:],
8
)
&
0xff
))
this
+=
sget
()
l
append
(
ch
r
(
int
(
this
[
1
:],
8
)
&
0xff
))
elif
c
in
DIGITS
:
isoctal
=
False
if
s
.
next
in
DIGITS
:
this
=
this
+
sget
()
this
+=
sget
()
if
(
c
in
OCTDIGITS
and
this
[
2
]
in
OCTDIGITS
and
s
.
next
in
OCTDIGITS
):
this
=
this
+
sget
()
this
+=
sget
()
isoctal
=
True
l
iteral
(
makecha
r
(
int
(
this
[
1
:],
8
)
&
0xff
))
l
append
(
ch
r
(
int
(
this
[
1
:],
8
)
&
0xff
))
if
not
isoctal
:
a
((
MARK
,
int
(
this
[
1
:])
))
a
ddgroup
(
int
(
this
[
1
:]
))
else
:
try
:
this
=
makecha
r
(
ESCAPES
[
this
][
1
])
this
=
ch
r
(
ESCAPES
[
this
][
1
])
except
KeyError
:
pass
l
iteral
(
this
)
l
append
(
this
)
else
:
literal
(
this
)
# convert template to groups and literals lists
i
=
0
groups
=
[]
groupsappend
=
groups
.
append
literals
=
[
None
]
*
len
(
p
)
if
isinstance
(
source
,
str
):
encode
=
lambda
x
:
x
else
:
lappend
(
this
)
if
literal
:
literals
.
append
(
''
.
join
(
literal
))
if
not
isinstance
(
source
,
str
):
# The tokenizer implicitly decodes bytes objects as latin-1, we must
# therefore re-encode the final representation.
encode
=
lambda
x
:
x
.
encode
(
'latin-1'
)
for
c
,
s
in
p
:
if
c
is
MARK
:
groupsappend
((
i
,
s
))
# literal[i] is already None
else
:
literals
[
i
]
=
encode
(
s
)
i
=
i
+
1
literals
=
[
None
if
s
is
None
else
s
.
encode
(
'latin-1'
)
for
s
in
literals
]
return
groups
,
literals
def
expand_template
(
template
,
match
):
...
...
Misc/NEWS
View file @
9c15ec1c
...
...
@@ -19,6 +19,9 @@ Core and Builtins
Library
-------
- Issue #19365: Optimized the parsing of long replacement string in re.sub*()
functions.
- Issue #19352: Fix unittest discovery when a module can be reached
through several paths (e.g. under Debian/Ubuntu with virtualenv).
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment