Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
5bb282d5
Commit
5bb282d5
authored
Oct 23, 2013
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #19365: Optimized the parsing of long replacement strings in the re.sub*()
functions.
parent
97bb27d5
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
33 additions
and
44 deletions
+33
-44
Lib/sre_parse.py
Lib/sre_parse.py
+30
-44
Misc/NEWS
Misc/NEWS
+3
-0
No files found.
Lib/sre_parse.py
View file @
5bb282d5
...
@@ -769,35 +769,33 @@ def parse_template(source, pattern):
...
@@ -769,35 +769,33 @@ def parse_template(source, pattern):
# group references
# group references
s
=
Tokenizer
(
source
)
s
=
Tokenizer
(
source
)
sget
=
s
.
get
sget
=
s
.
get
p
=
[]
groups
=
[]
a
=
p
.
append
literals
=
[]
def
literal
(
literal
,
p
=
p
,
pappend
=
a
):
literal
=
[]
if
p
and
p
[
-
1
][
0
]
is
LITERAL
:
lappend
=
literal
.
append
p
[
-
1
]
=
LITERAL
,
p
[
-
1
][
1
]
+
literal
def
addgroup
(
index
):
else
:
if
literal
:
pappend
((
LITERAL
,
literal
))
literals
.
append
(
''
.
join
(
literal
))
sep
=
source
[:
0
]
del
literal
[:]
if
isinstance
(
sep
,
str
):
groups
.
append
((
len
(
literals
),
index
))
makechar
=
chr
literals
.
append
(
None
)
else
:
while
True
:
makechar
=
chr
while
1
:
this
=
sget
()
this
=
sget
()
if
this
is
None
:
if
this
is
None
:
break
# end of replacement string
break
# end of replacement string
if
this
and
this
[
0
]
==
"
\
\
"
:
if
this
[
0
]
==
"
\
\
"
:
# group
# group
c
=
this
[
1
:
2
]
c
=
this
[
1
]
if
c
==
"g"
:
if
c
==
"g"
:
name
=
""
name
=
""
if
s
.
match
(
"<"
):
if
s
.
match
(
"<"
):
while
1
:
while
True
:
char
=
sget
()
char
=
sget
()
if
char
is
None
:
if
char
is
None
:
raise
error
(
"unterminated group name"
)
raise
error
(
"unterminated group name"
)
if
char
==
">"
:
if
char
==
">"
:
break
break
name
=
name
+
char
name
+=
char
if
not
name
:
if
not
name
:
raise
error
(
"missing group name"
)
raise
error
(
"missing group name"
)
try
:
try
:
...
@@ -811,50 +809,38 @@ def parse_template(source, pattern):
...
@@ -811,50 +809,38 @@ def parse_template(source, pattern):
index
=
pattern
.
groupindex
[
name
]
index
=
pattern
.
groupindex
[
name
]
except
KeyError
:
except
KeyError
:
raise
IndexError
(
"unknown group name"
)
raise
IndexError
(
"unknown group name"
)
a
((
MARK
,
index
)
)
a
ddgroup
(
index
)
elif
c
==
"0"
:
elif
c
==
"0"
:
if
s
.
next
in
OCTDIGITS
:
if
s
.
next
in
OCTDIGITS
:
this
=
this
+
sget
()
this
+=
sget
()
if
s
.
next
in
OCTDIGITS
:
if
s
.
next
in
OCTDIGITS
:
this
=
this
+
sget
()
this
+=
sget
()
l
iteral
(
makecha
r
(
int
(
this
[
1
:],
8
)
&
0xff
))
l
append
(
ch
r
(
int
(
this
[
1
:],
8
)
&
0xff
))
elif
c
in
DIGITS
:
elif
c
in
DIGITS
:
isoctal
=
False
isoctal
=
False
if
s
.
next
in
DIGITS
:
if
s
.
next
in
DIGITS
:
this
=
this
+
sget
()
this
+=
sget
()
if
(
c
in
OCTDIGITS
and
this
[
2
]
in
OCTDIGITS
and
if
(
c
in
OCTDIGITS
and
this
[
2
]
in
OCTDIGITS
and
s
.
next
in
OCTDIGITS
):
s
.
next
in
OCTDIGITS
):
this
=
this
+
sget
()
this
+=
sget
()
isoctal
=
True
isoctal
=
True
l
iteral
(
makecha
r
(
int
(
this
[
1
:],
8
)
&
0xff
))
l
append
(
ch
r
(
int
(
this
[
1
:],
8
)
&
0xff
))
if
not
isoctal
:
if
not
isoctal
:
a
((
MARK
,
int
(
this
[
1
:])
))
a
ddgroup
(
int
(
this
[
1
:]
))
else
:
else
:
try
:
try
:
this
=
makecha
r
(
ESCAPES
[
this
][
1
])
this
=
ch
r
(
ESCAPES
[
this
][
1
])
except
KeyError
:
except
KeyError
:
pass
pass
l
iteral
(
this
)
l
append
(
this
)
else
:
else
:
literal
(
this
)
lappend
(
this
)
# convert template to groups and literals lists
if
literal
:
i
=
0
literals
.
append
(
''
.
join
(
literal
))
groups
=
[]
if
not
isinstance
(
source
,
str
):
groupsappend
=
groups
.
append
literals
=
[
None
]
*
len
(
p
)
if
isinstance
(
source
,
str
):
encode
=
lambda
x
:
x
else
:
# The tokenizer implicitly decodes bytes objects as latin-1, we must
# The tokenizer implicitly decodes bytes objects as latin-1, we must
# therefore re-encode the final representation.
# therefore re-encode the final representation.
encode
=
lambda
x
:
x
.
encode
(
'latin-1'
)
literals
=
[
None
if
s
is
None
else
s
.
encode
(
'latin-1'
)
for
s
in
literals
]
for
c
,
s
in
p
:
if
c
is
MARK
:
groupsappend
((
i
,
s
))
# literal[i] is already None
else
:
literals
[
i
]
=
encode
(
s
)
i
=
i
+
1
return
groups
,
literals
return
groups
,
literals
def
expand_template
(
template
,
match
):
def
expand_template
(
template
,
match
):
...
...
Misc/NEWS
View file @
5bb282d5
...
@@ -19,6 +19,9 @@ Core and Builtins
...
@@ -19,6 +19,9 @@ Core and Builtins
Library
Library
-------
-------
- Issue #19365: Optimized the parsing of long replacement strings in the re.sub*()
functions.
- Issue #19352: Fix unittest discovery when a module can be reached
- Issue #19352: Fix unittest discovery when a module can be reached
through several paths (e.g. under Debian/Ubuntu with virtualenv).
through several paths (e.g. under Debian/Ubuntu with virtualenv).
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment