Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
a9d24e67
Commit
a9d24e67
authored
Mar 14, 2013
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Plain Diff
Issue #1285086: Get rid of the refcounting hack and speed up
urllib.parse.unquote() and urllib.parse.unquote_to_bytes().
parents
ac356fcf
8ea4616f
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
30 additions
and
36 deletions
+30
-36
Lib/urllib/parse.py
Lib/urllib/parse.py
+27
-36
Misc/NEWS
Misc/NEWS
+3
-0
No files found.
Lib/urllib/parse.py
View file @
a9d24e67
...
...
@@ -27,6 +27,7 @@ parsing quirks from older RFCs are retained. The testcases in
test_urlparse.py provides a good indicator of parsing behavior.
"""
import
re
import
sys
import
collections
...
...
@@ -470,6 +471,10 @@ def urldefrag(url):
defrag
=
url
return
_coerce_result
(
DefragResult
(
defrag
,
frag
))
_hexdig
=
'0123456789ABCDEFabcdef'
_hextobyte
=
{(
a
+
b
).
encode
():
bytes
([
int
(
a
+
b
,
16
)])
for
a
in
_hexdig
for
b
in
_hexdig
}
def
unquote_to_bytes
(
string
):
"""unquote_to_bytes('abc%20def') -> b'abc def'."""
# Note: strings are encoded as UTF-8. This is only an issue if it contains
...
...
@@ -480,16 +485,21 @@ def unquote_to_bytes(string):
return
b''
if
isinstance
(
string
,
str
):
string
=
string
.
encode
(
'utf-8'
)
re
s
=
string
.
split
(
b'%'
)
if
len
(
re
s
)
==
1
:
bit
s
=
string
.
split
(
b'%'
)
if
len
(
bit
s
)
==
1
:
return
string
string
=
res
[
0
]
for
item
in
res
[
1
:]:
res
=
[
bits
[
0
]]
append
=
res
.
append
for
item
in
bits
[
1
:]:
try
:
string
+=
bytes
([
int
(
item
[:
2
],
16
)])
+
item
[
2
:]
except
ValueError
:
string
+=
b'%'
+
item
return
string
append
(
_hextobyte
[
item
[:
2
]])
append
(
item
[
2
:])
except
KeyError
:
append
(
b'%'
)
append
(
item
)
return
b''
.
join
(
res
)
_asciire
=
re
.
compile
(
'([
\
x00
-
\
x7f
]+)'
)
def
unquote
(
string
,
encoding
=
'utf-8'
,
errors
=
'replace'
):
"""Replace %xx escapes by their single-character equivalent. The optional
...
...
@@ -501,39 +511,20 @@ def unquote(string, encoding='utf-8', errors='replace'):
unquote('abc%20def') -> 'abc def'.
"""
if
string
==
''
:
return
string
res
=
string
.
split
(
'%'
)
if
len
(
res
)
==
1
:
if
'%'
not
in
string
:
string
.
split
return
string
if
encoding
is
None
:
encoding
=
'utf-8'
if
errors
is
None
:
errors
=
'replace'
# pct_sequence: contiguous sequence of percent-encoded bytes, decoded
pct_sequence
=
b''
string
=
res
[
0
]
for
item
in
res
[
1
:]:
try
:
if
not
item
:
raise
ValueError
pct_sequence
+=
bytes
.
fromhex
(
item
[:
2
])
rest
=
item
[
2
:]
if
not
rest
:
# This segment was just a single percent-encoded character.
# May be part of a sequence of code units, so delay decoding.
# (Stored in pct_sequence).
continue
except
ValueError
:
rest
=
'%'
+
item
# Encountered non-percent-encoded characters. Flush the current
# pct_sequence.
string
+=
pct_sequence
.
decode
(
encoding
,
errors
)
+
rest
pct_sequence
=
b''
if
pct_sequence
:
# Flush the final pct_sequence
string
+=
pct_sequence
.
decode
(
encoding
,
errors
)
return
string
bits
=
_asciire
.
split
(
string
)
res
=
[
bits
[
0
]]
append
=
res
.
append
for
i
in
range
(
1
,
len
(
bits
),
2
):
append
(
unquote_to_bytes
(
bits
[
i
]).
decode
(
encoding
,
errors
))
append
(
bits
[
i
+
1
])
return
''
.
join
(
res
)
def
parse_qs
(
qs
,
keep_blank_values
=
False
,
strict_parsing
=
False
,
encoding
=
'utf-8'
,
errors
=
'replace'
):
...
...
Misc/NEWS
View file @
a9d24e67
...
...
@@ -193,6 +193,9 @@ Core and Builtins
Library
-------
- Issue #1285086: Get rid of the refcounting hack and speed up
urllib.parse.unquote() and urllib.parse.unquote_to_bytes().
- Issue #17368: Fix an off-by-one error in the Python JSON decoder that caused
a failure while decoding empty object literals when object_pairs_hook was
specified.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment