Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
5dba6dfe
Commit
5dba6dfe
authored
Jul 18, 2010
by
Senthil Kumaran
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix Issue #1712522: urllib.quote now supports Unicode. The defaults are
encoding='utf-8' and errors='strict'.
parent
5d10d33c
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
152 additions
and
7 deletions
+152
-7
Doc/library/urllib.rst
Doc/library/urllib.rst
+19
-3
Lib/test/test_urllib.py
Lib/test/test_urllib.py
+107
-0
Lib/urllib.py
Lib/urllib.py
+22
-4
Misc/NEWS
Misc/NEWS
+4
-0
No files found.
Doc/library/urllib.rst
View file @
5dba6dfe
...
...
@@ -202,24 +202,40 @@ High-level interface
Utility functions
-----------------
.. function:: quote(string[, safe])
.. function:: quote(string[, safe
[, encoding[, errors]]
])
Replace special characters in *string* using the ``%xx`` escape. Letters,
digits, and the characters ``'_.-'`` are never quoted. By default, this
function is intended for quoting the path section of the URL.The optional
function is intended for quoting the path section of the URL.
The optional
*safe* parameter specifies additional characters that should not be quoted
--- its default value is ``'/'``.
*string* may be either a :class:`str` or a :class:`unicode`.
The optional *encoding* and *errors* parameters specify how to deal with
non-ASCII characters, as accepted by the :meth:`unicode.encode` method.
*encoding* defaults to ``'utf-8'``.
*errors* defaults to ``'strict'``, meaning unsupported characters raise a
:class:`UnicodeEncodeError`.
Non-Unicode strings are not encoded by default, and all bytes are allowed.
Example: ``quote('/~connolly/')`` yields ``'/%7econnolly/'``.
Example: ``quote(u'/El Niño/')`` yields ``'/El%20Ni%C3%B1o/'``.
.. versionchanged:: 2.7.1
Added *encoding* and *errors* parameters.
.. function:: quote_plus(string[, safe])
.. function:: quote_plus(string[, safe[, encoding[, errors]]])
Like :func:`quote`, but also replaces spaces by plus signs, as required for
quoting HTML form values when building up a query string to go into a URL.
Plus signs in the original string are escaped unless they are included in
*safe*. It also does not have *safe* default to ``'/'``.
Example: ``quote_plus(u'/El Niño/')`` yields ``'%2FEl+Ni%C3%B1o%2F'``.
.. function:: unquote(string)
...
...
Lib/test/test_urllib.py
View file @
5dba6dfe
...
...
@@ -355,6 +355,38 @@ class QuotingTests(unittest.TestCase):
self
.
assertEqual
(
quote_by_default
,
result
,
"using quote_plus(): %s != %s"
%
(
quote_by_default
,
result
))
# Safe expressed as unicode rather than str
result
=
urllib
.
quote
(
quote_by_default
,
safe
=
u"<>"
)
self
.
assertEqual
(
quote_by_default
,
result
,
"using quote(): %r != %r"
%
(
quote_by_default
,
result
))
# "Safe" non-ASCII bytes should still work
# (Technically disallowed by the URI standard, but allowed for
# backwards compatibility with previous versions of Python)
result
=
urllib
.
quote
(
b"a
\
xfc
b"
,
safe
=
b"
\
xfc
"
)
expect
=
b"a
\
xfc
b"
self
.
assertEqual
(
expect
,
result
,
"using quote(): %r != %r"
%
(
expect
,
result
))
# Same as above, but with 'safe' as a unicode rather than str
# "Safe" non-ASCII unicode characters should have no effect
# (Since URIs are not allowed to have non-ASCII characters)
result
=
urllib
.
quote
(
b"a
\
xfc
b"
,
safe
=
u"
\
xfc
"
)
expect
=
urllib
.
quote
(
b"a
\
xfc
b"
,
safe
=
""
)
self
.
assertEqual
(
expect
,
result
,
"using quote(): %r != %r"
%
(
expect
,
result
))
# Same as above, but quoting a unicode rather than a str
result
=
urllib
.
quote
(
u"a
\
xfc
b"
,
encoding
=
"latin-1"
,
safe
=
b"
\
xfc
"
)
expect
=
b"a
\
xfc
b"
self
.
assertEqual
(
expect
,
result
,
"using quote(): %r != %r"
%
(
expect
,
result
))
# Same as above, but with both the quoted value and 'safe' as unicode
result
=
urllib
.
quote
(
u"a
\
xfc
b"
,
encoding
=
"latin-1"
,
safe
=
u"
\
xfc
"
)
expect
=
urllib
.
quote
(
u"a
\
xfc
b"
,
encoding
=
"latin-1"
,
safe
=
""
)
self
.
assertEqual
(
expect
,
result
,
"using quote(): %r != %r"
%
(
expect
,
result
))
def
test_default_quoting
(
self
):
# Make sure all characters that should be quoted are by default sans
...
...
@@ -406,6 +438,81 @@ class QuotingTests(unittest.TestCase):
'
alpha
%
2
Bbeta
+
gamma
')
self.assertEqual(urllib.quote_plus('
alpha
+
beta
gamma
', '
+
'),
'
alpha
+
beta
+
gamma
')
# Test with unicode
self.assertEqual(urllib.quote_plus(u'
alpha
+
beta
gamma
'),
'
alpha
%
2
Bbeta
+
gamma
')
# Test with safe unicode
self.assertEqual(urllib.quote_plus('
alpha
+
beta
gamma
', u'
+
'),
'
alpha
+
beta
+
gamma
')
def test_quote_bytes(self):
# Non-ASCII bytes should quote directly to percent-encoded values
given = b"
\
xa2
\
xd8
ab
\
xff
"
expect = "%A2%D8ab%FF"
result = urllib.quote(given)
self.assertEqual(expect, result,
"using quote(): %r != %r" % (expect, result))
# Encoding argument should raise UnicodeDecodeError on bytes input
# with non-ASCII characters (just as with str.encode).
self.assertRaises(UnicodeDecodeError, urllib.quote, given,
encoding="latin-1")
def test_quote_with_unicode(self):
# Characters in Latin-1 range, encoded by default in UTF-8
given = u"
\
xa2
\
xd8
ab
\
xff
"
expect = "%C2%A2%C3%98ab%C3%BF"
result = urllib.quote(given)
self.assertEqual(expect, result,
"using quote(): %r != %r" % (expect, result))
# Characters in Latin-1 range, encoded with encoding=None and errors=None (same as the defaults)
result = urllib.quote(given, encoding=None, errors=None)
self.assertEqual(expect, result,
"using quote(): %r != %r" % (expect, result))
# Characters in Latin-1 range, encoded with Latin-1
given = u"
\
xa2
\
xd8
ab
\
xff
"
expect = "%A2%D8ab%FF"
result = urllib.quote(given, encoding="latin-1")
self.assertEqual(expect, result,
"using quote(): %r != %r" % (expect, result))
# Characters in BMP, encoded by default in UTF-8
given = u"
\
u6f22
\
u5b57
" # "Kanji"
expect = "%E6%BC%A2%E5%AD%97"
result = urllib.quote(given)
self.assertEqual(expect, result,
"using quote(): %r != %r" % (expect, result))
# Characters in BMP, encoded with Latin-1
given = u"
\
u6f22
\
u5b57
"
self.assertRaises(UnicodeEncodeError, urllib.quote, given,
encoding="latin-1")
# Characters in BMP, encoded with Latin-1, with replace error handling
given = u"
\
u6f22
\
u5b57
"
expect = "%3F%3F" # "??"
result = urllib.quote(given, encoding="latin-1",
errors="replace")
self.assertEqual(expect, result,
"using quote(): %r != %r" % (expect, result))
# Characters in BMP, Latin-1, with xmlcharref error handling
given = u"
\
u6f22
\
u5b57
"
expect = "%26%2328450%3B%26%2323383%3B" # "&#28450;&#23383;"
result = urllib.quote(given, encoding="latin-1",
errors="xmlcharrefreplace")
self.assertEqual(expect, result,
"using quote(): %r != %r" % (expect, result))
def test_quote_plus_with_unicode(self):
# Encoding (latin-1) test for quote_plus
given = u"
\
xa2
\
xd8
\
xff
"
expect = "%A2%D8+%FF"
result = urllib.quote_plus(given, encoding="latin-1")
self.assertEqual(expect, result,
"using quote_plus(): %r != %r" % (expect, result))
# Errors test for quote_plus
given = u"ab
\
u6f22
\
u5b57
cd"
expect = "ab%3F%3F+cd"
result = urllib.quote_plus(given, encoding="latin-1",
errors="replace")
self.assertEqual(expect, result,
"using quote_plus(): %r != %r" % (expect, result))
class UnquotingTests(unittest.TestCase):
"""Tests for unquote() and unquote_plus()
...
...
Lib/urllib.py
View file @
5dba6dfe
...
...
@@ -1193,7 +1193,7 @@ for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
_safe_map
[
c
]
=
c
if
(
i
<
128
and
c
in
always_safe
)
else
'%{:02X}'
.
format
(
i
)
_safe_quoters
=
{}
def
quote
(
s
,
safe
=
'/'
):
def
quote
(
s
,
safe
=
'/'
,
encoding
=
None
,
errors
=
None
):
"""quote('abc def') -> 'abc%20def'
Each part of a URL, e.g. the path info, the query, etc., has a
...
...
@@ -1213,10 +1213,28 @@ def quote(s, safe='/'):
is reserved, but in typical usage the quote function is being
called on a path where the existing slash characters are used as
reserved characters.
string and safe may be either str or unicode objects.
The optional encoding and errors parameters specify how to deal with the
non-ASCII characters, as accepted by the unicode.encode method.
By default, encoding='utf-8' (characters are encoded with UTF-8), and
errors='strict' (unsupported characters raise a UnicodeEncodeError).
"""
# fastpath
if
not
s
:
return
s
if
encoding
is
not
None
or
isinstance
(
s
,
unicode
):
if
encoding
is
None
:
encoding
=
'utf-8'
if
errors
is
None
:
errors
=
'strict'
s
=
s
.
encode
(
encoding
,
errors
)
if
isinstance
(
safe
,
unicode
):
# Normalize 'safe' by converting to str and removing non-ASCII chars
safe
=
safe
.
encode
(
'ascii'
,
'ignore'
)
cachekey
=
(
safe
,
always_safe
)
try
:
(
quoter
,
safe
)
=
_safe_quoters
[
cachekey
]
...
...
@@ -1230,12 +1248,12 @@ def quote(s, safe='/'):
return
s
return
''
.
join
(
map
(
quoter
,
s
))
def
quote_plus
(
s
,
safe
=
''
):
def
quote_plus
(
s
,
safe
=
''
,
encoding
=
None
,
errors
=
None
):
"""Quote the query fragment of a URL; replacing ' ' with '+'"""
if
' '
in
s
:
s
=
quote
(
s
,
safe
+
' '
)
s
=
quote
(
s
,
safe
+
' '
,
encoding
,
errors
)
return
s
.
replace
(
' '
,
'+'
)
return
quote
(
s
,
safe
)
return
quote
(
s
,
safe
,
encoding
,
errors
)
def
urlencode
(
query
,
doseq
=
0
):
"""Encode a sequence of two-element tuples or dictionary into a URL query string.
...
...
Misc/NEWS
View file @
5dba6dfe
...
...
@@ -18,6 +18,10 @@ Core and Builtins
Library
-------
- Issue #1712522: urllib.quote now supports Unicode strings via the new
encoding and errors parameters. encoding defaults to 'utf-8' and errors to
'strict'. Patch by Matt Giuca.
- Issue #7646: The fnmatch pattern cache no longer grows without bound.
- Issue #9136: Fix 'dictionary changed size during iteration'
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment