Commit 520dd5ef authored by Senthil Kumaran's avatar Senthil Kumaran

Fix Issue5468 - urlencode to handle bytes and other alternate encodings.

(Extensive tests provided). Patch by Dan Mahn.
parent 0bf28042
...@@ -310,23 +310,29 @@ The :mod:`urllib.parse` module defines the following functions: ...@@ -310,23 +310,29 @@ The :mod:`urllib.parse` module defines the following functions:
``b'a&\xef'``. ``b'a&\xef'``.
.. function:: urlencode(query, doseq=False) .. function:: urlencode(query, doseq=False, safe='', encoding=None, errors=None)
Convert a mapping object or a sequence of two-element tuples to a Convert a mapping object or a sequence of two-element tuples, which may
"url-encoded" string, suitable to pass to :func:`urlopen` above as the either be a :class:`str` or a :class:`bytes`, to a "url-encoded" string,
optional *data* argument. This is useful to pass a dictionary of form suitable to pass to :func:`urlopen` above as the optional *data* argument.
fields to a ``POST`` request. The resulting string is a series of This is useful to pass a dictionary of form fields to a ``POST`` request.
``key=value`` pairs separated by ``'&'`` characters, where both *key* and The resulting string is a series of ``key=value`` pairs separated by ``'&'``
*value* are quoted using :func:`quote_plus` above. When a sequence of characters, where both *key* and *value* are quoted using :func:`quote_plus`
two-element tuples is used as the *query* argument, the first element of above. When a sequence of two-element tuples is used as the *query*
each tuple is a key and the second is a value. The value element in itself argument, the first element of each tuple is a key and the second is a
can be a sequence and in that case, if the optional parameter *doseq* is value. The value element in itself can be a sequence and in that case, if
evaluates to *True*, individual ``key=value`` pairs separated by ``'&'`` are the optional parameter *doseq* evaluates to *True*, individual
generated for each element of the value sequence for the key. The order of ``key=value`` pairs separated by ``'&'`` are generated for each element of
parameters in the encoded string will match the order of parameter tuples in the value sequence for the key. The order of parameters in the encoded
the sequence. This module provides the functions :func:`parse_qs` and string will match the order of parameter tuples in the sequence. This module
:func:`parse_qsl` which are used to parse query strings into Python data provides the functions :func:`parse_qs` and :func:`parse_qsl` which are used
structures. to parse query strings into Python data structures.
When a key or value in *query* is a :class:`str`, the *safe*, *encoding* and
*errors* parameters are passed on to :func:`quote_plus` for encoding.
.. versionchanged:: 3.2
The *query* parameter supports bytes and string objects.
.. seealso:: .. seealso::
......
...@@ -795,6 +795,116 @@ class urlencode_Tests(unittest.TestCase): ...@@ -795,6 +795,116 @@ class urlencode_Tests(unittest.TestCase):
self.assertEqual("a=a&a=b", self.assertEqual("a=a&a=b",
urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True)) urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
def test_urlencode_encoding(self):
# Check that urlencode() applies the encoding and errors arguments to str
# keys and values when doseq is left at its default.
# ASCII encoding. Expect %3F with errors="replace"
given = (('\u00a0', '\u00c1'),)
expect = '%3F=%3F'
result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
self.assertEqual(expect, result)
# Default is UTF-8 encoding.
given = (('\u00a0', '\u00c1'),)
expect = '%C2%A0=%C3%81'
result = urllib.parse.urlencode(given)
self.assertEqual(expect, result)
# Latin-1 encoding.
given = (('\u00a0', '\u00c1'),)
expect = '%A0=%C1'
result = urllib.parse.urlencode(given, encoding="latin-1")
self.assertEqual(expect, result)
def test_urlencode_encoding_doseq(self):
# Check that urlencode() applies the encoding and errors arguments when
# doseq is true, both for plain str values and for sequence values.
# ASCII Encoding. Expect %3F with errors="replace"
given = (('\u00a0', '\u00c1'),)
expect = '%3F=%3F'
result = urllib.parse.urlencode(given, doseq=True,
encoding="ASCII", errors="replace")
self.assertEqual(expect, result)
# ASCII Encoding. On a sequence of values.
# Each element of the value sequence yields its own key=value pair.
given = (("\u00a0", (1, "\u00c1")),)
expect = '%3F=1&%3F=%3F'
result = urllib.parse.urlencode(given, True,
encoding="ASCII", errors="replace")
self.assertEqual(expect, result)
# Utf-8
# No encoding argument is passed here; the expected output is UTF-8.
given = (("\u00a0", "\u00c1"),)
expect = '%C2%A0=%C3%81'
result = urllib.parse.urlencode(given, True)
self.assertEqual(expect, result)
# Utf-8, sequence value mixing an int and a str.
given = (("\u00a0", (42, "\u00c1")),)
expect = '%C2%A0=42&%C2%A0=%C3%81'
result = urllib.parse.urlencode(given, True)
self.assertEqual(expect, result)
# latin-1
given = (("\u00a0", "\u00c1"),)
expect = '%A0=%C1'
result = urllib.parse.urlencode(given, True, encoding="latin-1")
self.assertEqual(expect, result)
# latin-1, sequence value mixing an int and a str.
given = (("\u00a0", (42, "\u00c1")),)
expect = '%A0=42&%A0=%C1'
result = urllib.parse.urlencode(given, True, encoding="latin-1")
self.assertEqual(expect, result)
def test_urlencode_bytes(self):
# Check that urlencode() accepts bytes keys and values and percent-quotes
# their raw byte values, with and without doseq.
given = ((b'\xa0\x24', b'\xc1\x24'),)
expect = '%A0%24=%C1%24'
result = urllib.parse.urlencode(given)
self.assertEqual(expect, result)
# Same input with doseq true must give the same output.
result = urllib.parse.urlencode(given, True)
self.assertEqual(expect, result)
# Sequence of values
given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
expect = '%A0%24=42&%A0%24=%C1%24'
result = urllib.parse.urlencode(given, True)
self.assertEqual(expect, result)
def test_urlencode_encoding_safe_parameter(self):
    """Check that characters listed in *safe* are left unquoted by urlencode().

    Every case passes ``safe=":$"`` with bytes keys and values containing
    '$' (0x24), so the '$' must appear literally in the output while the
    non-ASCII byte is still percent-quoted. The cases are run without an
    encoding argument and again with ``encoding="latin-1"``.
    """
    # Send '$' (\x24) as safe character
    # Default utf-8 encoding
    given = ((b'\xa0\x24', b'\xc1\x24'),)
    result = urllib.parse.urlencode(given, safe=":$")
    expect = '%A0$=%C1$'
    self.assertEqual(expect, result)

    given = ((b'\xa0\x24', b'\xc1\x24'),)
    result = urllib.parse.urlencode(given, doseq=True, safe=":$")
    expect = '%A0$=%C1$'
    self.assertEqual(expect, result)

    # Safe parameter in sequence
    given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
    expect = '%A0$=%C1$&%A0$=13&%A0$=42'
    result = urllib.parse.urlencode(given, True, safe=":$")
    self.assertEqual(expect, result)

    # Test all above in latin-1 encoding
    given = ((b'\xa0\x24', b'\xc1\x24'),)
    result = urllib.parse.urlencode(given, safe=":$",
                                    encoding="latin-1")
    expect = '%A0$=%C1$'
    self.assertEqual(expect, result)

    given = ((b'\xa0\x24', b'\xc1\x24'),)
    expect = '%A0$=%C1$'
    result = urllib.parse.urlencode(given, doseq=True, safe=":$",
                                    encoding="latin-1")
    # This assertion was missing: the result above used to be overwritten
    # by the next case without ever being checked.
    self.assertEqual(expect, result)

    given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
    expect = '%A0$=%C1$&%A0$=13&%A0$=42'
    result = urllib.parse.urlencode(given, True, safe=":$",
                                    encoding="latin-1")
    self.assertEqual(expect, result)
class Pathname_Tests(unittest.TestCase): class Pathname_Tests(unittest.TestCase):
"""Test pathname2url() and url2pathname()""" """Test pathname2url() and url2pathname()"""
......
...@@ -559,7 +559,7 @@ def quote_from_bytes(bs, safe='/'): ...@@ -559,7 +559,7 @@ def quote_from_bytes(bs, safe='/'):
_safe_quoters[safe] = quoter = Quoter(safe).__getitem__ _safe_quoters[safe] = quoter = Quoter(safe).__getitem__
return ''.join([quoter(char) for char in bs]) return ''.join([quoter(char) for char in bs])
def urlencode(query, doseq=False): def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
"""Encode a sequence of two-element tuples or dictionary into a URL query string. """Encode a sequence of two-element tuples or dictionary into a URL query string.
If any values in the query arg are sequences and doseq is true, each If any values in the query arg are sequences and doseq is true, each
...@@ -568,6 +568,10 @@ def urlencode(query, doseq=False): ...@@ -568,6 +568,10 @@ def urlencode(query, doseq=False):
If the query arg is a sequence of two-element tuples, the order of the If the query arg is a sequence of two-element tuples, the order of the
parameters in the output will match the order of parameters in the parameters in the output will match the order of parameters in the
input. input.
The keys and values of the query arg may be either str or bytes. When a
key or value is a str, the safe, encoding and errors parameters are passed
on to quote_plus for encoding.
""" """
if hasattr(query, "items"): if hasattr(query, "items"):
...@@ -592,14 +596,28 @@ def urlencode(query, doseq=False): ...@@ -592,14 +596,28 @@ def urlencode(query, doseq=False):
l = [] l = []
if not doseq: if not doseq:
for k, v in query: for k, v in query:
k = quote_plus(str(k)) if isinstance(k, bytes):
v = quote_plus(str(v)) k = quote_plus(k, safe)
else:
k = quote_plus(str(k), safe, encoding, errors)
if isinstance(v, bytes):
v = quote_plus(v, safe)
else:
v = quote_plus(str(v), safe, encoding, errors)
l.append(k + '=' + v) l.append(k + '=' + v)
else: else:
for k, v in query: for k, v in query:
k = quote_plus(str(k)) if isinstance(k, bytes):
if isinstance(v, str): k = quote_plus(k, safe)
v = quote_plus(v) else:
k = quote_plus(str(k), safe, encoding, errors)
if isinstance(v, bytes):
v = quote_plus(v, safe)
l.append(k + '=' + v)
elif isinstance(v, str):
v = quote_plus(v, safe, encoding, errors)
l.append(k + '=' + v) l.append(k + '=' + v)
else: else:
try: try:
...@@ -607,12 +625,16 @@ def urlencode(query, doseq=False): ...@@ -607,12 +625,16 @@ def urlencode(query, doseq=False):
x = len(v) x = len(v)
except TypeError: except TypeError:
# not a sequence # not a sequence
v = quote_plus(str(v)) v = quote_plus(str(v), safe, encoding, errors)
l.append(k + '=' + v) l.append(k + '=' + v)
else: else:
# loop over the sequence # loop over the sequence
for elt in v: for elt in v:
l.append(k + '=' + quote_plus(str(elt))) if isinstance(elt, bytes):
elt = quote_plus(elt, safe)
else:
elt = quote_plus(str(elt), safe, encoding, errors)
l.append(k + '=' + elt)
return '&'.join(l) return '&'.join(l)
# Utilities to parse URLs (most of these return None for missing parts): # Utilities to parse URLs (most of these return None for missing parts):
......
...@@ -468,6 +468,9 @@ C-API ...@@ -468,6 +468,9 @@ C-API
Library Library
------- -------
- Issue #5468: urlencode to handle bytes type and other encodings in its query
parameter. Patch by Dan Mahn.
- Issue #7673: Fix security vulnerability (CVE-2010-2089) in the audioop - Issue #7673: Fix security vulnerability (CVE-2010-2089) in the audioop
module, ensure that the input string length is a multiple of the frame size module, ensure that the input string length is a multiple of the frame size
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment