Commit 37ddbb8a authored by Florent Xicluna

Merged revisions 76719,81270-81272,83294,83319,84038-84039 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/branches/py3k

................
  r76719 | antoine.pitrou | 2009-12-08 20:38:17 +0100 (mar., 08 déc. 2009) | 9 lines

  Merged revisions 76718 via svnmerge from
  svn+ssh://pythondev@svn.python.org/python/trunk

  ........
    r76718 | antoine.pitrou | 2009-12-08 20:35:12 +0100 (mar., 08 déc. 2009) | 3 lines

    Fix transient refleaks in test_urllib. Thanks to Florent Xicluna.
  ........
................
  r81270 | florent.xicluna | 2010-05-17 19:24:07 +0200 (lun., 17 mai 2010) | 9 lines

  Merged revision 81259 via svnmerge from
  svn+ssh://pythondev@svn.python.org/python/trunk

  ........
    r81259 | florent.xicluna | 2010-05-17 12:39:07 +0200 (lun, 17 mai 2010) | 2 lines

    Slight style cleanup.
  ........
................
  r81271 | florent.xicluna | 2010-05-17 19:33:07 +0200 (lun., 17 mai 2010) | 11 lines

  Issue #1285086: Speed up urllib.parse functions: quote, quote_from_bytes, unquote, unquote_to_bytes.

  Recorded merge of revisions 81265 via svnmerge from
  svn+ssh://pythondev@svn.python.org/python/trunk

  ........
    r81265 | florent.xicluna | 2010-05-17 15:35:09 +0200 (lun, 17 mai 2010) | 2 lines

    Issue #1285086: Speed up urllib.quote and urllib.unquote for simple cases.
  ........
................
  r81272 | florent.xicluna | 2010-05-17 20:01:22 +0200 (lun., 17 mai 2010) | 2 lines

  Inadvertently removed part of the comment in r81271.
................
  r83294 | senthil.kumaran | 2010-07-30 21:34:36 +0200 (ven., 30 juil. 2010) | 2 lines

  Fix issue9301 - handle unquote({}) kind of case.
................
  r83319 | florent.xicluna | 2010-07-31 10:56:55 +0200 (sam., 31 juil. 2010) | 2 lines

  Fix an oversight in r83294.  unquote() should reject bytes.  Issue #9301.
................
  r84038 | florent.xicluna | 2010-08-14 20:30:35 +0200 (sam., 14 août 2010) | 1 line

  Silence the BytesWarning, due to patch r83294 for #9301
................
  r84039 | florent.xicluna | 2010-08-14 22:51:58 +0200 (sam., 14 août 2010) | 1 line

  Silence BytesWarning while testing exception
................
parent b4efb3d8
...@@ -261,8 +261,8 @@ class urlretrieve_FileTests(unittest.TestCase): ...@@ -261,8 +261,8 @@ class urlretrieve_FileTests(unittest.TestCase):
result = urllib.request.urlretrieve("file:%s" % support.TESTFN) result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
self.assertEqual(result[0], support.TESTFN) self.assertEqual(result[0], support.TESTFN)
self.assertTrue(isinstance(result[1], email.message.Message), self.assertTrue(isinstance(result[1], email.message.Message),
"did not get a email.message.Message instance as second " "did not get a email.message.Message instance "
"returned value") "as second returned value")
def test_copy(self): def test_copy(self):
# Test that setting the filename argument works. # Test that setting the filename argument works.
...@@ -539,6 +539,7 @@ class QuotingTests(unittest.TestCase): ...@@ -539,6 +539,7 @@ class QuotingTests(unittest.TestCase):
self.assertEqual(expect, result, self.assertEqual(expect, result,
"using quote_plus(): %r != %r" % (expect, result)) "using quote_plus(): %r != %r" % (expect, result))
class UnquotingTests(unittest.TestCase): class UnquotingTests(unittest.TestCase):
"""Tests for unquote() and unquote_plus() """Tests for unquote() and unquote_plus()
...@@ -566,6 +567,10 @@ class UnquotingTests(unittest.TestCase): ...@@ -566,6 +567,10 @@ class UnquotingTests(unittest.TestCase):
self.assertEqual(result.count('%'), 1, self.assertEqual(result.count('%'), 1,
"using unquote(): not all characters escaped: " "using unquote(): not all characters escaped: "
"%s" % result) "%s" % result)
self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
with support.check_warnings():
self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
def test_unquoting_badpercent(self): def test_unquoting_badpercent(self):
# Test unquoting on bad percent-escapes # Test unquoting on bad percent-escapes
...@@ -600,6 +605,8 @@ class UnquotingTests(unittest.TestCase): ...@@ -600,6 +605,8 @@ class UnquotingTests(unittest.TestCase):
result = urllib.parse.unquote_to_bytes(given) result = urllib.parse.unquote_to_bytes(given)
self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
% (expect, result)) % (expect, result))
self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
def test_unquoting_mixed_case(self): def test_unquoting_mixed_case(self):
# Test unquoting on mixed-case hex digits in the percent-escapes # Test unquoting on mixed-case hex digits in the percent-escapes
...@@ -741,7 +748,7 @@ class urlencode_Tests(unittest.TestCase): ...@@ -741,7 +748,7 @@ class urlencode_Tests(unittest.TestCase):
expect_somewhere = ["1st=1", "2nd=2", "3rd=3"] expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
result = urllib.parse.urlencode(given) result = urllib.parse.urlencode(given)
for expected in expect_somewhere: for expected in expect_somewhere:
self.assertTrue(expected in result, self.assertIn(expected, result,
"testing %s: %s not found in %s" % "testing %s: %s not found in %s" %
(test_type, expected, result)) (test_type, expected, result))
self.assertEqual(result.count('&'), 2, self.assertEqual(result.count('&'), 2,
...@@ -788,8 +795,7 @@ class urlencode_Tests(unittest.TestCase): ...@@ -788,8 +795,7 @@ class urlencode_Tests(unittest.TestCase):
result = urllib.parse.urlencode(given, True) result = urllib.parse.urlencode(given, True)
for value in given["sequence"]: for value in given["sequence"]:
expect = "sequence=%s" % value expect = "sequence=%s" % value
self.assertTrue(expect in result, self.assertIn(expect, result)
"%s not found in %s" % (expect, result))
self.assertEqual(result.count('&'), 2, self.assertEqual(result.count('&'), 2,
"Expected 2 '&'s, got %s" % result.count('&')) "Expected 2 '&'s, got %s" % result.count('&'))
......
...@@ -39,7 +39,7 @@ uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', ...@@ -39,7 +39,7 @@ uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
'imap', 'wais', 'file', 'mms', 'https', 'shttp', 'imap', 'wais', 'file', 'mms', 'https', 'shttp',
'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
'svn', 'svn+ssh', 'sftp', 'nfs',' git', 'git+ssh'] 'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh']
non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
...@@ -61,8 +61,9 @@ MAX_CACHE_SIZE = 20 ...@@ -61,8 +61,9 @@ MAX_CACHE_SIZE = 20
_parse_cache = {} _parse_cache = {}
def clear_cache(): def clear_cache():
"""Clear the parse cache.""" """Clear the parse cache and the quoters cache."""
_parse_cache.clear() _parse_cache.clear()
_safe_quoters.clear()
class ResultMixin(object): class ResultMixin(object):
...@@ -302,17 +303,22 @@ def unquote_to_bytes(string): ...@@ -302,17 +303,22 @@ def unquote_to_bytes(string):
"""unquote_to_bytes('abc%20def') -> b'abc def'.""" """unquote_to_bytes('abc%20def') -> b'abc def'."""
# Note: strings are encoded as UTF-8. This is only an issue if it contains # Note: strings are encoded as UTF-8. This is only an issue if it contains
# unescaped non-ASCII characters, which URIs should not. # unescaped non-ASCII characters, which URIs should not.
if not string:
# Is it a string-like object?
string.split
return b''
if isinstance(string, str): if isinstance(string, str):
string = string.encode('utf-8') string = string.encode('utf-8')
res = string.split(b'%') res = string.split(b'%')
res[0] = res[0] if len(res) == 1:
for i in range(1, len(res)): return string
item = res[i] string = res[0]
for item in res[1:]:
try: try:
res[i] = bytes([int(item[:2], 16)]) + item[2:] string += bytes([int(item[:2], 16)]) + item[2:]
except ValueError: except ValueError:
res[i] = b'%' + item string += b'%' + item
return b''.join(res) return string
def unquote(string, encoding='utf-8', errors='replace'): def unquote(string, encoding='utf-8', errors='replace'):
"""Replace %xx escapes by their single-character equivalent. The optional """Replace %xx escapes by their single-character equivalent. The optional
...@@ -324,36 +330,39 @@ def unquote(string, encoding='utf-8', errors='replace'): ...@@ -324,36 +330,39 @@ def unquote(string, encoding='utf-8', errors='replace'):
unquote('abc%20def') -> 'abc def'. unquote('abc%20def') -> 'abc def'.
""" """
if encoding is None: encoding = 'utf-8' if string == '':
if errors is None: errors = 'replace' return string
# pct_sequence: contiguous sequence of percent-encoded bytes, decoded
# (list of single-byte bytes objects)
pct_sequence = []
res = string.split('%') res = string.split('%')
for i in range(1, len(res)): if len(res) == 1:
item = res[i] return string
if encoding is None:
encoding = 'utf-8'
if errors is None:
errors = 'replace'
# pct_sequence: contiguous sequence of percent-encoded bytes, decoded
pct_sequence = b''
string = res[0]
for item in res[1:]:
try: try:
if not item: raise ValueError if not item:
pct_sequence.append(bytes.fromhex(item[:2])) raise ValueError
pct_sequence += bytes.fromhex(item[:2])
rest = item[2:] rest = item[2:]
except ValueError:
rest = '%' + item
if not rest: if not rest:
# This segment was just a single percent-encoded character. # This segment was just a single percent-encoded character.
# May be part of a sequence of code units, so delay decoding. # May be part of a sequence of code units, so delay decoding.
# (Stored in pct_sequence). # (Stored in pct_sequence).
res[i] = '' continue
else: except ValueError:
rest = '%' + item
# Encountered non-percent-encoded characters. Flush the current # Encountered non-percent-encoded characters. Flush the current
# pct_sequence. # pct_sequence.
res[i] = b''.join(pct_sequence).decode(encoding, errors) + rest string += pct_sequence.decode(encoding, errors) + rest
pct_sequence = [] pct_sequence = b''
if pct_sequence: if pct_sequence:
# Flush the final pct_sequence # Flush the final pct_sequence
# res[-1] will always be empty if pct_sequence != [] string += pct_sequence.decode(encoding, errors)
assert not res[-1], "string=%r, res=%r" % (string, res) return string
res[-1] = b''.join(pct_sequence).decode(encoding, errors)
return ''.join(res)
def parse_qs(qs, keep_blank_values=False, strict_parsing=False): def parse_qs(qs, keep_blank_values=False, strict_parsing=False):
"""Parse a query given as a string argument. """Parse a query given as a string argument.
...@@ -434,7 +443,8 @@ _ALWAYS_SAFE = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ' ...@@ -434,7 +443,8 @@ _ALWAYS_SAFE = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
b'abcdefghijklmnopqrstuvwxyz' b'abcdefghijklmnopqrstuvwxyz'
b'0123456789' b'0123456789'
b'_.-') b'_.-')
_safe_quoters= {} _ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE)
_safe_quoters = {}
class Quoter(collections.defaultdict): class Quoter(collections.defaultdict):
"""A mapping from bytes (in range(0,256)) to strings. """A mapping from bytes (in range(0,256)) to strings.
...@@ -446,7 +456,7 @@ class Quoter(collections.defaultdict): ...@@ -446,7 +456,7 @@ class Quoter(collections.defaultdict):
# of cached keys don't call Python code at all). # of cached keys don't call Python code at all).
def __init__(self, safe): def __init__(self, safe):
"""safe: bytes object.""" """safe: bytes object."""
self.safe = _ALWAYS_SAFE.union(c for c in safe if c < 128) self.safe = _ALWAYS_SAFE.union(safe)
def __repr__(self): def __repr__(self):
# Without this, will just display as a defaultdict # Without this, will just display as a defaultdict
...@@ -454,7 +464,7 @@ class Quoter(collections.defaultdict): ...@@ -454,7 +464,7 @@ class Quoter(collections.defaultdict):
def __missing__(self, b): def __missing__(self, b):
# Handle a cache miss. Store quoted string in cache and return. # Handle a cache miss. Store quoted string in cache and return.
res = b in self.safe and chr(b) or ('%%%02X' % b) res = chr(b) if b in self.safe else '%{:02X}'.format(b)
self[b] = res self[b] = res
return res return res
...@@ -488,6 +498,8 @@ def quote(string, safe='/', encoding=None, errors=None): ...@@ -488,6 +498,8 @@ def quote(string, safe='/', encoding=None, errors=None):
errors='strict' (unsupported characters raise a UnicodeEncodeError). errors='strict' (unsupported characters raise a UnicodeEncodeError).
""" """
if isinstance(string, str): if isinstance(string, str):
if not string:
return string
if encoding is None: if encoding is None:
encoding = 'utf-8' encoding = 'utf-8'
if errors is None: if errors is None:
...@@ -522,18 +534,22 @@ def quote_from_bytes(bs, safe='/'): ...@@ -522,18 +534,22 @@ def quote_from_bytes(bs, safe='/'):
not perform string-to-bytes encoding. It always returns an ASCII string. not perform string-to-bytes encoding. It always returns an ASCII string.
quote_from_bytes(b'abc def\xab') -> 'abc%20def%AB' quote_from_bytes(b'abc def\xab') -> 'abc%20def%AB'
""" """
if not isinstance(bs, (bytes, bytearray)):
raise TypeError("quote_from_bytes() expected bytes")
if not bs:
return ''
if isinstance(safe, str): if isinstance(safe, str):
# Normalize 'safe' by converting to bytes and removing non-ASCII chars # Normalize 'safe' by converting to bytes and removing non-ASCII chars
safe = safe.encode('ascii', 'ignore') safe = safe.encode('ascii', 'ignore')
cachekey = bytes(safe) # In case it was a bytearray else:
if not (isinstance(bs, bytes) or isinstance(bs, bytearray)): safe = bytes([c for c in safe if c < 128])
raise TypeError("quote_from_bytes() expected a bytes") if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
return bs.decode()
try: try:
quoter = _safe_quoters[cachekey] quoter = _safe_quoters[safe]
except KeyError: except KeyError:
quoter = Quoter(safe) _safe_quoters[safe] = quoter = Quoter(safe).__getitem__
_safe_quoters[cachekey] = quoter return ''.join([quoter(char) for char in bs])
return ''.join([quoter[char] for char in bs])
def urlencode(query, doseq=False, safe='', encoding=None, errors=None): def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
"""Encode a sequence of two-element tuples or dictionary into a URL query string. """Encode a sequence of two-element tuples or dictionary into a URL query string.
......
...@@ -1988,7 +1988,7 @@ class FancyURLopener(URLopener): ...@@ -1988,7 +1988,7 @@ class FancyURLopener(URLopener):
else: else:
return self.open(newurl, data) return self.open(newurl, data)
def get_user_passwd(self, host, realm, clear_cache = 0): def get_user_passwd(self, host, realm, clear_cache=0):
key = realm + '@' + host.lower() key = realm + '@' + host.lower()
if key in self.auth_cache: if key in self.auth_cache:
if clear_cache: if clear_cache:
......
...@@ -291,6 +291,9 @@ Library ...@@ -291,6 +291,9 @@ Library
compilation in a non-ASCII directory if stdout encoding is ASCII (eg. if compilation in a non-ASCII directory if stdout encoding is ASCII (eg. if
stdout is not a TTY). stdout is not a TTY).
- Issue #1285086: Speed up urllib.parse functions: quote, quote_from_bytes,
unquote, unquote_to_bytes.
- Issue #8688: Distutils now recalculates MANIFEST everytime. - Issue #8688: Distutils now recalculates MANIFEST everytime.
- Issue #5099: subprocess.Popen.__del__ no longer references global objects - Issue #5099: subprocess.Popen.__del__ no longer references global objects
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment