Commit 671edc4b authored by Jeremy Hylton's avatar Jeremy Hylton

Make a new urllib package.

It consists of code from urllib, urllib2, urlparse, and robotparser.
The old modules have all been removed.  The new package has five
submodules: urllib.parse, urllib.request, urllib.response,
urllib.error, and urllib.robotparser.  The urllib.request.urlopen()
function uses the url opener from urllib2.

Note that the unittests have not been renamed for the
beta, but they will be renamed in the future.

Joint work with Senthil Kumaran.
parent bde7b549
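For illustration, a minimal sketch (not part of this diff) of how call sites map from the removed 2.x modules onto the new submodules; the URLs are placeholders:

    import urllib.request, urllib.parse, urllib.error, urllib.robotparser

    # urllib.urlopen / urllib2.urlopen -> urllib.request.urlopen
    f = urllib.request.urlopen("http://www.python.org/")  # placeholder URL

    # urlparse.urlparse -> urllib.parse.urlparse
    parts = urllib.parse.urlparse("http://www.python.org/doc/")

    # urllib.quote / urllib.unquote -> urllib.parse.quote / urllib.parse.unquote
    q = urllib.parse.quote("a b")  # 'a%20b'

    # robotparser.RobotFileParser -> urllib.robotparser.RobotFileParser
    rp = urllib.robotparser.RobotFileParser()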
......@@ -35,7 +35,7 @@ from operator import attrgetter
from io import StringIO
import sys
import os
import urllib
import urllib.parse
import email.parser
__all__ = ["MiniFieldStorage", "FieldStorage",
......@@ -216,8 +216,8 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
else:
continue
if len(nv[1]) or keep_blank_values:
name = urllib.unquote(nv[0].replace('+', ' '))
value = urllib.unquote(nv[1].replace('+', ' '))
name = urllib.parse.unquote(nv[0].replace('+', ' '))
value = urllib.parse.unquote(nv[1].replace('+', ' '))
r.append((name, value))
return r
......
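The rewrite above assumes urllib.parse.unquote behaves exactly like the old urllib.unquote; a quick sketch of the form-decoding step with made-up input:

    from urllib.parse import unquote

    # '+' encodes a space in form data, so parse_qsl replaces it
    # before percent-decoding the rest:
    unquote("spam+eggs%26ham".replace('+', ' '))  # -> 'spam eggs&ham'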
......@@ -7,8 +7,9 @@ Implements the Distutils 'register' command (register with the repository).
__revision__ = "$Id$"
import os, string, urllib2, getpass, urlparse
import os, string, getpass
import io
import urllib.parse, urllib.request
from distutils.core import PyPIRCCommand
from distutils.errors import *
......@@ -94,7 +95,8 @@ class register(PyPIRCCommand):
def classifiers(self):
''' Fetch the list of classifiers from the server.
'''
response = urllib2.urlopen(self.repository+'?:action=list_classifiers')
url = self.repository+'?:action=list_classifiers'
response = urllib.request.urlopen(url)
print(response.read())
def verify_metadata(self):
......@@ -166,8 +168,8 @@ Your selection [default 1]: ''', end=' ')
password = getpass.getpass('Password: ')
# set up the authentication
auth = urllib2.HTTPPasswordMgr()
host = urlparse.urlparse(self.repository)[1]
auth = urllib.request.HTTPPasswordMgr()
host = urllib.parse.urlparse(self.repository)[1]
auth.add_password(self.realm, host, username, password)
# send the info to the server and report the result
code, result = self.post_to_server(self.build_post_data('submit'),
......@@ -276,20 +278,20 @@ Your selection [default 1]: ''', end=' ')
'Content-type': 'multipart/form-data; boundary=%s; charset=utf-8'%boundary,
'Content-length': str(len(body))
}
req = urllib2.Request(self.repository, body, headers)
req = urllib.request.Request(self.repository, body, headers)
# handle HTTP and include the Basic Auth handler
opener = urllib2.build_opener(
urllib2.HTTPBasicAuthHandler(password_mgr=auth)
opener = urllib.request.build_opener(
urllib.request.HTTPBasicAuthHandler(password_mgr=auth)
)
data = ''
try:
result = opener.open(req)
except urllib2.HTTPError as e:
except urllib.error.HTTPError as e:
if self.show_response:
data = e.fp.read()
result = e.code, e.msg
except urllib2.URLError as e:
except urllib.error.URLError as e:
result = 500, str(e)
else:
if self.show_response:
......
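The migrated register command strings several renamed pieces together; a condensed sketch of the same Basic-Auth flow (repository host, realm, and credentials below are made up):

    import urllib.error, urllib.request

    auth = urllib.request.HTTPPasswordMgr()
    auth.add_password("pypi", "pypi.example.org", "user", "secret")  # all placeholders
    opener = urllib.request.build_opener(
        urllib.request.HTTPBasicAuthHandler(password_mgr=auth))
    try:
        result = opener.open("http://pypi.example.org/")
    except urllib.error.HTTPError as e:
        print(e.code, e.msg)
    except urllib.error.URLError as e:
        print(500, str(e))  # register reports connection failures as 500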
......@@ -13,7 +13,7 @@ import platform
import configparser
import http.client
import base64
import urlparse
import urllib.parse
class upload(PyPIRCCommand):
......@@ -145,10 +145,11 @@ class upload(PyPIRCCommand):
self.announce("Submitting %s to %s" % (filename, self.repository), log.INFO)
# build the Request
# We can't use urllib2 since we need to send the Basic
# We can't use urllib since we need to send the Basic
# auth right with the first request
# TODO(jhylton): Can we fix urllib?
schema, netloc, url, params, query, fragments = \
urlparse.urlparse(self.repository)
urllib.parse.urlparse(self.repository)
assert not params and not query and not fragments
if schema == 'http':
http = http.client.HTTPConnection(netloc)
......
......@@ -25,6 +25,7 @@ import time
import base64
import random
import socket
import urllib.parse
import warnings
from io import StringIO
......@@ -218,8 +219,7 @@ def encode_rfc2231(s, charset=None, language=None):
charset is given but not language, the string is encoded using the empty
string for language.
"""
import urllib
s = urllib.quote(s, safe='')
s = urllib.parse.quote(s, safe='')
if charset is None and language is None:
return s
if language is None:
......@@ -234,7 +234,6 @@ def decode_params(params):
params is a sequence of 2-tuples containing (param name, string value).
"""
import urllib
# Copy params so we don't mess with the original
params = params[:]
new_params = []
......@@ -272,7 +271,7 @@ def decode_params(params):
# language specifiers at the beginning of the string.
for num, s, encoded in continuations:
if encoded:
s = urllib.unquote(s)
s = urllib.parse.unquote(s)
extended = True
value.append(s)
value = quote(EMPTYSTRING.join(value))
......
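encode_rfc2231() relies on quote() escaping every reserved character when safe='' is passed; a sketch with made-up input:

    from urllib.parse import quote, unquote

    # safe='' means even '/' is percent-encoded, as RFC 2231 needs:
    quote("title with spaces/slashes", safe='')
    # -> 'title%20with%20spaces%2Fslashes'
    unquote("title%20with%20spaces%2Fslashes")
    # -> 'title with spaces/slashes'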
......@@ -70,7 +70,7 @@ import io
import socket
import email.parser
import email.message
from urlparse import urlsplit
from urllib.parse import urlsplit
import warnings
__all__ = ["HTTPResponse", "HTTPConnection",
......
......@@ -28,7 +28,10 @@ http://wwwsearch.sf.net/):
__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
import re, urlparse, copy, time, urllib
import copy
import re
import time
import urllib.parse, urllib.request
try:
import threading as _threading
except ImportError:
......@@ -580,7 +583,7 @@ def request_host(request):
"""
url = request.get_full_url()
host = urlparse.urlparse(url)[1]
host = urllib.parse.urlparse(url)[1]
if host == "":
host = request.get_header("Host", "")
......@@ -602,13 +605,11 @@ def eff_request_host(request):
def request_path(request):
"""request-URI, as defined by RFC 2965."""
url = request.get_full_url()
#scheme, netloc, path, parameters, query, frag = urlparse.urlparse(url)
#req_path = escape_path("".join(urlparse.urlparse(url)[2:]))
path, parameters, query, frag = urlparse.urlparse(url)[2:]
path, parameters, query, frag = urllib.parse.urlparse(url)[2:]
if parameters:
path = "%s;%s" % (path, parameters)
path = escape_path(path)
req_path = urlparse.urlunparse(("", "", path, "", query, frag))
req_path = urllib.parse.urlunparse(("", "", path, "", query, frag))
if not req_path.startswith("/"):
# fix bad RFC 2396 absoluteURI
req_path = "/"+req_path
......@@ -644,7 +645,7 @@ def escape_path(path):
# And here, kind of: draft-fielding-uri-rfc2396bis-03
# (And in draft IRI specification: draft-duerst-iri-05)
# (And here, for new URI schemes: RFC 2718)
path = urllib.quote(path, HTTP_PATH_SAFE)
path = urllib.parse.quote(path, HTTP_PATH_SAFE)
path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
return path
......@@ -1197,8 +1198,7 @@ class CookieJar:
"""Collection of HTTP cookies.
You may not need to know about this class: try
urllib2.build_opener(HTTPCookieProcessor).open(url).
urllib.request.build_opener(HTTPCookieProcessor).open(url).
"""
non_word_re = re.compile(r"\W")
......
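request_path() above leans on urlparse() splitting a URL into six parts; a sketch of that slicing with a made-up URL:

    from urllib.parse import urlparse, urlunparse

    url = "http://example.com/a/b;type=d?q=1#frag"  # made-up URL
    path, parameters, query, frag = urlparse(url)[2:]
    # path='/a/b', parameters='type=d', query='q=1', frag='frag'
    urlunparse(("", "", "%s;%s" % (path, parameters), "", query, frag))
    # -> '/a/b;type=d?q=1#frag'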
......@@ -93,7 +93,7 @@ import cgi
import time
import socket # For gethostbyaddr()
import shutil
import urllib
import urllib.parse
import select
import mimetypes
import posixpath
......@@ -683,7 +683,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
return None
list.sort(key=lambda a: a.lower())
r = []
displaypath = cgi.escape(urllib.unquote(self.path))
displaypath = cgi.escape(urllib.parse.unquote(self.path))
r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
......@@ -699,7 +699,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
displayname = name + "@"
# Note: a link to a directory displays with @ and links with /
r.append('<li><a href="%s">%s</a>\n'
% (urllib.quote(linkname), cgi.escape(displayname)))
% (urllib.parse.quote(linkname), cgi.escape(displayname)))
r.append("</ul>\n<hr>\n</body>\n</html>\n")
enc = sys.getfilesystemencoding()
encoded = ''.join(r).encode(enc)
......@@ -723,7 +723,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
# abandon query parameters
path = path.split('?',1)[0]
path = path.split('#',1)[0]
path = posixpath.normpath(urllib.unquote(path))
path = posixpath.normpath(urllib.parse.unquote(path))
words = path.split('/')
words = filter(None, words)
path = os.getcwd()
......@@ -947,7 +947,7 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
env['SERVER_PROTOCOL'] = self.protocol_version
env['SERVER_PORT'] = str(self.server.server_port)
env['REQUEST_METHOD'] = self.command
uqrest = urllib.unquote(rest)
uqrest = urllib.parse.unquote(rest)
env['PATH_INFO'] = uqrest
env['PATH_TRANSLATED'] = self.translate_path(uqrest)
env['SCRIPT_NAME'] = scriptname
......
......@@ -2,7 +2,7 @@
Do not import directly; use urllib instead."""
import urllib
import urllib.parse
import os
__all__ = ["url2pathname","pathname2url"]
......@@ -13,7 +13,7 @@ def url2pathname(pathname):
#
# XXXX The .. handling should be fixed...
#
tp = urllib.splittype(pathname)[0]
tp = urllib.parse.splittype(pathname)[0]
if tp and tp != 'file':
raise RuntimeError('Cannot convert non-local URL to pathname')
# Turn starting /// into /, an empty hostname means current host
......@@ -47,7 +47,7 @@ def url2pathname(pathname):
i = i + 1
rv = ':' + ':'.join(components)
# and finally unquote slashes and other funny characters
return urllib.unquote(rv)
return urllib.parse.unquote(rv)
def pathname2url(pathname):
"""OS-specific conversion from a file system path to a relative URL
......@@ -73,8 +73,8 @@ def pathname2url(pathname):
return '/'.join(components)
def _pncomp2url(component):
component = urllib.quote(component[:31], safe='') # We want to quote slashes
return component
# We want to quote slashes
return urllib.parse.quote(component[:31], safe='')
def test():
for url in ["index.html",
......
......@@ -24,7 +24,7 @@ read_mime_types(file) -- parse one file, return a dictionary or None
import os
import posixpath
import urllib
import urllib.parse
__all__ = [
"guess_type","guess_extension","guess_all_extensions",
......@@ -104,7 +104,7 @@ class MimeTypes:
Optional `strict' argument when False adds a bunch of commonly found,
but non-standard types.
"""
scheme, url = urllib.splittype(url)
scheme, url = urllib.parse.splittype(url)
if scheme == 'data':
# syntax of data URLs:
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
......
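splittype() is one of the undocumented helpers that moved into urllib.parse; a sketch of how MimeTypes uses it to spot data: URLs:

    from urllib.parse import splittype

    splittype("data:text/plain;base64,SGVsbG8=")  # made-up data URL
    # -> ('data', 'text/plain;base64,SGVsbG8=')
    splittype("no-scheme-here")
    # -> (None, 'no-scheme-here')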
......@@ -725,7 +725,7 @@ def dash_R(the_module, test, indirect_test, huntrleaks):
def dash_R_cleanup(fs, ps, pic, abcs):
import gc, copyreg
import _strptime, linecache
import urlparse, urllib, urllib2, mimetypes, doctest
import urllib.parse, urllib.request, mimetypes, doctest
import struct, filecmp, _abcoll
from distutils.dir_util import _path_created
from weakref import WeakSet
......@@ -758,9 +758,8 @@ def dash_R_cleanup(fs, ps, pic, abcs):
_path_created.clear()
re.purge()
_strptime._regex_cache.clear()
urlparse.clear_cache()
urllib.urlcleanup()
urllib2.install_opener(None)
urllib.parse.clear_cache()
urllib.request.urlcleanup()
linecache.clearcache()
mimetypes._default_mime_types()
filecmp._cache.clear()
......
......@@ -352,10 +352,10 @@ def check_syntax_error(testcase, statement):
testcase.fail('Missing SyntaxError: "%s"' % statement)
def open_urlresource(url, *args, **kw):
import urllib, urlparse
import urllib.request, urllib.parse
requires('urlfetch')
filename = urlparse.urlparse(url)[2].split('/')[-1] # '/': it's URL!
filename = urllib.parse.urlparse(url)[2].split('/')[-1] # '/': it's URL!
for path in [os.path.curdir, os.path.pardir]:
fn = os.path.join(path, filename)
......@@ -363,7 +363,7 @@ def open_urlresource(url, *args, **kw):
return open(fn, *args, **kw)
print('\tfetching %s ...' % url, file=get_original_stdout())
fn, _ = urllib.urlretrieve(url, filename)
fn, _ = urllib.request.urlretrieve(url, filename)
return open(fn, *args, **kw)
......
......@@ -111,7 +111,7 @@ class AllTest(unittest.TestCase):
self.check_all("re")
self.check_all("reprlib")
self.check_all("rlcompleter")
self.check_all("robotparser")
self.check_all("urllib.robotparser")
self.check_all("sched")
self.check_all("shelve")
self.check_all("shlex")
......@@ -134,8 +134,6 @@ class AllTest(unittest.TestCase):
self.check_all("traceback")
self.check_all("tty")
self.check_all("unittest")
self.check_all("urllib")
self.check_all("urlparse")
self.check_all("uu")
self.check_all("warnings")
self.check_all("wave")
......
......@@ -11,7 +11,7 @@ import os
import sys
import base64
import shutil
import urllib
import urllib.parse
import http.client
import tempfile
import threading
......@@ -322,7 +322,8 @@ class CGIHTTPServerTestCase(BaseTestCase):
(res.read(), res.getheader('Content-type'), res.status))
def test_post(self):
params = urllib.urlencode({'spam' : 1, 'eggs' : 'python', 'bacon' : 123456})
params = urllib.parse.urlencode(
{'spam' : 1, 'eggs' : 'python', 'bacon' : 123456})
headers = {'Content-type' : 'application/x-www-form-urlencoded'}
res = self.request('/cgi-bin/file2.py', 'POST', params, headers)
......
......@@ -247,22 +247,22 @@ class ImportHooksTestCase(ImportHooksBaseTestCase):
i = ImpWrapper()
sys.meta_path.append(i)
sys.path_hooks.append(ImpWrapper)
mnames = ("colorsys", "urlparse", "distutils.core")
mnames = ("colorsys", "urllib.parse", "distutils.core")
for mname in mnames:
parent = mname.split(".")[0]
for n in list(sys.modules.keys()):
for n in list(sys.modules):
if n.startswith(parent):
del sys.modules[n]
for mname in mnames:
m = __import__(mname, globals(), locals(), ["__dummy__"])
m.__loader__ # to make sure we actually handled the import
# Delete urllib from modules because urlparse was imported above.
# Without this hack, test_socket_ssl fails if run in this order:
# regrtest.py test_codecmaps_tw test_importhooks test_socket_ssl
try:
del sys.modules['urllib']
except KeyError:
pass
## # Delete urllib from modules because urlparse was imported above.
## # Without this hack, test_socket_ssl fails if run in this order:
## # regrtest.py test_codecmaps_tw test_importhooks test_socket_ssl
## try:
## del sys.modules['urllib']
## except KeyError:
## pass
def test_main():
support.run_unittest(ImportHooksTestCase)
......
......@@ -156,16 +156,6 @@ class PyclbrTest(TestCase):
# These were once about the 10 longest modules
cm('random', ignore=('Random',)) # from _random import Random as CoreGenerator
cm('cgi', ignore=('log',)) # set with = in module
cm('urllib', ignore=('_CFNumberToInt32',
'_CStringFromCFString',
'_CFSetup',
'getproxies_registry',
'proxy_bypass_registry',
'proxy_bypass_macosx_sysconf',
'open_https',
'_https_connection',
'getproxies_macosx_sysconf',
'getproxies_internetconfig',)) # not on all platforms
cm('pickle')
cm('aifc', ignore=('openfp',)) # set with = in module
cm('sre_parse', ignore=('dump',)) # from sre_constants import *
......
import unittest, robotparser
import io
import unittest
import urllib.robotparser
from test import support
class RobotTestCase(unittest.TestCase):
......@@ -34,7 +35,7 @@ def RobotTest(index, robots_txt, good_urls, bad_urls,
agent="test_robotparser"):
lines = io.StringIO(robots_txt).readlines()
parser = robotparser.RobotFileParser()
parser = urllib.robotparser.RobotFileParser()
parser.parse(lines)
for url in good_urls:
tests.addTest(RobotTestCase(index, parser, url, 1, agent))
......@@ -140,7 +141,7 @@ class TestCase(unittest.TestCase):
support.requires('network')
# whole site is password-protected.
url = 'http://mueblesmoraleda.com'
parser = robotparser.RobotFileParser()
parser = urllib.robotparser.RobotFileParser()
parser.set_url(url)
parser.read()
self.assertEqual(parser.can_fetch("*", url+"/robots.txt"), False)
......
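A minimal sketch of the renamed robotparser API these tests exercise (the rules and URLs are made up):

    import urllib.robotparser

    rp = urllib.robotparser.RobotFileParser()
    rp.parse([
        "User-agent: *",
        "Disallow: /private/",
    ])
    rp.can_fetch("test_robotparser", "http://example.com/private/x")  # False
    rp.can_fetch("test_robotparser", "http://example.com/public/x")   # True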
......@@ -10,7 +10,7 @@ import subprocess
import time
import os
import pprint
import urllib, urlparse
import urllib.parse, urllib.request
import shutil
import traceback
import asyncore
......@@ -440,8 +440,8 @@ else:
"""
# abandon query parameters
path = urlparse.urlparse(path)[2]
path = os.path.normpath(urllib.unquote(path))
path = urllib.parse.urlparse(path)[2]
path = os.path.normpath(urllib.parse.unquote(path))
words = path.split('/')
words = filter(None, words)
path = self.root
......@@ -943,7 +943,7 @@ else:
# now fetch the same data from the HTTPS server
url = 'https://%s:%d/%s' % (
HOST, server.port, os.path.split(CERTFILE)[1])
f = urllib.urlopen(url)
f = urllib.request.urlopen(url)
dlen = f.info().get("content-length")
if dlen and (int(dlen) > 0):
d2 = f.read(int(dlen))
......
......@@ -4,10 +4,11 @@ import unittest
from test import support
from test.test_urllib2 import sanepathname2url
import os
import socket
import urllib2
import sys
import os
import urllib.error
import urllib.request
def _retry_thrice(func, exc, *args, **kwargs):
......@@ -28,7 +29,8 @@ def _wrap_with_retry_thrice(func, exc):
# Connecting to remote hosts is flaky. Make it more robust by retrying
# the connection several times.
_urlopen_with_retry = _wrap_with_retry_thrice(urllib2.urlopen, urllib2.URLError)
_urlopen_with_retry = _wrap_with_retry_thrice(urllib.request.urlopen,
urllib.error.URLError)
class AuthTests(unittest.TestCase):
......@@ -78,16 +80,11 @@ class CloseSocketTest(unittest.TestCase):
# calling .close() on urllib2's response objects should close the
# underlying socket
# delve deep into response to fetch socket._socketobject
response = _urlopen_with_retry("http://www.python.org/")
abused_fileobject = response.fp
httpresponse = abused_fileobject.raw
self.assert_(httpresponse.__class__ is http.client.HTTPResponse)
fileobject = httpresponse.fp
self.assert_(not fileobject.closed)
sock = response.fp
self.assert_(not sock.closed)
response.close()
self.assert_(fileobject.closed)
self.assert_(sock.closed)
class OtherNetworkTests(unittest.TestCase):
def setUp(self):
......@@ -116,8 +113,9 @@ class OtherNetworkTests(unittest.TestCase):
f.write('hi there\n')
f.close()
urls = [
'file:'+sanepathname2url(os.path.abspath(TESTFN)),
('file:///nonsensename/etc/passwd', None, urllib2.URLError),
'file:' + sanepathname2url(os.path.abspath(TESTFN)),
('file:///nonsensename/etc/passwd', None,
urllib.error.URLError),
]
self._test_urls(urls, self._extra_handlers(), retry=True)
finally:
......@@ -157,9 +155,9 @@ class OtherNetworkTests(unittest.TestCase):
import logging
debug = logging.getLogger("test_urllib2").debug
urlopen = urllib2.build_opener(*handlers).open
urlopen = urllib.request.build_opener(*handlers).open
if retry:
urlopen = _wrap_with_retry_thrice(urlopen, urllib2.URLError)
urlopen = _wrap_with_retry_thrice(urlopen, urllib.error.URLError)
for url in urls:
if isinstance(url, tuple):
......@@ -186,7 +184,7 @@ class OtherNetworkTests(unittest.TestCase):
def _extra_handlers(self):
handlers = []
cfh = urllib2.CacheFTPHandler()
cfh = urllib.request.CacheFTPHandler()
cfh.setTimeout(1)
handlers.append(cfh)
......@@ -197,7 +195,7 @@ class TimeoutTest(unittest.TestCase):
def test_http_basic(self):
self.assertTrue(socket.getdefaulttimeout() is None)
u = _urlopen_with_retry("http://www.python.org")
self.assertTrue(u.fp.raw.fp._sock.gettimeout() is None)
self.assertTrue(u.fp._sock.gettimeout() is None)
def test_http_default_timeout(self):
self.assertTrue(socket.getdefaulttimeout() is None)
......@@ -206,7 +204,7 @@ class TimeoutTest(unittest.TestCase):
u = _urlopen_with_retry("http://www.python.org")
finally:
socket.setdefaulttimeout(None)
self.assertEqual(u.fp.raw.fp._sock.gettimeout(), 60)
self.assertEqual(u.fp._sock.gettimeout(), 60)
def test_http_no_timeout(self):
self.assertTrue(socket.getdefaulttimeout() is None)
......@@ -215,11 +213,11 @@ class TimeoutTest(unittest.TestCase):
u = _urlopen_with_retry("http://www.python.org", timeout=None)
finally:
socket.setdefaulttimeout(None)
self.assertTrue(u.fp.raw.fp._sock.gettimeout() is None)
self.assertTrue(u.fp._sock.gettimeout() is None)
def test_http_timeout(self):
u = _urlopen_with_retry("http://www.python.org", timeout=120)
self.assertEqual(u.fp.raw.fp._sock.gettimeout(), 120)
self.assertEqual(u.fp._sock.gettimeout(), 120)
FTP_HOST = "ftp://ftp.mirror.nl/pub/mirror/gnu/"
......
......@@ -4,7 +4,7 @@ import unittest
from test import support
import socket
import urllib
import urllib.request
import sys
import os
import email.message
......@@ -36,11 +36,11 @@ class URLTimeoutTest(unittest.TestCase):
socket.setdefaulttimeout(None)
def testURLread(self):
f = _open_with_retry(urllib.urlopen, "http://www.python.org/")
f = _open_with_retry(urllib.request.urlopen, "http://www.python.org/")
x = f.read()
class urlopenNetworkTests(unittest.TestCase):
"""Tests urllib.urlopen using the network.
"""Tests urllib.reqest.urlopen using the network.
These tests are not exhaustive. Assuming that testing using files does a
good job overall of some of the basic interface features. There are no
......@@ -55,7 +55,7 @@ class urlopenNetworkTests(unittest.TestCase):
"""
def urlopen(self, *args):
return _open_with_retry(urllib.urlopen, *args)
return _open_with_retry(urllib.request.urlopen, *args)
def test_basic(self):
# Simple test expected to pass.
......@@ -105,7 +105,7 @@ class urlopenNetworkTests(unittest.TestCase):
def test_getcode(self):
# test getcode() with the fancy opener to get 404 error codes
URL = "http://www.python.org/XXXinvalidXXX"
open_url = urllib.FancyURLopener().open(URL)
open_url = urllib.request.FancyURLopener().open(URL)
try:
code = open_url.getcode()
finally:
......@@ -114,7 +114,7 @@ class urlopenNetworkTests(unittest.TestCase):
def test_fileno(self):
if (sys.platform in ('win32',) or
not hasattr(os, 'fdopen')):
not hasattr(os, 'fdopen')):
# On Windows, socket handles are not file descriptors; this
# test can't pass on Windows.
return
......@@ -142,13 +142,14 @@ class urlopenNetworkTests(unittest.TestCase):
# domain will be spared to serve its defined
# purpose.
# urllib.urlopen, "http://www.sadflkjsasadf.com/")
urllib.urlopen, "http://www.python.invalid./")
urllib.request.urlopen,
"http://www.python.invalid./")
class urlretrieveNetworkTests(unittest.TestCase):
"""Tests urllib.urlretrieve using the network."""
"""Tests urllib.request.urlretrieve using the network."""
def urlretrieve(self, *args):
return _open_with_retry(urllib.urlretrieve, *args)
return _open_with_retry(urllib.request.urlretrieve, *args)
def test_basic(self):
# Test basic functionality.
......
......@@ -2,7 +2,7 @@
from test import support
import unittest
import urlparse
import urllib.parse
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
......@@ -10,19 +10,19 @@ RFC2396_BASE = "http://a/b/c/d;p?q"
class UrlParseTestCase(unittest.TestCase):
def checkRoundtrips(self, url, parsed, split):
result = urlparse.urlparse(url)
result = urllib.parse.urlparse(url)
self.assertEqual(result, parsed)
t = (result.scheme, result.netloc, result.path,
result.params, result.query, result.fragment)
self.assertEqual(t, parsed)
# put it back together and it should be the same
result2 = urlparse.urlunparse(result)
result2 = urllib.parse.urlunparse(result)
self.assertEqual(result2, url)
self.assertEqual(result2, result.geturl())
# the result of geturl() is a fixpoint; we can always parse it
# again to get the same result:
result3 = urlparse.urlparse(result.geturl())
result3 = urllib.parse.urlparse(result.geturl())
self.assertEqual(result3.geturl(), result.geturl())
self.assertEqual(result3, result)
self.assertEqual(result3.scheme, result.scheme)
......@@ -37,17 +37,17 @@ class UrlParseTestCase(unittest.TestCase):
self.assertEqual(result3.port, result.port)
# check the roundtrip using urlsplit() as well
result = urlparse.urlsplit(url)
result = urllib.parse.urlsplit(url)
self.assertEqual(result, split)
t = (result.scheme, result.netloc, result.path,
result.query, result.fragment)
self.assertEqual(t, split)
result2 = urlparse.urlunsplit(result)
result2 = urllib.parse.urlunsplit(result)
self.assertEqual(result2, url)
self.assertEqual(result2, result.geturl())
# check the fixpoint property of re-parsing the result of geturl()
result3 = urlparse.urlsplit(result.geturl())
result3 = urllib.parse.urlsplit(result.geturl())
self.assertEqual(result3.geturl(), result.geturl())
self.assertEqual(result3, result)
self.assertEqual(result3.scheme, result.scheme)
......@@ -83,7 +83,7 @@ class UrlParseTestCase(unittest.TestCase):
self.checkRoundtrips(url, parsed, split)
def test_http_roundtrips(self):
# urlparse.urlsplit treats 'http:' as an optimized special case,
# urllib.parse.urlsplit treats 'http:' as an optimized special case,
# so we test both 'http:' and 'https:' in all the following.
# Three cheers for white box knowledge!
testcases = [
......@@ -111,13 +111,13 @@ class UrlParseTestCase(unittest.TestCase):
self.checkRoundtrips(url, parsed, split)
def checkJoin(self, base, relurl, expected):
self.assertEqual(urlparse.urljoin(base, relurl), expected,
self.assertEqual(urllib.parse.urljoin(base, relurl), expected,
(base, relurl, expected))
def test_unparse_parse(self):
for u in ['Python', './Python']:
self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u)
self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u)
self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
def test_RFC1808(self):
# "normal" cases from RFC 1808:
......@@ -223,11 +223,11 @@ class UrlParseTestCase(unittest.TestCase):
(RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
(RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
]:
self.assertEqual(urlparse.urldefrag(url), (defrag, frag))
self.assertEqual(urllib.parse.urldefrag(url), (defrag, frag))
def test_urlsplit_attributes(self):
url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
p = urlparse.urlsplit(url)
p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http")
self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
self.assertEqual(p.path, "/doc/")
......@@ -242,7 +242,7 @@ class UrlParseTestCase(unittest.TestCase):
#self.assertEqual(p.geturl(), url)
url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
p = urlparse.urlsplit(url)
p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http")
self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
self.assertEqual(p.path, "/doc/")
......@@ -259,7 +259,7 @@ class UrlParseTestCase(unittest.TestCase):
# and request email addresses as usernames.
url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
p = urlparse.urlsplit(url)
p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http")
self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
self.assertEqual(p.path, "/doc/")
......@@ -274,11 +274,11 @@ class UrlParseTestCase(unittest.TestCase):
def test_attributes_bad_port(self):
"""Check handling of non-integer ports."""
p = urlparse.urlsplit("http://www.example.net:foo")
p = urllib.parse.urlsplit("http://www.example.net:foo")
self.assertEqual(p.netloc, "www.example.net:foo")
self.assertRaises(ValueError, lambda: p.port)
p = urlparse.urlparse("http://www.example.net:foo")
p = urllib.parse.urlparse("http://www.example.net:foo")
self.assertEqual(p.netloc, "www.example.net:foo")
self.assertRaises(ValueError, lambda: p.port)
......@@ -289,7 +289,7 @@ class UrlParseTestCase(unittest.TestCase):
# scheme://netloc syntax, the netloc and related attributes
# should be left empty.
uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
p = urlparse.urlsplit(uri)
p = urllib.parse.urlsplit(uri)
self.assertEqual(p.netloc, "")
self.assertEqual(p.username, None)
self.assertEqual(p.password, None)
......@@ -297,7 +297,7 @@ class UrlParseTestCase(unittest.TestCase):
self.assertEqual(p.port, None)
self.assertEqual(p.geturl(), uri)
p = urlparse.urlparse(uri)
p = urllib.parse.urlparse(uri)
self.assertEqual(p.netloc, "")
self.assertEqual(p.username, None)
self.assertEqual(p.password, None)
......@@ -307,7 +307,7 @@ class UrlParseTestCase(unittest.TestCase):
def test_noslash(self):
# Issue 1637: http://foo.com?query is legal
self.assertEqual(urlparse.urlparse("http://example.com?blahblah=/foo"),
self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
('http', 'example.com', '', '', 'blahblah=/foo', ''))
def test_main():
......
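A sketch of the named attributes these tests exercise, reusing one of the test URLs:

    from urllib.parse import urlsplit

    p = urlsplit("http://User:Pass@www.python.org:080/doc/?query=yes#frag")
    p.scheme    # 'http'
    p.netloc    # 'User:Pass@www.python.org:080'
    p.username  # 'User'
    p.password  # 'Pass'
    p.hostname  # 'www.python.org'
    p.port      # 80
    p.path, p.query, p.fragment  # ('/doc/', 'query=yes', 'frag')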
......@@ -111,8 +111,10 @@ class XMLRPCTestCase(unittest.TestCase):
(int(2**34),))
xmlrpclib.dumps((xmlrpclib.MAXINT, xmlrpclib.MININT))
self.assertRaises(OverflowError, xmlrpclib.dumps, (xmlrpclib.MAXINT+1,))
self.assertRaises(OverflowError, xmlrpclib.dumps, (xmlrpclib.MININT-1,))
self.assertRaises(OverflowError, xmlrpclib.dumps,
(xmlrpclib.MAXINT+1,))
self.assertRaises(OverflowError, xmlrpclib.dumps,
(xmlrpclib.MININT-1,))
def dummy_write(s):
pass
......@@ -120,9 +122,10 @@ class XMLRPCTestCase(unittest.TestCase):
m = xmlrpclib.Marshaller()
m.dump_int(xmlrpclib.MAXINT, dummy_write)
m.dump_int(xmlrpclib.MININT, dummy_write)
self.assertRaises(OverflowError, m.dump_int, xmlrpclib.MAXINT+1, dummy_write)
self.assertRaises(OverflowError, m.dump_int, xmlrpclib.MININT-1, dummy_write)
self.assertRaises(OverflowError, m.dump_int,
xmlrpclib.MAXINT+1, dummy_write)
self.assertRaises(OverflowError, m.dump_int,
xmlrpclib.MININT-1, dummy_write)
def test_dump_none(self):
value = alist + [None]
......@@ -132,7 +135,6 @@ class XMLRPCTestCase(unittest.TestCase):
xmlrpclib.loads(strg)[0][0])
self.assertRaises(TypeError, xmlrpclib.dumps, (arg1,))
class HelperTestCase(unittest.TestCase):
def test_escape(self):
self.assertEqual(xmlrpclib.escape("a&b"), "a&amp;b")
......@@ -160,7 +162,6 @@ class FaultTestCase(unittest.TestCase):
# private methods
self.assertRaises(AttributeError,
xmlrpc.server.resolve_dotted_attribute, str, '__add')
self.assert_(xmlrpc.server.resolve_dotted_attribute(str, 'title'))
class DateTimeTestCase(unittest.TestCase):
......@@ -170,7 +171,8 @@ class DateTimeTestCase(unittest.TestCase):
def test_time(self):
d = 1181399930.036952
t = xmlrpclib.DateTime(d)
self.assertEqual(str(t), time.strftime("%Y%m%dT%H:%M:%S", time.localtime(d)))
self.assertEqual(str(t),
time.strftime("%Y%m%dT%H:%M:%S", time.localtime(d)))
def test_time_tuple(self):
d = (2007,6,9,10,38,50,5,160,0)
......@@ -180,7 +182,7 @@ class DateTimeTestCase(unittest.TestCase):
def test_time_struct(self):
d = time.localtime(1181399930.036952)
t = xmlrpclib.DateTime(d)
self.assertEqual(str(t), time.strftime("%Y%m%dT%H:%M:%S", d))
self.assertEqual(str(t), time.strftime("%Y%m%dT%H:%M:%S", d))
def test_datetime_datetime(self):
d = datetime.datetime(2007,1,2,3,4,5)
......@@ -350,12 +352,12 @@ class SimpleServerTestCase(unittest.TestCase):
self.assertEqual(response.reason, 'Not Found')
def test_introspection1(self):
expected_methods = set(['pow', 'div', 'my_function', 'add',
'system.listMethods', 'system.methodHelp',
'system.methodSignature', 'system.multicall'])
try:
p = xmlrpclib.ServerProxy('http://localhost:%d' % PORT)
meth = p.system.listMethods()
expected_methods = set(['pow', 'div', 'my_function', 'add',
'system.listMethods', 'system.methodHelp',
'system.methodSignature', 'system.multicall'])
self.assertEqual(set(meth), expected_methods)
except (xmlrpclib.ProtocolError, socket.error) as e:
# ignore failures due to non-blocking socket 'unavailable' errors
......@@ -593,7 +595,8 @@ class CGIHandlerTestCase(unittest.TestCase):
# will respond exception, if so, our goal is achieved ;)
handle = open(support.TESTFN, "r").read()
# start with 44th char so as not to get http header, we just need only xml
# start with 44th char so as not to get http header, we just
# need only xml
self.assertRaises(xmlrpclib.Fault, xmlrpclib.loads, handle[44:])
os.remove("xmldata.txt")
......
"""Exception classes raised by urllib.
The base exception class is URLError, which inherits from IOError. It
doesn't define any behavior of its own, but is the base class for all
exceptions defined in this package.
HTTPError is an exception class that is also a valid HTTP response
instance. It behaves this way because HTTP protocol errors are valid
responses, with a status code, headers, and a body. In some contexts,
an application may want to handle an exception like a regular
response.
"""
import urllib.response
# do these error classes make sense?
# make sure all of the IOError stuff is overridden. we just want to be
# subtypes.
class URLError(IOError):
# URLError is a sub-type of IOError, but it doesn't share any of
# the implementation. need to override __init__ and __str__.
# It sets self.args for compatibility with other EnvironmentError
# subclasses, but args doesn't have the typical format with errno in
# slot 0 and strerror in slot 1. This may be better than nothing.
def __init__(self, reason, filename=None):
self.args = reason,
self.reason = reason
if filename is not None:
self.filename = filename
def __str__(self):
return '<urlopen error %s>' % self.reason
class HTTPError(URLError, urllib.response.addinfourl):
"""Raised when HTTP error occurs, but also acts like non-error return"""
__super_init = urllib.response.addinfourl.__init__
def __init__(self, url, code, msg, hdrs, fp):
self.code = code
self.msg = msg
self.hdrs = hdrs
self.fp = fp
self.filename = url
# The addinfourl classes depend on fp being a valid file
# object. In some cases, the HTTPError may not have a valid
# file object. If this happens, the simplest workaround is to
# not initialize the base classes.
if fp is not None:
self.__super_init(fp, hdrs, url, code)
def __str__(self):
return 'HTTP Error %s: %s' % (self.code, self.msg)
# exception raised when downloaded size does not match content-length
class ContentTooShortError(URLError):
def __init__(self, message, content):
URLError.__init__(self, message)
self.content = content
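A sketch of how client code is expected to use these classes; the URL is a placeholder borrowed from the tests:

    import urllib.error
    import urllib.request

    try:
        f = urllib.request.urlopen("http://www.python.org/XXXinvalidXXX")
    except urllib.error.HTTPError as err:
        # HTTPError doubles as a response object: it carries a status
        # code and headers, and is readable when fp is not None.
        print(err.code, err.msg)
        body = err.read()
    except urllib.error.URLError as err:
        print(err.reason)  # connection-level failure, no HTTP response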
"""Response classes used by urllib.
The base class, addbase, defines a minimal file-like interface,
including read() and readline(). The typical response object is an
addinfourl instance, which defines an info() method that returns
headers and a geturl() method that returns the url.
"""
class addbase(object):
"""Base class for addinfo and addclosehook."""
# XXX Add a method to expose the timeout on the underlying socket?
def __init__(self, fp):
# TODO(jhylton): Is there a better way to delegate using io?
self.fp = fp
self.read = self.fp.read
self.readline = self.fp.readline
# TODO(jhylton): Make sure an object with readlines() is also iterable
if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
if hasattr(self.fp, "fileno"):
self.fileno = self.fp.fileno
else:
self.fileno = lambda: None
if hasattr(self.fp, "__iter__"):
self.__iter__ = self.fp.__iter__
if hasattr(self.fp, "__next__"):
self.__next__ = self.fp.__next__
def __repr__(self):
return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
id(self), self.fp)
def close(self):
self.read = None
self.readline = None
self.readlines = None
self.fileno = None
if self.fp: self.fp.close()
self.fp = None
class addclosehook(addbase):
"""Class to add a close hook to an open file."""
def __init__(self, fp, closehook, *hookargs):
addbase.__init__(self, fp)
self.closehook = closehook
self.hookargs = hookargs
def close(self):
addbase.close(self)
if self.closehook:
self.closehook(*self.hookargs)
self.closehook = None
self.hookargs = None
class addinfo(addbase):
"""class to add an info() method to an open file."""
def __init__(self, fp, headers):
addbase.__init__(self, fp)
self.headers = headers
def info(self):
return self.headers
class addinfourl(addbase):
"""class to add info() and geturl() methods to an open file."""
def __init__(self, fp, headers, url, code=None):
addbase.__init__(self, fp)
self.headers = headers
self.url = url
self.code = code
def info(self):
return self.headers
def getcode(self):
return self.code
def geturl(self):
return self.url
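A sketch of the wrapping these classes provide, substituting an in-memory file for a socket:

    import io
    from urllib.response import addinfourl

    resp = addinfourl(io.StringIO("hello"),            # stand-in for a socket file
                      {"Content-Type": "text/plain"},  # stand-in for real headers
                      "http://example.com/", 200)
    resp.read()     # 'hello'
    resp.info()     # {'Content-Type': 'text/plain'}
    resp.geturl()   # 'http://example.com/'
    resp.getcode()  # 200
    resp.close()    # also closes the underlying fp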
......@@ -9,8 +9,8 @@
The robots.txt Exclusion Protocol is implemented as specified in
http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
"""
import urlparse
import urllib
import urllib.parse, urllib.request
__all__ = ["RobotFileParser"]
......@@ -48,24 +48,19 @@ class RobotFileParser:
def set_url(self, url):
"""Sets the URL referring to a robots.txt file."""
self.url = url
self.host, self.path = urlparse.urlparse(url)[1:3]
self.host, self.path = urllib.parse.urlparse(url)[1:3]
def read(self):
"""Reads the robots.txt URL and feeds it to the parser."""
opener = URLopener()
f = opener.open(self.url)
lines = []
line = f.readline()
while line:
lines.append(line.strip())
line = f.readline()
self.errcode = opener.errcode
if self.errcode in (401, 403):
self.disallow_all = True
elif self.errcode >= 400:
self.allow_all = True
elif self.errcode == 200 and lines:
self.parse(lines)
try:
f = urllib.request.urlopen(self.url)
except urllib.error.HTTPError as err:
if err.code in (401, 403):
self.disallow_all = True
elif err.code >= 400:
self.allow_all = True
else:
self.parse(f.read().splitlines())
def _add_entry(self, entry):
if "*" in entry.useragents:
......@@ -75,15 +70,15 @@ class RobotFileParser:
self.entries.append(entry)
def parse(self, lines):
"""parse the input lines from a robots.txt file.
We allow that a user-agent: line is not preceded by
one or more blank lines."""
"""Parse the input lines from a robots.txt file.
We allow that a user-agent: line is not preceded by
one or more blank lines.
"""
state = 0
linenumber = 0
entry = Entry()
for line in lines:
linenumber = linenumber + 1
if not line:
if state == 1:
entry = Entry()
......@@ -102,7 +97,7 @@ class RobotFileParser:
line = line.split(':', 1)
if len(line) == 2:
line[0] = line[0].strip().lower()
line[1] = urllib.unquote(line[1].strip())
line[1] = urllib.parse.unquote(line[1].strip())
if line[0] == "user-agent":
if state == 2:
self._add_entry(entry)
......@@ -128,7 +123,7 @@ class RobotFileParser:
return True
# search for given user agent matches
# the first match counts
url = urllib.quote(urlparse.urlparse(urllib.unquote(url))[2]) or "/"
url = urllib.parse.quote(urllib.parse.urlparse(urllib.parse.unquote(url))[2]) or "/"
for entry in self.entries:
if entry.applies_to(useragent):
return entry.allowance(url)
......@@ -138,7 +133,6 @@ class RobotFileParser:
# agent not found ==> access granted
return True
def __str__(self):
return ''.join([str(entry) + "\n" for entry in self.entries])
......@@ -150,7 +144,7 @@ class RuleLine:
if path == '' and not allowance:
# an empty value means allow all
allowance = True
self.path = urllib.quote(path)
self.path = urllib.parse.quote(path)
self.allowance = allowance
def applies_to(self, filename):
......@@ -195,18 +189,3 @@ class Entry:
if line.applies_to(filename):
return line.allowance
return True
class URLopener(urllib.FancyURLopener):
def __init__(self, *args):
urllib.FancyURLopener.__init__(self, *args)
self.errcode = 200
def prompt_user_passwd(self, host, realm):
## If robots.txt file is accessible only with a password,
## we act as if the file wasn't there.
return None, None
def http_error_default(self, url, fp, errcode, errmsg, headers):
self.errcode = errcode
return urllib.FancyURLopener.http_error_default(self, url, fp, errcode,
errmsg, headers)
......@@ -11,7 +11,8 @@ module. See also the BaseHTTPServer module docs for other API information.
"""
from http.server import BaseHTTPRequestHandler, HTTPServer
import urllib, sys
import sys
import urllib.parse
from wsgiref.handlers import SimpleHandler
__version__ = "0.1"
......@@ -93,7 +94,7 @@ class WSGIRequestHandler(BaseHTTPRequestHandler):
else:
path,query = self.path,''
env['PATH_INFO'] = urllib.unquote(path)
env['PATH_INFO'] = urllib.parse.unquote(path)
env['QUERY_STRING'] = query
host = self.address_string()
......
......@@ -50,7 +50,7 @@ def guess_scheme(environ):
def application_uri(environ):
"""Return the application's base URI (no PATH_INFO or QUERY_STRING)"""
url = environ['wsgi.url_scheme']+'://'
from urllib import quote
from urllib.parse import quote
if environ.get('HTTP_HOST'):
url += environ['HTTP_HOST']
......@@ -70,7 +70,7 @@ def application_uri(environ):
def request_uri(environ, include_query=1):
"""Return the full request URI, optionally including the query string"""
url = application_uri(environ)
from urllib import quote
from urllib.parse import quote
path_info = quote(environ.get('PATH_INFO',''))
if not environ.get('SCRIPT_NAME'):
url += path_info[1:]
......
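request_uri() builds on quote(), whose default safe='/' leaves path separators alone; a quick sketch:

    from urllib.parse import quote

    quote("/some path/with spaces")  # -> '/some%20path/with%20spaces'
    quote("/a/b?c=d")                # '?' and '=' are escaped: '/a/b%3Fc%3Dd'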
......@@ -190,8 +190,8 @@ class DOMBuilder:
options.errorHandler = self.errorHandler
fp = input.byteStream
if fp is None and options.systemId:
import urllib2
fp = urllib2.urlopen(input.systemId)
import urllib.request
fp = urllib.request.urlopen(input.systemId)
return self._parse_bytestream(fp, options)
def parseWithContext(self, input, cnode, action):
......@@ -223,14 +223,14 @@ class DOMEntityResolver(object):
source.encoding = self._guess_media_encoding(source)
# determine the base URI is we can
import posixpath, urlparse
parts = urlparse.urlparse(systemId)
import posixpath, urllib.parse
parts = urllib.parse.urlparse(systemId)
scheme, netloc, path, params, query, fragment = parts
# XXX should we check the scheme here as well?
if path and not path.endswith("/"):
path = posixpath.dirname(path) + "/"
parts = scheme, netloc, path, params, query, fragment
source.baseURI = urlparse.urlunparse(parts)
source.baseURI = urllib.parse.urlunparse(parts)
return source
......@@ -242,8 +242,8 @@ class DOMEntityResolver(object):
return self._opener
def _create_opener(self):
import urllib2
return urllib2.build_opener()
import urllib.request
return urllib.request.build_opener()
def _guess_media_encoding(self, source):
info = source.byteStream.info()
......
......@@ -3,7 +3,7 @@ A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
import os, urlparse, urllib
import os, urllib.parse, urllib.request
from . import handler
from . import xmlreader
......@@ -289,8 +289,8 @@ def prepare_input_source(source, base = ""):
source.setSystemId(sysidfilename)
f = open(sysidfilename, "rb")
else:
source.setSystemId(urlparse.urljoin(base, sysid))
f = urllib.urlopen(source.getSystemId())
source.setSystemId(urllib.parse.urljoin(base, sysid))
f = urllib.request.urlopen(source.getSystemId())
source.setByteStream(f)
......
......@@ -1160,12 +1160,12 @@ class Transport:
if isinstance(host, tuple):
host, x509 = host
import urllib
auth, host = urllib.splituser(host)
import urllib.parse
auth, host = urllib.parse.splituser(host)
if auth:
import base64
auth = base64.encodestring(urllib.unquote(auth))
auth = base64.encodestring(urllib.parse.unquote(auth))
auth = "".join(auth.split()) # get rid of whitespace
extra_headers = [
("Authorization", "Basic " + auth)
......@@ -1321,11 +1321,11 @@ class ServerProxy:
# establish a "logical" server connection
# get the url
import urllib
type, uri = urllib.splittype(uri)
import urllib.parse
type, uri = urllib.parse.splittype(uri)
if type not in ("http", "https"):
raise IOError("unsupported XML-RPC protocol")
self.__host, self.__handler = urllib.splithost(uri)
self.__host, self.__handler = urllib.parse.splithost(uri)
if not self.__handler:
self.__handler = "/RPC2"
......
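The Transport and ServerProxy changes use more of the undocumented split helpers now living in urllib.parse; a sketch with a made-up URI:

    from urllib.parse import splittype, splithost, splituser

    splittype("http://user:pw@host:8080/RPC2")
    # -> ('http', '//user:pw@host:8080/RPC2')
    splithost("//user:pw@host:8080/RPC2")
    # -> ('user:pw@host:8080', '/RPC2')
    splituser("user:pw@host:8080")
    # -> ('user:pw', 'host:8080')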
......@@ -809,7 +809,7 @@ LIBSUBDIRS= tkinter site-packages test test/output test/data \
email email/mime email/test email/test/data \
html json json/tests http dbm xmlrpc \
sqlite3 sqlite3/test \
logging bsddb bsddb/test csv wsgiref \
logging bsddb bsddb/test csv wsgiref urllib \
lib2to3 lib2to3/fixes lib2to3/pgen2 lib2to3/tests \
ctypes ctypes/test ctypes/macholib idlelib idlelib/Icons \
distutils distutils/command distutils/tests $(XMLLIBSUBDIRS) \
......
......@@ -81,6 +81,15 @@ Extension Modules
Library
-------
- a new ``urllib`` package was created. It consists of code from
``urllib``, ``urllib2``, ``urlparse``, and ``robotparser``. The old
modules have all been removed. The new package has five submodules:
``urllib.parse``, ``urllib.request``, ``urllib.response``,
``urllib.error``, and ``urllib.robotparser``. The
``urllib.request.urlopen()`` function uses the url opener from
``urllib2``. (Note that the unittests have not been renamed for the
beta, but they will be renamed in the future.)
- rfc822 has been removed in favor of the email package.
- mimetools has been removed in favor of the email package.
......