Commit 39b198d8 authored by Jeremy Hylton

Change urllib to use HTTPConnection rather than old HTTP class.

The HTTP class is a backwards compatibility layer for the Python 1.5
API.  (The only remaining use in the std library is xmlrpclib.)

The current change makes urllib issue HTTP/1.0 requests with
HTTPConnection, because it accesses HTTPResponse.fp directly instead
of using the read() method.  Using fp directly interacts poorly with
persistent connections.  There are probably better solutions than the
current one, but this is a start.
parent f066c1ba
...@@ -322,6 +322,13 @@ class HTTPResponse: ...@@ -322,6 +322,13 @@ class HTTPResponse:
# accepts iso-8859-1. # accepts iso-8859-1.
def __init__(self, sock, debuglevel=0, strict=0, method=None): def __init__(self, sock, debuglevel=0, strict=0, method=None):
# XXX If the response includes a content-length header, we
# need to make sure that the client doesn't read more than the
# specified number of bytes. If it does, it will block until
# the server times out and closes the connection. (This only
# applies to HTTP/1.1 connections.) Since some clients access
# self.fp directly rather than calling read(), this is a little
# tricky.
self.fp = sock.makefile("rb", 0) self.fp = sock.makefile("rb", 0)
self.debuglevel = debuglevel self.debuglevel = debuglevel
self.strict = strict self.strict = strict
......
...@@ -81,11 +81,13 @@ def urlopen(url, data=None, proxies=None): ...@@ -81,11 +81,13 @@ def urlopen(url, data=None, proxies=None):
return opener.open(url) return opener.open(url)
else: else:
return opener.open(url, data) return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None): def urlretrieve(url, filename=None, reporthook=None, data=None):
global _urlopener global _urlopener
if not _urlopener: if not _urlopener:
_urlopener = FancyURLopener() _urlopener = FancyURLopener()
return _urlopener.retrieve(url, filename, reporthook, data) return _urlopener.retrieve(url, filename, reporthook, data)
def urlcleanup(): def urlcleanup():
if _urlopener: if _urlopener:
_urlopener.cleanup() _urlopener.cleanup()
...@@ -310,37 +312,44 @@ class URLopener: ...@@ -310,37 +312,44 @@ class URLopener:
auth = base64.b64encode(user_passwd).strip() auth = base64.b64encode(user_passwd).strip()
else: else:
auth = None auth = None
h = httplib.HTTP(host) http_conn = httplib.HTTPConnection(host)
# XXX We should fix urllib so that it works with HTTP/1.1.
http_conn._http_vsn = 10
http_conn._http_vsn_str = "HTTP/1.0"
headers = {}
if proxy_auth:
headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
if auth:
headers["Authorization"] = "Basic %s" % auth
if realhost:
headers["Host"] = realhost
for header, value in self.addheaders:
headers[header] = value
if data is not None: if data is not None:
h.putrequest('POST', selector) headers["Content-Type"] = "application/x-www-form-urlencoded"
h.putheader('Content-Type', 'application/x-www-form-urlencoded') http_conn.request("POST", selector, data, headers)
h.putheader('Content-Length', '%d' % len(data))
else: else:
h.putrequest('GET', selector) http_conn.request("GET", selector, headers=headers)
if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
if auth: h.putheader('Authorization', 'Basic %s' % auth) try:
if realhost: h.putheader('Host', realhost) response = http_conn.getresponse()
for args in self.addheaders: h.putheader(*args) except httplib.BadStatusLine:
h.endheaders()
if data is not None:
h.send(data)
errcode, errmsg, headers = h.getreply()
fp = h.getfile()
if errcode == -1:
if fp: fp.close()
# something went wrong with the HTTP status line # something went wrong with the HTTP status line
raise IOError, ('http protocol error', 0, raise IOError('http protocol error', 0,
'got a bad status line', None) 'got a bad status line', None)
if errcode == 200:
return addinfourl(fp, headers, "http:" + url) if response.status == 200:
return addinfourl(response.fp, response.msg, "http:" + url)
else: else:
if data is None: return self.http_error(
return self.http_error(url, fp, errcode, errmsg, headers) url, response.fp,
else: response.status, response.reason, response.msg, data)
return self.http_error(url, fp, errcode, errmsg, headers, data)
def http_error(self, url, fp, errcode, errmsg, headers, data=None): def http_error(self, url, fp, errcode, errmsg, headers, data=None):
"""Handle http errors. """Handle http errors.
Derived class can override this, or provide specific handlers Derived class can override this, or provide specific handlers
named http_error_DDD where DDD is the 3-digit error code.""" named http_error_DDD where DDD is the 3-digit error code."""
# First check if there's a specific handler for this error # First check if there's a specific handler for this error
...@@ -872,6 +881,8 @@ class ftpwrapper: ...@@ -872,6 +881,8 @@ class ftpwrapper:
class addbase: class addbase:
"""Base class for addinfo and addclosehook.""" """Base class for addinfo and addclosehook."""
# XXX Add a method to expose the timeout on the underlying socket?
def __init__(self, fp): def __init__(self, fp):
self.fp = fp self.fp = fp
self.read = self.fp.read self.read = self.fp.read
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment