Commit db258793 authored by Georg Brandl's avatar Georg Brandl

Patch #1542948: fix urllib2 header casing issue. With new test.

 (backport from rev. 51416)
parent a4b657fa
...@@ -46,6 +46,69 @@ class TrivialTests(unittest.TestCase): ...@@ -46,6 +46,69 @@ class TrivialTests(unittest.TestCase):
self.assertEquals(urllib2.parse_http_list(string), list) self.assertEquals(urllib2.parse_http_list(string), list)
def test_request_headers_dict():
"""
The Request.headers dictionary is not a documented interface. It should
stay that way, because the complete set of headers are only accessible
through the .get_header(), .has_header(), .header_items() interface.
However, .headers pre-dates those methods, and so real code will be using
the dictionary.
The introduction in 2.4 of those methods was a mistake for the same reason:
code that previously saw all (urllib2 user)-provided headers in .headers
now sees only a subset (and the function interface is ugly and incomplete).
A better change would have been to replace .headers dict with a dict
subclass (or UserDict.DictMixin instance?) that preserved the .headers
interface and also provided access to the "unredirected" headers. It's
probably too late to fix that, though.
Check .capitalize() case normalization:
>>> url = "http://example.com"
>>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
'blah'
>>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
'blah'
Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
but that could be changed in future.
"""
def test_request_headers_methods():
"""
Note the case normalization of header names here, to .capitalize()-case.
This should be preserved for backwards-compatibility. (In the HTTP case,
normalization to .title()-case is done by urllib2 before sending headers to
httplib).
>>> url = "http://example.com"
>>> r = Request(url, headers={"Spam-eggs": "blah"})
>>> r.has_header("Spam-eggs")
True
>>> r.header_items()
[('Spam-eggs', 'blah')]
>>> r.add_header("Foo-Bar", "baz")
>>> items = r.header_items()
>>> items.sort()
>>> items
[('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]
Note that e.g. r.has_header("spam-EggS") is currently False, and
r.get_header("spam-EggS") returns None, but that could be changed in
future.
>>> r.has_header("Not-there")
False
>>> print r.get_header("Not-there")
None
>>> r.get_header("Not-there", "default")
'default'
"""
def test_password_manager(self): def test_password_manager(self):
""" """
>>> mgr = urllib2.HTTPPasswordMgr() >>> mgr = urllib2.HTTPPasswordMgr()
...@@ -676,11 +739,11 @@ class HandlerTests(unittest.TestCase): ...@@ -676,11 +739,11 @@ class HandlerTests(unittest.TestCase):
r = MockResponse(200, "OK", {}, "") r = MockResponse(200, "OK", {}, "")
newreq = h.do_request_(req) newreq = h.do_request_(req)
if data is None: # GET if data is None: # GET
self.assert_("Content-Length" not in req.unredirected_hdrs) self.assert_("Content-length" not in req.unredirected_hdrs)
self.assert_("Content-Type" not in req.unredirected_hdrs) self.assert_("Content-type" not in req.unredirected_hdrs)
else: # POST else: # POST
self.assertEqual(req.unredirected_hdrs["Content-Length"], "0") self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
self.assertEqual(req.unredirected_hdrs["Content-Type"], self.assertEqual(req.unredirected_hdrs["Content-type"],
"application/x-www-form-urlencoded") "application/x-www-form-urlencoded")
# XXX the details of Host could be better tested # XXX the details of Host could be better tested
self.assertEqual(req.unredirected_hdrs["Host"], "example.com") self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
...@@ -692,8 +755,8 @@ class HandlerTests(unittest.TestCase): ...@@ -692,8 +755,8 @@ class HandlerTests(unittest.TestCase):
req.add_unredirected_header("Host", "baz") req.add_unredirected_header("Host", "baz")
req.add_unredirected_header("Spam", "foo") req.add_unredirected_header("Spam", "foo")
newreq = h.do_request_(req) newreq = h.do_request_(req)
self.assertEqual(req.unredirected_hdrs["Content-Length"], "foo") self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
self.assertEqual(req.unredirected_hdrs["Content-Type"], "bar") self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
self.assertEqual(req.unredirected_hdrs["Host"], "baz") self.assertEqual(req.unredirected_hdrs["Host"], "baz")
self.assertEqual(req.unredirected_hdrs["Spam"], "foo") self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
...@@ -847,7 +910,7 @@ class HandlerTests(unittest.TestCase): ...@@ -847,7 +910,7 @@ class HandlerTests(unittest.TestCase):
407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm) 407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
opener.add_handler(auth_handler) opener.add_handler(auth_handler)
opener.add_handler(http_handler) opener.add_handler(http_handler)
self._test_basic_auth(opener, auth_handler, "Proxy-Authorization", self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
realm, http_handler, password_manager, realm, http_handler, password_manager,
"http://acme.example.com:3128/protected", "http://acme.example.com:3128/protected",
"proxy.example.com:3128", "proxy.example.com:3128",
......
...@@ -263,11 +263,11 @@ class Request: ...@@ -263,11 +263,11 @@ class Request:
def add_header(self, key, val): def add_header(self, key, val):
# useful for something like authentication # useful for something like authentication
self.headers[key.title()] = val self.headers[key.capitalize()] = val
def add_unredirected_header(self, key, val): def add_unredirected_header(self, key, val):
# will not be added to a redirected request # will not be added to a redirected request
self.unredirected_hdrs[key.title()] = val self.unredirected_hdrs[key.capitalize()] = val
def has_header(self, header_name): def has_header(self, header_name):
return (header_name in self.headers or return (header_name in self.headers or
...@@ -286,7 +286,7 @@ class Request: ...@@ -286,7 +286,7 @@ class Request:
class OpenerDirector: class OpenerDirector:
def __init__(self): def __init__(self):
client_version = "Python-urllib/%s" % __version__ client_version = "Python-urllib/%s" % __version__
self.addheaders = [('User-Agent', client_version)] self.addheaders = [('User-agent', client_version)]
# manage the individual handlers # manage the individual handlers
self.handlers = [] self.handlers = []
self.handle_open = {} self.handle_open = {}
...@@ -675,7 +675,7 @@ class ProxyHandler(BaseHandler): ...@@ -675,7 +675,7 @@ class ProxyHandler(BaseHandler):
if user and password: if user and password:
user_pass = '%s:%s' % (unquote(user), unquote(password)) user_pass = '%s:%s' % (unquote(user), unquote(password))
creds = base64.encodestring(user_pass).strip() creds = base64.encodestring(user_pass).strip()
req.add_header('Proxy-Authorization', 'Basic ' + creds) req.add_header('Proxy-authorization', 'Basic ' + creds)
hostport = unquote(hostport) hostport = unquote(hostport)
req.set_proxy(hostport, proxy_type) req.set_proxy(hostport, proxy_type)
if orig_type == proxy_type: if orig_type == proxy_type:
...@@ -819,7 +819,7 @@ class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): ...@@ -819,7 +819,7 @@ class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
auth_header = 'Proxy-Authorization' auth_header = 'Proxy-authorization'
def http_error_407(self, req, fp, code, msg, headers): def http_error_407(self, req, fp, code, msg, headers):
# http_error_auth_reqed requires that there is no userinfo component in # http_error_auth_reqed requires that there is no userinfo component in
...@@ -1022,20 +1022,20 @@ class AbstractHTTPHandler(BaseHandler): ...@@ -1022,20 +1022,20 @@ class AbstractHTTPHandler(BaseHandler):
if request.has_data(): # POST if request.has_data(): # POST
data = request.get_data() data = request.get_data()
if not request.has_header('Content-Type'): if not request.has_header('Content-type'):
request.add_unredirected_header( request.add_unredirected_header(
'Content-Type', 'Content-type',
'application/x-www-form-urlencoded') 'application/x-www-form-urlencoded')
if not request.has_header('Content-Length'): if not request.has_header('Content-length'):
request.add_unredirected_header( request.add_unredirected_header(
'Content-Length', '%d' % len(data)) 'Content-length', '%d' % len(data))
scheme, sel = splittype(request.get_selector()) scheme, sel = splittype(request.get_selector())
sel_host, sel_path = splithost(sel) sel_host, sel_path = splithost(sel)
if not request.has_header('Host'): if not request.has_header('Host'):
request.add_unredirected_header('Host', sel_host or host) request.add_unredirected_header('Host', sel_host or host)
for name, value in self.parent.addheaders: for name, value in self.parent.addheaders:
name = name.title() name = name.capitalize()
if not request.has_header(name): if not request.has_header(name):
request.add_unredirected_header(name, value) request.add_unredirected_header(name, value)
...@@ -1067,6 +1067,8 @@ class AbstractHTTPHandler(BaseHandler): ...@@ -1067,6 +1067,8 @@ class AbstractHTTPHandler(BaseHandler):
# So make sure the connection gets closed after the (only) # So make sure the connection gets closed after the (only)
# request. # request.
headers["Connection"] = "close" headers["Connection"] = "close"
headers = dict(
(name.title(), val) for name, val in headers.items())
try: try:
h.request(req.get_method(), req.get_selector(), req.data, headers) h.request(req.get_method(), req.get_selector(), req.data, headers)
r = h.getresponse() r = h.getresponse()
...@@ -1217,7 +1219,7 @@ class FileHandler(BaseHandler): ...@@ -1217,7 +1219,7 @@ class FileHandler(BaseHandler):
modified = email.Utils.formatdate(stats.st_mtime, usegmt=True) modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
mtype = mimetypes.guess_type(file)[0] mtype = mimetypes.guess_type(file)[0]
headers = mimetools.Message(StringIO( headers = mimetools.Message(StringIO(
'Content-Type: %s\nContent-Length: %d\nLast-Modified: %s\n' % 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
(mtype or 'text/plain', size, modified))) (mtype or 'text/plain', size, modified)))
if host: if host:
host, port = splitport(host) host, port = splitport(host)
...@@ -1272,9 +1274,9 @@ class FTPHandler(BaseHandler): ...@@ -1272,9 +1274,9 @@ class FTPHandler(BaseHandler):
headers = "" headers = ""
mtype = mimetypes.guess_type(req.get_full_url())[0] mtype = mimetypes.guess_type(req.get_full_url())[0]
if mtype: if mtype:
headers += "Content-Type: %s\n" % mtype headers += "Content-type: %s\n" % mtype
if retrlen is not None and retrlen >= 0: if retrlen is not None and retrlen >= 0:
headers += "Content-Length: %d\n" % retrlen headers += "Content-length: %d\n" % retrlen
sf = StringIO(headers) sf = StringIO(headers)
headers = mimetools.Message(sf) headers = mimetools.Message(sf)
return addinfourl(fp, headers, req.get_full_url()) return addinfourl(fp, headers, req.get_full_url())
......
...@@ -290,8 +290,8 @@ Library ...@@ -290,8 +290,8 @@ Library
- Bug #978833: Really close underlying socket in _socketobject.close. - Bug #978833: Really close underlying socket in _socketobject.close.
- Bug #1459963: urllib and urllib2 now normalize HTTP header names correctly - Bug #1459963: urllib and urllib2 now normalize HTTP header names with
with title(). title().
- Patch #1525766: In pkgutil.walk_packages, correctly pass the onerror callback - Patch #1525766: In pkgutil.walk_packages, correctly pass the onerror callback
to recursive calls and call it with the failing package name. to recursive calls and call it with the failing package name.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment