Commit ecacc3bc authored by Martin Panter's avatar Martin Panter

Issue #14132, Issue #17214: Merge two redirect handling fixes from 3.5

parents 425a2b3b 66bbffc9
"""Regresssion tests for urllib""" """Regresssion tests for what was in Python 2's "urllib" module"""
import urllib.parse import urllib.parse
import urllib.request import urllib.request
...@@ -86,10 +86,11 @@ def fakehttp(fakedata): ...@@ -86,10 +86,11 @@ def fakehttp(fakedata):
# buffer to store data for verification in urlopen tests. # buffer to store data for verification in urlopen tests.
buf = None buf = None
fakesock = FakeSocket(fakedata)
def connect(self): def connect(self):
self.sock = self.fakesock self.sock = FakeSocket(self.fakedata)
type(self).fakesock = self.sock
FakeHTTPConnection.fakedata = fakedata
return FakeHTTPConnection return FakeHTTPConnection
......
...@@ -1208,6 +1208,57 @@ class HandlerTests(unittest.TestCase): ...@@ -1208,6 +1208,57 @@ class HandlerTests(unittest.TestCase):
fp = o.open('http://www.example.com') fp = o.open('http://www.example.com')
self.assertEqual(fp.geturl(), redirected_url.strip()) self.assertEqual(fp.geturl(), redirected_url.strip())
def test_redirect_no_path(self):
    # Issue 14132: Relative redirect strips original path.
    # A "Location: ?query" redirect must be resolved against the original
    # request path, giving "/path?query" rather than "/?query".

    # urlopen() installs a module-global opener on first use; reset it
    # afterwards so the patched connection class cannot leak into
    # other tests through that cached opener.
    self.addCleanup(urllib.request.urlcleanup)

    real_class = http.client.HTTPConnection
    response1 = b"HTTP/1.1 302 Found\r\nLocation: ?query\r\n\r\n"
    http.client.HTTPConnection = test_urllib.fakehttp(response1)
    self.addCleanup(setattr, http.client, "HTTPConnection", real_class)

    # Request targets expected on the wire, in order: the original
    # request, then the redirected one.
    urls = iter(("/path", "/path?query"))

    def request(conn, method, url, *pos, **kw):
        self.assertEqual(url, next(urls))
        real_class.request(conn, method, url, *pos, **kw)
        # Change response for subsequent connection
        conn.__class__.fakedata = b"HTTP/1.1 200 OK\r\n\r\nHello!"

    http.client.HTTPConnection.request = request
    fp = urllib.request.urlopen("http://python.org/path")
    # Release the response even if the assertion below fails.
    self.addCleanup(fp.close)
    self.assertEqual(fp.geturl(), "http://python.org/path?query")
def test_redirect_encoding(self):
    # Redirect targets may carry bytes that need percent-encoding, while
    # most ASCII characters are expected to be passed through as if they
    # were already encoded.
    ok_response = (
        b'HTTP/1.1 200 OK\r\n'
        b'Content-Length: 3\r\n'
        b'\r\n'
        b'123'
    )

    class Handler(urllib.request.HTTPHandler):
        def http_open(self, req):
            reply = self.do_open(self.connection, req)
            # Keep the connection's buffer so the test can inspect the
            # request line afterwards.
            self.last_buf = self.connection.buf
            # The request that follows the redirect gets a plain response
            self.connection = test_urllib.fakehttp(ok_response)
            return reply

    handler = Handler()
    opener = urllib.request.build_opener(handler)

    # (raw Location header value, request target expected after redirect)
    cases = (
        (b'/p\xC3\xA5-dansk/', b'/p%C3%A5-dansk/'),
        (b'/spaced%20path/', b'/spaced%20path/'),
        (b'/spaced path/', b'/spaced%20path/'),
        (b'/?p\xC3\xA5-dansk', b'/?p%C3%A5-dansk'),
    )
    for location, result in cases:
        with self.subTest(repr(location)):
            redirect = (
                b'HTTP/1.1 302 Redirect\r\n'
                b'Location: ' + location + b'\r\n'
                b'\r\n'
            )
            handler.connection = test_urllib.fakehttp(redirect)
            response = opener.open('http://example.com/')
            sent = handler.last_buf
            prefix = b'GET ' + result + b' '
            self.assertTrue(sent.startswith(prefix), repr(sent))
def test_proxy(self): def test_proxy(self):
o = OpenerDirector() o = OpenerDirector()
ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128")) ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
......
...@@ -91,6 +91,7 @@ import os ...@@ -91,6 +91,7 @@ import os
import posixpath import posixpath
import re import re
import socket import socket
import string
import sys import sys
import time import time
import collections import collections
...@@ -676,8 +677,12 @@ class HTTPRedirectHandler(BaseHandler): ...@@ -676,8 +677,12 @@ class HTTPRedirectHandler(BaseHandler):
# from the user (of urllib.request, in this case). In practice, # from the user (of urllib.request, in this case). In practice,
# essentially all clients do redirect in this case, so we do # essentially all clients do redirect in this case, so we do
# the same. # the same.
# be conciliant with URIs containing a space
# Be conciliant with URIs containing a space. This is mainly
# redundant with the more complete encoding done in http_error_302(),
# but it is kept for compatibility with other callers.
newurl = newurl.replace(' ', '%20') newurl = newurl.replace(' ', '%20')
CONTENT_HEADERS = ("content-length", "content-type") CONTENT_HEADERS = ("content-length", "content-type")
newheaders = dict((k, v) for k, v in req.headers.items() newheaders = dict((k, v) for k, v in req.headers.items()
if k.lower() not in CONTENT_HEADERS) if k.lower() not in CONTENT_HEADERS)
...@@ -712,11 +717,16 @@ class HTTPRedirectHandler(BaseHandler): ...@@ -712,11 +717,16 @@ class HTTPRedirectHandler(BaseHandler):
"%s - Redirection to url '%s' is not allowed" % (msg, newurl), "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
headers, fp) headers, fp)
if not urlparts.path: if not urlparts.path and urlparts.netloc:
urlparts = list(urlparts) urlparts = list(urlparts)
urlparts[2] = "/" urlparts[2] = "/"
newurl = urlunparse(urlparts) newurl = urlunparse(urlparts)
# http.client.parse_headers() decodes as ISO-8859-1. Recover the
# original bytes and percent-encode non-ASCII bytes, and any special
# characters such as the space.
newurl = quote(
newurl, encoding="iso-8859-1", safe=string.punctuation)
newurl = urljoin(req.full_url, newurl) newurl = urljoin(req.full_url, newurl)
# XXX Probably want to forget about the state of the current # XXX Probably want to forget about the state of the current
......
...@@ -277,6 +277,15 @@ Core and Builtins ...@@ -277,6 +277,15 @@ Core and Builtins
Library Library
------- -------
- Issue #14132: Fix urllib.request redirect handling when the target only has
a query string. Original fix by Ján Janech.
- Issue #17214: The "urllib.request" module now percent-encodes non-ASCII
bytes found in redirect target URLs. Some servers send Location header
fields with non-ASCII bytes, but "http.client" requires the request target
to be ASCII-encodable, otherwise a UnicodeEncodeError is raised. Based on
patch by Christian Heimes.
- Issue #27033: The default value of the decode_data parameter for - Issue #27033: The default value of the decode_data parameter for
smtpd.SMTPChannel and smtpd.SMTPServer constructors is changed to False. smtpd.SMTPChannel and smtpd.SMTPServer constructors is changed to False.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment