Commit 024aaa1b authored by Raymond Hettinger

SF Patch 549151: urllib2 POSTs on redirect

(contributed by John J Lee)
parent 1d5854fd
......@@ -254,12 +254,18 @@ actually retrieve a resource at an \file{https:} URL.
\begin{classdesc}{FancyURLopener}{...}
\class{FancyURLopener} subclasses \class{URLopener} providing default
handling for the following HTTP response codes: 301, 302 or 401. For
301 and 302 response codes, the \mailheader{Location} header is used to
fetch the actual URL. For 401 response codes (authentication
required), basic HTTP authentication is performed. For 301 and 302 response
codes, recursion is bounded by the value of the \var{maxtries} attribute,
which defaults 10.
handling for the following HTTP response codes: 301, 302, 303 and 401.
For 301, 302 and 303 response codes, the \mailheader{Location} header
is used to fetch the actual URL. For 401 response codes
(authentication required), basic HTTP authentication is performed.
For 301, 302 and 303 response codes, recursion is bounded by the value
of the \var{maxtries} attribute, which defaults to 10.
\note{According to the letter of \rfc{2616}, 301 and 302 responses to
POST requests must not be automatically redirected without
confirmation by the user. In reality, browsers do allow automatic
redirection of these responses, changing the POST to a GET, and
\module{urllib} reproduces this behaviour.}
The parameters to the constructor are the same as those for
\class{URLopener}.
......
......@@ -217,6 +217,12 @@ by all handlers except HTTP handlers --- and there it should be an
request to be \code{POST} rather than \code{GET}.
\end{methoddesc}
\begin{methoddesc}[Request]{get_method}{}
Return a string indicating the HTTP request method. This is only
meaningful for HTTP requests, and currently always takes one of the
values ("GET", "POST").
\end{methoddesc}
\begin{methoddesc}[Request]{has_data}{}
Return whether the instance has a non-\code{None} data.
\end{methoddesc}
......@@ -394,25 +400,49 @@ Arguments, return values and exceptions raised should be the same as
for \method{http_error_default()}.
\end{methoddesc}
\subsection{HTTPRedirectHandler Objects \label{http-redirect-handler}}
\note{303 redirection is not supported by this version of
\module{urllib2}.}
\note{Some HTTP redirections require action from this module's client
code. If this is the case, \exception{HTTPError} is raised. See
\rfc{2616} for details of the precise meanings of the various
redirection codes.}
\begin{methoddesc}[HTTPRedirectHandler]{redirect_request}{req,
fp, code, msg, hdrs}
Return a \class{Request} or \code{None} in response to a redirect.
This is called by the default implementations of the
\code{http_error_30x()} methods when a redirection is received from
the server. If a redirection should take place, return a new
\class{Request} to allow \code{http_error_30x()} to perform the
redirect. Otherwise, raise \exception{HTTPError} if no other
\class{Handler} should try to handle this URL, or return \code{None}
if you can't but another \class{Handler} might.
\note{The default implementation of this method does not strictly
follow \rfc{2616}: it allows automatic 302 redirection of POST
requests, because essentially all HTTP clients do this.}
\end{methoddesc}
\begin{methoddesc}[HTTPRedirectHandler]{http_error_301}{req,
fp, code, msg, hdrs}
Redirect to the \code{Location:} URL. This method is called by
the parent \class{OpenerDirector} when getting an HTTP
permanent-redirect response.
`moved permanently' response.
\end{methoddesc}
\begin{methoddesc}[HTTPRedirectHandler]{http_error_302}{req,
fp, code, msg, hdrs}
The same as \method{http_error_301()}, but called for the
temporary-redirect response.
`found' response.
\end{methoddesc}
\begin{methoddesc}[HTTPRedirectHandler]{http_error_303}{req,
fp, code, msg, hdrs}
The same as \method{http_error_301()}, but called for the
`see other' redirect response.
\end{methoddesc}
\subsection{ProxyHandler Objects \label{proxy-handler}}
......
......@@ -586,6 +586,10 @@ class FancyURLopener(URLopener):
"""Error 301 -- also relocated (permanently)."""
return self.http_error_302(url, fp, errcode, errmsg, headers, data)
def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 303 -- 'see other'; handled exactly like a 302.

    All arguments are forwarded unchanged, and in the same order, to
    http_error_302(); whatever that method returns is returned here.
    """
    # No 303-specific logic: reuse the 302 handler as-is.
    handle_302 = self.http_error_302
    result = handle_302(url, fp, errcode, errmsg, headers, data)
    return result
def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
"""Error 401 -- authentication required.
See this URL for a description of the basic authentication scheme:
......
......@@ -11,8 +11,8 @@ option. The OpenerDirector is a composite object that invokes the
Handlers needed to open the requested URL. For example, the
HTTPHandler performs HTTP GET and POST requests and deals with
non-error returns. The HTTPRedirectHandler automatically deals with
HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
with digest authentication.
HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
deals with digest authentication.
urlopen(url, data=None) -- basic usage is the same as the original
urllib.  Pass the url and optionally data to post to an HTTP URL, and
......@@ -207,6 +207,12 @@ class Request:
return getattr(self, attr)
raise AttributeError, attr
def get_method(self):
    """Return the HTTP request method as a string.

    The result is "POST" when has_data() reports true for this
    request and "GET" otherwise.
    """
    # Guard clause: a request without data is a plain GET.
    if not self.has_data():
        return "GET"
    return "POST"
def add_data(self, data):
self.data = data
......@@ -402,6 +408,26 @@ class HTTPDefaultErrorHandler(BaseHandler):
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
class HTTPRedirectHandler(BaseHandler):
def redirect_request(self, req, fp, code, msg, headers, newurl=None):
    """Return a Request or None in response to a redirect.

    This is called by the http_error_30x methods when a redirection
    response is received.  If a redirection should take place, return a
    new Request to allow http_error_30x to perform the redirect.
    Otherwise, raise HTTPError if no other handler should try to handle
    this URL.  Return None if this handler cannot redirect but another
    handler might.

    Arguments:
    req     -- the Request that produced the redirect response
    fp      -- the response body object, passed through to HTTPError
    code    -- the numeric HTTP status code (301, 302, 303 or 307)
    msg     -- the status message, passed through to HTTPError
    headers -- the response headers
    newurl  -- optional, the already-resolved redirect target; when
               omitted it is taken from the 'location' (or 'uri')
               response header and resolved against the request URL

    Raises HTTPError when the code/method combination must not be
    redirected automatically.
    """
    if newurl is None:
        # Callers using the five-argument form do not supply the target,
        # so recover it from the response headers.
        # NOTE(review): assumes `headers` offers a dict-style get();
        # confirm against the message class the HTTP handler passes in.
        location = headers.get('location') or headers.get('uri')
        if not location:
            # Nothing to redirect to; let another handler have a go.
            return None
        # Local import keeps this block self-contained; the Location
        # value may be relative, so resolve it against the request URL.
        import urlparse
        newurl = urlparse.urljoin(req.get_full_url(), location)

    # Request exposes the method through get_method(), not method().
    method = req.get_method()
    if ((code in (301, 302, 303, 307) and method in ("GET", "HEAD")) or
        (code in (302, 303) and method == "POST")):
        # Strictly (according to RFC 2616), 302 in response to a POST
        # MUST NOT cause a redirection without confirmation from the user
        # (of urllib2, in this case).  In practice, essentially all clients
        # do redirect in this case, so we do the same.
        # The new Request carries no data, so a redirected POST is
        # re-issued without its body.
        return Request(newurl, headers=req.headers)

    # Any other combination needs a decision from the client code.
    raise HTTPError(req.get_full_url(), code, msg, headers, fp)
# Implementation note: To avoid the server sending us into an
# infinite loop, the request object needs to track what URLs we
# have already seen. Do this by adding a handler-specific
......@@ -418,7 +444,11 @@ class HTTPRedirectHandler(BaseHandler):
# XXX Probably want to forget about the state of the current
# request, although that might interact poorly with other
# handlers that also use handler-specific request attributes
new = Request(newurl, req.get_data(), req.headers)
new = self.redirect_request(req, fp, code, msg, headers)
if new is None:
return
# loop detection
new.error_302_dict = {}
if hasattr(req, 'error_302_dict'):
if len(req.error_302_dict)>10 or \
......@@ -435,7 +465,7 @@ class HTTPRedirectHandler(BaseHandler):
return self.parent.open(new)
http_error_301 = http_error_302
http_error_301 = http_error_303 = http_error_307 = http_error_302
inf_msg = "The HTTP server returned a redirect error that would" \
"lead to an infinite loop.\n" \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment