Commit 74b5c5c7 authored by Raymond Hettinger

SF Patch 549151: urllib2 POSTs on redirect

(contributed by John J Lee)
parent 7adc162c
...@@ -254,12 +254,18 @@ actually retrieve a resource at an \file{https:} URL. ...@@ -254,12 +254,18 @@ actually retrieve a resource at an \file{https:} URL.
\begin{classdesc}{FancyURLopener}{...} \begin{classdesc}{FancyURLopener}{...}
\class{FancyURLopener} subclasses \class{URLopener} providing default \class{FancyURLopener} subclasses \class{URLopener} providing default
handling for the following HTTP response codes: 301, 302 or 401. For handling for the following HTTP response codes: 301, 302, 303 and 401.
301 and 302 response codes, the \mailheader{Location} header is used to For 301, 302 and 303 response codes, the \mailheader{Location} header
fetch the actual URL. For 401 response codes (authentication is used to fetch the actual URL. For 401 response codes
required), basic HTTP authentication is performed. For 301 and 302 response (authentication required), basic HTTP authentication is performed.
codes, recursion is bounded by the value of the \var{maxtries} attribute, For 301, 302 and 303 response codes, recursion is bounded by the value
which defaults to 10. of the \var{maxtries} attribute, which defaults to 10.
\note{According to the letter of \rfc{2616}, 301 and 302 responses to
POST requests must not be automatically redirected without
confirmation by the user. In reality, browsers do allow automatic
redirection of these responses, changing the POST to a GET, and
\module{urllib} reproduces this behaviour.}
The parameters to the constructor are the same as those for The parameters to the constructor are the same as those for
\class{URLopener}. \class{URLopener}.
......
...@@ -217,6 +217,12 @@ by all handlers except HTTP handlers --- and there it should be an ...@@ -217,6 +217,12 @@ by all handlers except HTTP handlers --- and there it should be an
request to be \code{POST} rather than \code{GET}. request to be \code{POST} rather than \code{GET}.
\end{methoddesc} \end{methoddesc}
\begin{methoddesc}[Request]{get_method}{}
Return a string indicating the HTTP request method. This is only
meaningful for HTTP requests, and currently always takes one of the
values ("GET", "POST").
\end{methoddesc}
\begin{methoddesc}[Request]{has_data}{} \begin{methoddesc}[Request]{has_data}{}
Return whether the instance has a non-\code{None} data. Return whether the instance has a non-\code{None} data.
\end{methoddesc} \end{methoddesc}
...@@ -394,25 +400,49 @@ Arguments, return values and exceptions raised should be the same as ...@@ -394,25 +400,49 @@ Arguments, return values and exceptions raised should be the same as
for \method{http_error_default()}. for \method{http_error_default()}.
\end{methoddesc} \end{methoddesc}
\subsection{HTTPRedirectHandler Objects \label{http-redirect-handler}} \subsection{HTTPRedirectHandler Objects \label{http-redirect-handler}}
\note{303 redirection is not supported by this version of \note{Some HTTP redirections require action from this module's client
\module{urllib2}.} code. If this is the case, \exception{HTTPError} is raised. See
\rfc{2616} for details of the precise meanings of the various
redirection codes.}
\begin{methoddesc}[HTTPRedirectHandler]{redirect_request}{req,
fp, code, msg, hdrs}
Return a \class{Request} or \code{None} in response to a redirect.
This is called by the default implementations of the
\code{http_error_30x()} methods when a redirection is received from
the server. If a redirection should take place, return a new
\class{Request} to allow \code{http_error_30x()} to perform the
redirect. Otherwise, raise \exception{HTTPError} if no other
\class{Handler} should try to handle this URL, or return \code{None}
if you can't but another \class{Handler} might.
\note{The default implementation of this method does not strictly
follow \rfc{2616}: it allows automatic 302 redirection of POST
requests, because essentially all HTTP clients do this.}
\end{methoddesc}
\begin{methoddesc}[HTTPRedirectHandler]{http_error_301}{req, \begin{methoddesc}[HTTPRedirectHandler]{http_error_301}{req,
fp, code, msg, hdrs} fp, code, msg, hdrs}
Redirect to the \code{Location:} URL. This method is called by Redirect to the \code{Location:} URL. This method is called by
the parent \class{OpenerDirector} when getting an HTTP the parent \class{OpenerDirector} when getting an HTTP
permanent-redirect response. `moved permanently' response.
\end{methoddesc} \end{methoddesc}
\begin{methoddesc}[HTTPRedirectHandler]{http_error_302}{req, \begin{methoddesc}[HTTPRedirectHandler]{http_error_302}{req,
fp, code, msg, hdrs} fp, code, msg, hdrs}
The same as \method{http_error_301()}, but called for the The same as \method{http_error_301()}, but called for the
temporary-redirect response. `found' response.
\end{methoddesc} \end{methoddesc}
\begin{methoddesc}[HTTPRedirectHandler]{http_error_303}{req,
fp, code, msg, hdrs}
The same as \method{http_error_301()}, but called for the
`see other' redirect response.
\end{methoddesc}
\subsection{ProxyHandler Objects \label{proxy-handler}} \subsection{ProxyHandler Objects \label{proxy-handler}}
......
...@@ -586,6 +586,10 @@ class FancyURLopener(URLopener): ...@@ -586,6 +586,10 @@ class FancyURLopener(URLopener):
"""Error 301 -- also relocated (permanently).""" """Error 301 -- also relocated (permanently)."""
return self.http_error_302(url, fp, errcode, errmsg, headers, data) return self.http_error_302(url, fp, errcode, errmsg, headers, data)
def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 303 -- "see other"; handled exactly like a 302 redirect."""
    # All arguments are forwarded unchanged to the 302 handler.
    handle_redirect = self.http_error_302
    return handle_redirect(url, fp, errcode, errmsg, headers, data)
def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
"""Error 401 -- authentication required. """Error 401 -- authentication required.
See this URL for a description of the basic authentication scheme: See this URL for a description of the basic authentication scheme:
......
...@@ -11,8 +11,8 @@ option. The OpenerDirector is a composite object that invokes the ...@@ -11,8 +11,8 @@ option. The OpenerDirector is a composite object that invokes the
Handlers needed to open the requested URL. For example, the Handlers needed to open the requested URL. For example, the
HTTPHandler performs HTTP GET and POST requests and deals with HTTPHandler performs HTTP GET and POST requests and deals with
non-error returns. The HTTPRedirectHandler automatically deals with non-error returns. The HTTPRedirectHandler automatically deals with
HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
with digest authentication. deals with digest authentication.
urlopen(url, data=None) -- basic usage is the same as original urlopen(url, data=None) -- basic usage is the same as original
urllib.  Pass the url and optionally data to post to an HTTP URL, and urllib.  Pass the url and optionally data to post to an HTTP URL, and
...@@ -207,6 +207,12 @@ class Request: ...@@ -207,6 +207,12 @@ class Request:
return getattr(self, attr) return getattr(self, attr)
raise AttributeError, attr raise AttributeError, attr
def get_method(self):
    """Return the HTTP request method as a string: "POST" or "GET".

    A request that carries data (has_data() is true) is a POST;
    every other request is a GET.
    """
    if not self.has_data():
        return "GET"
    return "POST"
def add_data(self, data): def add_data(self, data):
self.data = data self.data = data
...@@ -402,6 +408,26 @@ class HTTPDefaultErrorHandler(BaseHandler): ...@@ -402,6 +408,26 @@ class HTTPDefaultErrorHandler(BaseHandler):
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp) raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
class HTTPRedirectHandler(BaseHandler): class HTTPRedirectHandler(BaseHandler):
def redirect_request(self, req, fp, code, msg, headers, newurl=None):
    """Return a Request or None in response to a redirect.

    This is called by the http_error_30x methods when a redirection
    response is received.  If a redirection should take place, return a
    new Request to allow http_error_30x to perform the redirect.
    Otherwise, raise HTTPError if no other handler should try to handle
    this URL.  Return None if this handler cannot redirect but another
    handler might.

    Arguments:
    req     -- the Request that drew the redirect response
    fp, code, msg, headers -- the response details; they are passed on
               to HTTPError when the redirect is refused
    newurl  -- the redirect target.  When omitted, the target is read
               from the response's Location (or URI) header.
    """
    method = req.get_method()
    if not (code in (301, 302, 303, 307) and method in ("GET", "HEAD") or
            code in (302, 303) and method == "POST"):
        raise HTTPError(req.get_full_url(), code, msg, headers, fp)

    # Strictly (according to RFC 2616), 302 in response to a POST
    # MUST NOT cause a redirection without confirmation from the user
    # (of urllib2, in this case).  In practice, essentially all clients
    # do redirect in this case, so we do the same.
    if newurl is None:
        # NOTE(review): the header value is used as-is; a relative
        # Location is not resolved against the request URL here, so
        # callers that have already resolved it should pass newurl.
        newurl = headers.get("location") or headers.get("uri")
    if not newurl:
        # No target to redirect to; let another handler try.
        return None
    # The new request carries the original headers but no data, so a
    # redirected POST is re-issued without its body.
    return Request(newurl, headers=req.headers)
# Implementation note: To avoid the server sending us into an # Implementation note: To avoid the server sending us into an
# infinite loop, the request object needs to track what URLs we # infinite loop, the request object needs to track what URLs we
# have already seen. Do this by adding a handler-specific # have already seen. Do this by adding a handler-specific
...@@ -418,7 +444,11 @@ class HTTPRedirectHandler(BaseHandler): ...@@ -418,7 +444,11 @@ class HTTPRedirectHandler(BaseHandler):
# XXX Probably want to forget about the state of the current # XXX Probably want to forget about the state of the current
# request, although that might interact poorly with other # request, although that might interact poorly with other
# handlers that also use handler-specific request attributes # handlers that also use handler-specific request attributes
new = Request(newurl, req.get_data(), req.headers) new = self.redirect_request(req, fp, code, msg, headers)
if new is None:
return
# loop detection
new.error_302_dict = {} new.error_302_dict = {}
if hasattr(req, 'error_302_dict'): if hasattr(req, 'error_302_dict'):
if len(req.error_302_dict)>10 or \ if len(req.error_302_dict)>10 or \
...@@ -435,7 +465,7 @@ class HTTPRedirectHandler(BaseHandler): ...@@ -435,7 +465,7 @@ class HTTPRedirectHandler(BaseHandler):
return self.parent.open(new) return self.parent.open(new)
http_error_301 = http_error_302 http_error_301 = http_error_303 = http_error_307 = http_error_302
inf_msg = "The HTTP server returned a redirect error that would" \ inf_msg = "The HTTP server returned a redirect error that would" \
"lead to an infinite loop.\n" \ "lead to an infinite loop.\n" \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment