Commit 2a6ba909 authored by Martin v. Löwis's avatar Martin v. Löwis

Patch #963318: Add support for client-side cookie management.

parent 0a6d0ff8
......@@ -233,6 +233,7 @@ and how to embed it in other applications.
\input{libbasehttp}
\input{libsimplehttp}
\input{libcgihttp}
\input{libcookielib}
\input{libcookie}
\input{libxmlrpclib}
\input{libsimplexmlrpc}
......
......@@ -68,6 +68,10 @@ you should not use the \class{SerialCookie} class.
\begin{seealso}
\seemodule{cookielib}{HTTP cookie handling for web
\emph{clients}. The \module{cookielib} and \module{Cookie}
modules do not depend on each other.}
\seerfc{2109}{HTTP State Management Mechanism}{This is the state
management specification implemented by this module.}
\end{seealso}
......
This diff is collapsed.
This diff is collapsed.
......@@ -569,6 +569,25 @@ For example:
%======================================================================
% whole new modules get described in \subsections here
\subsection{cookielib}
The \module{cookielib} library supports client-side handling for HTTP
cookies, just as the \module{Cookie} module provides server-side cookie
support in CGI scripts. This library manages cookies in a way similar
to web browsers. Cookies are stored in cookie jars; the library
transparently stores cookies offered by the web server in the cookie
jar, and fetches the cookie from the jar when connecting to the
server. Similar to web browsers, policy objects control whether
cookies are accepted or not.
In order to store cookies across sessions, two implementations of
cookie jars are provided: one that stores cookies in the Netscape
format, so applications can use the Mozilla or Lynx cookie jars, and
one that stores cookies in the same format as the Perl libwww library.
\module{urllib2} has been changed to interact with \module{cookielib}:
\class{HTTPCookieProcessor} manages a cookie jar that is used when
accessing URLs.
% ======================================================================
\section{Build and C API Changes}
......
"""Load / save to libwww-perl (LWP) format files.
Actually, the format is slightly extended from that used by LWP's
(libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965 information
not recorded by LWP.
It uses the version string "2.0", though really there isn't an LWP Cookies
2.0 format. This indicates that there is extra information in here
(domain_dot and port_spec) while still being compatible with
libwww-perl, I hope.
"""
import time, re, logging
from cookielib import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
MISSING_FILENAME_TEXT, join_header_words, split_header_words, \
iso2time, time2isoz
def lwp_cookie_str(cookie):
    """Return string representation of Cookie in the LWP cookie file format.

    Actually, the format is extended a bit -- see module docstring.

    cookie -- object providing the attributes read below (name, value,
        path, domain, port, the *_specified flags, domain_initial_dot,
        secure, expires, discard, comment, comment_url, version and the
        _rest mapping of non-standard attributes).

    Returns the string built by join_header_words() from the cookie's
    attributes; the "Set-Cookie3:" prefix is not included.

    """
    h = [(cookie.name, cookie.value),
         ("path", cookie.path),
         ("domain", cookie.domain)]
    if cookie.port is not None:
        h.append(("port", cookie.port))
    # Flag attributes are written as bare words, i.e. with a value of None.
    if cookie.path_specified:
        h.append(("path_spec", None))
    if cookie.port_specified:
        h.append(("port_spec", None))
    if cookie.domain_initial_dot:
        h.append(("domain_dot", None))
    if cookie.secure:
        h.append(("secure", None))
    # Compare against None instead of relying on truthiness: an expiry time
    # of 0 (the epoch) is a real value and must not be dropped on output.
    if cookie.expires is not None:
        h.append(("expires", time2isoz(float(cookie.expires))))
    if cookie.discard:
        h.append(("discard", None))
    if cookie.comment:
        h.append(("comment", cookie.comment))
    if cookie.comment_url:
        h.append(("commenturl", cookie.comment_url))

    # Non-standard attributes are emitted in sorted key order so that the
    # output does not depend on dictionary ordering.
    for k in sorted(cookie._rest.keys()):
        h.append((k, str(cookie._rest[k])))

    h.append(("version", str(cookie.version)))

    return join_header_words([h])
class LWPCookieJar(FileCookieJar):
    """
    The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
    "Set-Cookie3" is the format used by the libwww-perl library, not known
    to be compatible with any browser, but which is easy to read and
    doesn't lose information about RFC 2965 cookies.

    Additional methods

    as_lwp_str(ignore_discard=True, ignore_expires=True)

    """

    def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
        """Return cookies as a string of newline-separated "Set-Cookie3" headers.

        ignore_discard and ignore_expires: see docstring for FileCookieJar.save

        When ignore_discard is false, cookies whose discard flag is set are
        left out; when ignore_expires is false, cookies for which
        is_expired(now) is true are left out.  Every emitted header line is
        terminated by a newline.

        """
        now = time.time()
        r = []
        for cookie in self:
            if not ignore_discard and cookie.discard:
                continue
            if not ignore_expires and cookie.is_expired(now):
                continue
            r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
        # The trailing empty element makes the joined text end in "\n".
        return "\n".join(r+[""])

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the jar to filename (or self.filename) in Set-Cookie3 format.

        Raises ValueError if no filename is given and self.filename is None.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename, "w")
        try:
            # There really isn't an LWP Cookies 2.0 format, but this indicates
            # that there is extra information in here (domain_dot and
            # port_spec) while still being compatible with libwww-perl, I hope.
            f.write("#LWP-Cookies-2.0\n")
            f.write(self.as_lwp_str(ignore_discard, ignore_expires))
        finally:
            f.close()

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Read "Set-Cookie3" lines from the open file f into this jar.

        f -- file-like object; only readline() is used
        filename -- used only in error messages
        ignore_discard, ignore_expires -- when false, cookies marked discard
            (respectively already expired) are not added to the jar

        Raises IOError if the first line does not match self.magic_re or if
        a record cannot be parsed.

        """
        # NOTE(review): magic_re is not defined in this class; presumably it
        # is supplied by a base class -- confirm.
        magic = f.readline()
        if not re.search(self.magic_re, magic):
            msg = "%s does not seem to contain cookies" % filename
            raise IOError(msg)

        now = time.time()

        header = "Set-Cookie3:"
        boolean_attrs = ("port_spec", "path_spec", "domain_dot",
                         "secure", "discard")
        value_attrs = ("version",
                       "port", "path", "domain",
                       "expires",
                       "comment", "commenturl")

        try:
            while 1:
                line = f.readline()
                if line == "": break
                if not line.startswith(header):
                    continue
                line = line[len(header):].strip()

                for data in split_header_words([line]):
                    name, value = data[0]
                    # name and value are an exception here, since a plain "foo"
                    # (with no "=", unlike "bar=foo") means a cookie with no
                    # name and value "foo".  With all other cookie-attributes,
                    # the situation is reversed: "foo" means an attribute named
                    # "foo" with no value!
                    if value is None:
                        name, value = value, name
                    standard = {}
                    rest = {}
                    for k in boolean_attrs:
                        standard[k] = False
                    for k, v in data[1:]:
                        if k is not None:
                            lc = k.lower()
                        else:
                            lc = None
                        # don't lose case distinction for unknown fields
                        if (lc in value_attrs) or (lc in boolean_attrs):
                            k = lc
                        if k in boolean_attrs:
                            if v is None: v = True
                            standard[k] = v
                        elif k in value_attrs:
                            standard[k] = v
                        else:
                            rest[k] = v

                    h = standard.get
                    expires = h("expires")
                    discard = h("discard")
                    if expires is not None:
                        expires = iso2time(expires)
                    if expires is None:
                        # no usable expiry time: treat as a session cookie
                        discard = True
                    domain = h("domain")
                    if domain is None:
                        # A record without a domain attribute is malformed.
                        # Report it explicitly instead of letting
                        # None.startswith() fail and be caught below.
                        raise IOError(
                            "invalid Set-Cookie3 format file %s" % filename)
                    domain_specified = domain.startswith(".")
                    c = Cookie(h("version"), name, value,
                               h("port"), h("port_spec"),
                               domain, domain_specified, h("domain_dot"),
                               h("path"), h("path_spec"),
                               h("secure"),
                               expires,
                               discard,
                               h("comment"),
                               h("commenturl"),
                               rest)
                    if not ignore_discard and c.discard:
                        continue
                    if not ignore_expires and c.is_expired(now):
                        continue
                    self.set_cookie(c)
        except IOError:
            # I/O and format errors raised above propagate unchanged.
            raise
        except:
            # Catch-all for input that is bad in unexpected ways; the helper
            # is given the chance to re-raise exceptions that must not be
            # masked before everything else is reported as a format error.
            reraise_unmasked_exceptions((IOError,))
            raise IOError("invalid Set-Cookie3 format file %s" % filename)
"""Mozilla / Netscape cookie loading / saving."""
import re, time, logging
from cookielib import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
MISSING_FILENAME_TEXT
class MozillaCookieJar(FileCookieJar):
    """

    WARNING: you may want to backup your browser's cookies file if you use
    this class to save cookies.  I *think* it works, but there have been
    bugs in the past!

    This class differs from CookieJar only in the format it uses to save and
    load cookies to and from a file.  This class uses the Mozilla/Netscape
    `cookies.txt' format.  lynx uses this file format, too.

    Don't expect cookies saved while the browser is running to be noticed by
    the browser (in fact, Mozilla on unix will overwrite your saved cookies if
    you change them on disk while it's running; on Windows, you probably can't
    save at all while the browser is running).

    Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
    Netscape cookies on saving.

    In particular, the cookie version and port number information is lost,
    together with information about whether or not Path, Port and Discard were
    specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
    domain as set in the HTTP header started with a dot (yes, I'm aware some
    domains in Netscape files start with a dot and some don't -- trust me, you
    really don't want to know any more about this).

    Note that though Mozilla and Netscape use the same format, they use
    slightly different headers.  The class saves cookies using the Netscape
    header by default (Mozilla can cope with that).

    """
    # Pattern the first line of a file must match to be accepted by
    # _really_load().
    magic_re = "#( Netscape)? HTTP Cookie File"
    # Text written at the top of every file produced by save().
    header = """\
# Netscape HTTP Cookie File
# http://www.netscape.com/newsref/std/cookie_spec.html
# This is a generated file! Do not edit.
"""

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Read tab-separated cookie records from the open file f.

        f -- file-like object; readline() and close() are used
        filename -- used only in error messages
        ignore_discard, ignore_expires -- when false, cookies marked discard
            (respectively already expired) are not added to the jar

        Raises IOError if the first line does not match magic_re or if a
        record cannot be parsed.

        """
        now = time.time()

        magic = f.readline()
        if not re.search(self.magic_re, magic):
            f.close()
            raise IOError(
                "%s does not look like a Netscape format cookies file" %
                filename)

        # Bound up front so the error handler below can always format it.
        line = ""
        try:
            while 1:
                line = f.readline()
                if line == "": break

                # last field may be absent, so keep any trailing tab
                if line.endswith("\n"): line = line[:-1]

                # skip comments and blank lines XXX what is $ for?
                stripped = line.strip()
                if (stripped.startswith("#") or
                    stripped.startswith("$") or
                    stripped == ""):
                    continue

                # A record has exactly seven tab-separated fields; any other
                # count makes the unpacking fail and is reported below.
                domain, domain_specified, path, secure, expires, name, value = \
                        line.split("\t")
                secure = (secure == "TRUE")
                domain_specified = (domain_specified == "TRUE")
                if name == "":
                    # an empty name field means the cookie has no value
                    name = value
                    value = None

                initial_dot = domain.startswith(".")
                # Validate with an explicit raise rather than an assert,
                # which would be compiled away under -O.
                if domain_specified != initial_dot:
                    raise IOError("invalid Netscape format file %s: %s" %
                                  (filename, line))

                discard = False
                if expires == "":
                    expires = None
                    discard = True

                # assume path_specified is false
                c = Cookie(0, name, value,
                           None, False,
                           domain, domain_specified, initial_dot,
                           path, False,
                           secure,
                           expires,
                           discard,
                           None,
                           None,
                           {})
                if not ignore_discard and c.discard:
                    continue
                if not ignore_expires and c.is_expired(now):
                    continue
                self.set_cookie(c)

        except IOError:
            # I/O and format errors raised above propagate unchanged.
            raise
        except:
            # Catch-all for input that is bad in unexpected ways; the helper
            # is given the chance to re-raise exceptions that must not be
            # masked before everything else is reported as a format error.
            reraise_unmasked_exceptions((IOError,))
            raise IOError("invalid Netscape format file %s: %s" %
                          (filename, line))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the jar to filename (or self.filename) in cookies.txt format.

        When ignore_discard is false, cookies whose discard flag is set are
        not written; when ignore_expires is false, expired cookies are not
        written.

        Raises ValueError if no filename is given and self.filename is None.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename, "w")
        try:
            f.write(self.header)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure: secure = "TRUE"
                else: secure = "FALSE"
                if cookie.domain.startswith("."): initial_dot = "TRUE"
                else: initial_dot = "FALSE"
                if cookie.expires is not None:
                    expires = str(cookie.expires)
                else:
                    expires = ""
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas cookielib regards it as a
                    # cookie with no value.
                    name = ""
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    "\t".join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value])+
                    "\n")
        finally:
            f.close()
This diff is collapsed.
This diff is collapsed.
......@@ -54,6 +54,10 @@ class MockFile:
def readline(self, count=None): pass
def close(self): pass
class MockHeaders(dict):
    """Dict-based stand-in for a headers object, offering getheaders()."""

    def getheaders(self, name):
        """Return a list holding the value stored under name.

        The list is empty when name is not a key, so values belonging to
        other headers are never returned.

        """
        if name in self:
            return [self[name]]
        return []
class MockResponse(StringIO.StringIO):
def __init__(self, code, msg, headers, data, url=None):
StringIO.StringIO.__init__(self, data)
......@@ -63,6 +67,12 @@ class MockResponse(StringIO.StringIO):
def geturl(self):
return self.url
class MockCookieJar:
    """Test double that records the arguments of the calls made on it."""

    def add_cookie_header(self, request):
        # Keep the request so a test can check which one was passed in.
        self.ach_req = request

    def extract_cookies(self, response, request):
        # Keep both arguments for later inspection by the test.
        self.ec_req = request
        self.ec_r = response
class FakeMethod:
def __init__(self, meth_name, action, handle):
self.meth_name = meth_name
......@@ -474,7 +484,7 @@ class HandlerTests(unittest.TestCase):
for data in "", None: # POST, GET
req = Request("http://example.com/", data)
r = MockResponse(200, "OK", {}, "")
newreq = h.do_request(req)
newreq = h.do_request_(req)
if data is None: # GET
self.assert_("Content-length" not in req.unredirected_hdrs)
self.assert_("Content-type" not in req.unredirected_hdrs)
......@@ -491,7 +501,7 @@ class HandlerTests(unittest.TestCase):
req.add_unredirected_header("Content-type", "bar")
req.add_unredirected_header("Host", "baz")
req.add_unredirected_header("Spam", "foo")
newreq = h.do_request(req)
newreq = h.do_request_(req)
self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
self.assertEqual(req.unredirected_hdrs["Host"], "baz")
......@@ -514,6 +524,21 @@ class HandlerTests(unittest.TestCase):
self.assertEqual(o.proto, "http") # o.error called
self.assertEqual(o.args, (req, r, 201, "Created", {}))
def test_cookies(self):
    """HTTPCookieProcessor hands requests and responses to its cookie jar."""
    jar = MockCookieJar()
    processor = urllib2.HTTPCookieProcessor(jar)
    processor.parent = MockOpener()

    request = Request("http://example.com/")
    response = MockResponse(200, "OK", {}, "")

    # Request side: the very same request object goes to the jar and
    # comes back from the processor.
    returned_request = processor.http_request(request)
    self.assert_(jar.ach_req is request and request is returned_request)
    self.assertEquals(request.get_origin_req_host(), "example.com")
    self.assert_(not request.is_unverifiable())

    # Response side: the jar sees the request/response pair and the
    # response object is returned as is.
    returned_response = processor.http_response(request, response)
    self.assert_(jar.ec_req is request)
    self.assert_(jar.ec_r is response and response is returned_response)
def test_redirect(self):
from_url = "http://example.com/a.html"
to_url = "http://example.com/b.html"
......@@ -528,7 +553,8 @@ class HandlerTests(unittest.TestCase):
req.add_header("Nonsense", "viking=withhold")
req.add_unredirected_header("Spam", "spam")
try:
method(req, MockFile(), code, "Blah", {"location": to_url})
method(req, MockFile(), code, "Blah",
MockHeaders({"location": to_url}))
except urllib2.HTTPError:
# 307 in response to POST requires user OK
self.assert_(code == 307 and data is not None)
......@@ -544,38 +570,65 @@ class HandlerTests(unittest.TestCase):
# loop detection
req = Request(from_url)
req.origin_req_host = "example.com"
def redirect(h, req, code, url=to_url):
method = getattr(h, "http_error_%s" % code)
method(req, MockFile(), code, "Blah", {"location": url})
def redirect(h, req, url=to_url):
h.http_error_302(req, MockFile(), 302, "Blah",
MockHeaders({"location": url}))
# Note that the *original* request shares the same record of
# redirections with the sub-requests caused by the redirections.
# once
redirect(h, req, 302)
# twice: loop detected
self.assertRaises(urllib2.HTTPError, redirect, h, req, 302)
# and again
self.assertRaises(urllib2.HTTPError, redirect, h, req, 302)
# but this is a different redirect code, so OK...
redirect(h, req, 301)
self.assertRaises(urllib2.HTTPError, redirect, h, req, 301)
# order doesn't matter
redirect(h, req, 303)
redirect(h, req, 307)
self.assertRaises(urllib2.HTTPError, redirect, h, req, 303)
# detect infinite loop redirect of a URL to itself
req = Request(from_url, origin_req_host="example.com")
count = 0
try:
while 1:
redirect(h, req, "http://example.com/")
count = count + 1
except urllib2.HTTPError:
# don't stop until max_repeats, because cookies may introduce state
self.assertEqual(count, urllib2.HTTPRedirectHandler.max_repeats)
# detect endless non-repeating chain of redirects
req = Request(from_url)
req.origin_req_host = "example.com"
req = Request(from_url, origin_req_host="example.com")
count = 0
try:
while 1:
redirect(h, req, 302, "http://example.com/%d" % count)
redirect(h, req, "http://example.com/%d" % count)
count = count + 1
except urllib2.HTTPError:
self.assertEqual(count,
urllib2.HTTPRedirectHandler.max_redirections)
def test_cookie_redirect(self):
    """A cookie held for one host is not sent to the redirect target."""

    class MockHTTPHandler(urllib2.HTTPHandler):
        """Answers the first request with a 302 and later ones with a 200."""

        def __init__(self):
            self._count = 0

        def http_open(self, req):
            import mimetools
            from StringIO import StringIO
            if self._count != 0:
                # Follow-up request: remember it so the test can look at
                # its headers, then answer with an empty 200 response.
                self.req = req
                msg = mimetools.Message(StringIO("\r\n\r\n"))
                return MockResponse(200, "OK", msg, "", req.get_full_url())
            # First request: redirect to a different host.
            self._count += 1
            msg = mimetools.Message(
                StringIO("Location: http://www.cracker.com/\r\n\r\n"))
            return self.parent.error(
                "http", req, MockFile(), 302, "Found", msg)

    # cookies shouldn't leak into redirected requests
    from cookielib import CookieJar
    from urllib2 import build_opener, HTTPHandler, HTTPError, \
         HTTPCookieProcessor
    from test_cookielib import interact_netscape

    jar = CookieJar()
    interact_netscape(jar, "http://www.example.com/", "spam=eggs")
    handler = MockHTTPHandler()
    processor = HTTPCookieProcessor(jar)
    opener = build_opener(handler, processor)
    opener.open("http://www.example.com/")
    self.assert_(not handler.req.has_header("Cookie"))
class MiscTests(unittest.TestCase):
......
......@@ -106,6 +106,7 @@ import sys
import time
import urlparse
import bisect
import cookielib
try:
from cStringIO import StringIO
......@@ -176,7 +177,8 @@ class GopherError(URLError):
class Request:
def __init__(self, url, data=None, headers={}):
def __init__(self, url, data=None, headers={},
origin_req_host=None, unverifiable=False):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
self.__original = unwrap(url)
self.type = None
......@@ -188,6 +190,10 @@ class Request:
for key, value in headers.items():
self.add_header(key, value)
self.unredirected_hdrs = {}
if origin_req_host is None:
origin_req_host = cookielib.request_host(self)
self.origin_req_host = origin_req_host
self.unverifiable = unverifiable
def __getattr__(self, attr):
# XXX this is a fallback mechanism to guard against these
......@@ -242,6 +248,12 @@ class Request:
self.host, self.type = host, type
self.__r_host = self.__original
def get_origin_req_host(self):
    """Return the origin_req_host attribute stored on this request."""
    return self.origin_req_host
def is_unverifiable(self):
    """Return the unverifiable attribute stored on this request."""
    return self.unverifiable
def add_header(self, key, val):
# useful for something like authentication
self.headers[key.capitalize()] = val
......@@ -254,6 +266,15 @@ class Request:
return bool(header_name in self.headers or
header_name in self.unredirected_hdrs)
def get_header(self, header_name, default=None):
    """Return the value of header_name, or default if it is not set.

    Regular headers take precedence; the unredirected headers are only
    consulted when the name is absent from self.headers.

    """
    fallback = self.unredirected_hdrs.get(header_name, default)
    return self.headers.get(header_name, fallback)
def header_items(self):
    """Return the (name, value) items of all headers of this request.

    Unredirected headers are merged with the regular ones; on a name
    clash the regular header's value is the one reported.  Neither
    underlying mapping is modified.

    """
    merged = self.unredirected_hdrs.copy()
    for name, value in self.headers.items():
        merged[name] = value
    return merged.items()
class OpenerDirector:
def __init__(self):
......@@ -460,7 +481,11 @@ class HTTPDefaultErrorHandler(BaseHandler):
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
class HTTPRedirectHandler(BaseHandler):
# maximum number of redirections before assuming we're in a loop
# maximum number of redirections to any single URL
# this is needed because of the state that cookies introduce
max_repeats = 4
# maximum total number of redirections (regardless of URL) before
# assuming we're in a loop
max_redirections = 10
def redirect_request(self, req, fp, code, msg, headers, newurl):
......@@ -481,7 +506,10 @@ class HTTPRedirectHandler(BaseHandler):
# from the user (of urllib2, in this case). In practice,
# essentially all clients do redirect in this case, so we
# do the same.
return Request(newurl, headers=req.headers)
return Request(newurl,
headers=req.headers,
origin_req_host=req.get_origin_req_host(),
unverifiable=True)
else:
raise HTTPError(req.get_full_url(), code, msg, headers, fp)
......@@ -490,10 +518,12 @@ class HTTPRedirectHandler(BaseHandler):
# have already seen. Do this by adding a handler-specific
# attribute to the Request object.
def http_error_302(self, req, fp, code, msg, headers):
# Some servers (incorrectly) return multiple Location headers
# (so probably same goes for URI). Use first header.
if 'location' in headers:
newurl = headers['location']
newurl = headers.getheaders('location')[0]
elif 'uri' in headers:
newurl = headers['uri']
newurl = headers.getheaders('uri')[0]
else:
return
newurl = urlparse.urljoin(req.get_full_url(), newurl)
......@@ -506,20 +536,16 @@ class HTTPRedirectHandler(BaseHandler):
return
# loop detection
# .redirect_dict has a key (url, code) if url was previously
# visited as a result of a redirection with that code. The
# code is needed in addition to the URL because visiting a URL
# twice isn't necessarily a loop: there is more than one way
# to redirect (301, 302, 303, 307, refresh).
key = (newurl, code)
# .redirect_dict has a key url if url was previously visited.
if hasattr(req, 'redirect_dict'):
visited = new.redirect_dict = req.redirect_dict
if key in visited or len(visited) >= self.max_redirections:
if (visited.get(newurl, 0) >= self.max_repeats or
len(visited) >= self.max_redirections):
raise HTTPError(req.get_full_url(), code,
self.inf_msg + msg, headers, fp)
else:
visited = new.redirect_dict = req.redirect_dict = {}
visited[key] = None
visited[newurl] = visited.get(newurl, 0) + 1
# Don't close the fp until we are sure that we won't use it
# with HTTPError.
......@@ -912,7 +938,7 @@ class AbstractHTTPHandler(BaseHandler):
def set_http_debuglevel(self, level):
self._debuglevel = level
def do_request(self, request):
def do_request_(self, request):
host = request.get_host()
if not host:
raise URLError('no host given')
......@@ -987,7 +1013,7 @@ class HTTPHandler(AbstractHTTPHandler):
def http_open(self, req):
return self.do_open(httplib.HTTPConnection, req)
http_request = AbstractHTTPHandler.do_request
http_request = AbstractHTTPHandler.do_request_
if hasattr(httplib, 'HTTPS'):
class HTTPSHandler(AbstractHTTPHandler):
......@@ -995,7 +1021,24 @@ if hasattr(httplib, 'HTTPS'):
def https_open(self, req):
return self.do_open(httplib.HTTPSConnection, req)
https_request = AbstractHTTPHandler.do_request
https_request = AbstractHTTPHandler.do_request_
class HTTPCookieProcessor(BaseHandler):
    """Handler that passes HTTP(S) requests and responses to a cookie jar.

    cookiejar -- the jar to use; when omitted, a new cookielib.CookieJar
        is created.  The jar is available as the cookiejar attribute.

    """

    def __init__(self, cookiejar=None):
        if cookiejar is None:
            # This module imports cookielib as a module, so the class has to
            # be reached through it; a bare CookieJar name is not in scope.
            cookiejar = cookielib.CookieJar()
        self.cookiejar = cookiejar

    def http_request(self, request):
        """Let the jar add its cookie header to request; return request."""
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Let the jar extract cookies from response; return response."""
        self.cookiejar.extract_cookies(response, request)
        return response

    # HTTPS traffic is processed exactly like HTTP traffic.
    https_request = http_request
    https_response = http_response
class UnknownHandler(BaseHandler):
def unknown_open(self, req):
......
......@@ -311,6 +311,10 @@ Extension modules
Library
-------
- Added a new module: cookielib. Automatic cookie handling for HTTP
clients. Also, support for cookielib has been added to urllib2, so
urllib2.urlopen() can transparently handle cookies.
- stringprep.py now uses built-in set() instead of sets.Set().
- Bug #876278: Unbounded recursion in modulefinder
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment