Commit 7d8acc41 authored by Jason Madden's avatar Jason Madden

Comply with PEP3333

Don't subclass dict for environ by default, but leave that option
available. When printing, make sure that we have a secure environ.
Unfortunately, this is relatively fragile but it's the only way to not
break WebOb/Pyramid.
parent f689004a
...@@ -53,12 +53,10 @@ ...@@ -53,12 +53,10 @@
- Servers: Default to AF_INET6 when binding to all addresses (e.g., - Servers: Default to AF_INET6 when binding to all addresses (e.g.,
""). This supports both IPv4 and IPv6 connections (except on ""). This supports both IPv4 and IPv6 connections (except on
Windows). Original change in :pr:`495` by Felix Kaiser. Windows). Original change in :pr:`495` by Felix Kaiser.
- Security: The pywsgi ``environ`` dict doesn't print its contents by - Security: Errors logged by :class:`~gevent.pywsgi.WSGIHandler` no
default anymore, which could have lead to potential secure longer print the entire WSGI environment by default. This avoids
information disclosure. Note that this is done using a subclass of possible information disclosure vulnerabilities. Originally reported
``dict`` which is technically not compliant with PEP3333; in :pr:`779` by sean-peters-au and changed in :pr:`781`.
applications can configure pywsgi to use a ``dict`` again if required.
1.1.0 (Mar 5, 2016) 1.1.0 (Mar 5, 2016)
=================== ===================
......
...@@ -481,9 +481,13 @@ class Hub(RawGreenlet): ...@@ -481,9 +481,13 @@ class Hub(RawGreenlet):
resolver_class = resolver_config(resolver_class, 'GEVENT_RESOLVER') resolver_class = resolver_config(resolver_class, 'GEVENT_RESOLVER')
threadpool_class = config('gevent.threadpool.ThreadPool', 'GEVENT_THREADPOOL') threadpool_class = config('gevent.threadpool.ThreadPool', 'GEVENT_THREADPOOL')
backend = config(None, 'GEVENT_BACKEND') backend = config(None, 'GEVENT_BACKEND')
format_context = 'pprint.pformat'
threadpool_size = 10 threadpool_size = 10
# using pprint.pformat can override custom __repr__ methods on dict/list
# subclasses, which can be a security concern
format_context = 'pprint.saferepr'
def __init__(self, loop=None, default=None): def __init__(self, loop=None, default=None):
RawGreenlet.__init__(self) RawGreenlet.__init__(self)
if hasattr(loop, 'run'): if hasattr(loop, 'run'):
......
...@@ -45,6 +45,7 @@ __all__ = [ ...@@ -45,6 +45,7 @@ __all__ = [
'LoggingLogAdapter', 'LoggingLogAdapter',
'Environ', 'Environ',
'SecureEnviron', 'SecureEnviron',
'WSGISecureEnviron',
] ]
...@@ -950,7 +951,10 @@ class WSGIHandler(object): ...@@ -950,7 +951,10 @@ class WSGIHandler(object):
# TODO: Shouldn't we dump this to wsgi.errors? If we did that now, it would # TODO: Shouldn't we dump this to wsgi.errors? If we did that now, it would
# wind up getting logged twice # wind up getting logged twice
if not issubclass(t, GreenletExit): if not issubclass(t, GreenletExit):
self.server.loop.handle_error(self.environ, t, v, tb) context = self.environ
if not isinstance(context, self.server.secure_environ_class):
context = self.server.secure_environ_class(context)
self.server.loop.handle_error(context, t, v, tb)
def handle_error(self, t, v, tb): def handle_error(self, t, v, tb):
# Called for internal, unexpected errors, NOT invalid client input # Called for internal, unexpected errors, NOT invalid client input
...@@ -1211,12 +1215,13 @@ class SecureEnviron(Environ): ...@@ -1211,12 +1215,13 @@ class SecureEnviron(Environ):
default_secure_repr = True default_secure_repr = True
default_whitelist_keys = () default_whitelist_keys = ()
default_print_masked_keys = True
# Allow instances to override the class values, # Allow instances to override the class values,
# but inherit from the class if not present. Keeps instances # but inherit from the class if not present. Keeps instances
# small since we can't combine __slots__ with class attributes # small since we can't combine __slots__ with class attributes
# of the same name. # of the same name.
__slots__ = ('secure_repr', 'whitelist_keys',) __slots__ = ('secure_repr', 'whitelist_keys', 'print_masked_keys')
def __getattr__(self, name): def __getattr__(self, name):
if name in SecureEnviron.__slots__: if name in SecureEnviron.__slots__:
...@@ -1225,13 +1230,40 @@ class SecureEnviron(Environ): ...@@ -1225,13 +1230,40 @@ class SecureEnviron(Environ):
def __repr__(self): def __repr__(self):
if self.secure_repr: if self.secure_repr:
if self.whitelist_keys: whitelist = self.whitelist_keys
return repr({k: self[k] if k in self.whitelist_keys else "<MASKED>" for k in self}) print_masked = self.print_masked_keys
if whitelist:
safe = {k: self[k] if k in whitelist else "<MASKED>"
for k in self
if k in whitelist or print_masked}
safe_repr = repr(safe)
if not print_masked and len(safe) != len(self):
safe_repr = safe_repr[:-1] + ", (hidden keys: %d)}" % (len(self) - len(safe))
return safe_repr
return "<pywsgi.SecureEnviron dict (keys: %d) at %s>" % (len(self), id(self)) return "<pywsgi.SecureEnviron dict (keys: %d) at %s>" % (len(self), id(self))
return Environ.__repr__(self) return Environ.__repr__(self)
__str__ = __repr__ __str__ = __repr__
class WSGISecureEnviron(SecureEnviron):
"""
Specializes the default list of whitelisted keys to a few
common WSGI variables.
Example::
>>> environ = WSGISecureEnviron(REMOTE_ADDR='::1', HTTP_AUTHORIZATION='secret')
>>> environ
{'REMOTE_ADDR': '::1', (hidden keys: 1)}
>>> import pprint
>>> pprint.pprint(environ)
{'REMOTE_ADDR': '::1', (hidden keys: 1)}
>>> print(pprint.pformat(environ))
{'REMOTE_ADDR': '::1', (hidden keys: 1)}
"""
default_whitelist_keys = ('REMOTE_ADDR', 'REMOTE_PORT', 'HTTP_HOST')
default_print_masked_keys = False
class WSGIServer(StreamServer): class WSGIServer(StreamServer):
""" """
...@@ -1294,11 +1326,17 @@ class WSGIServer(StreamServer): ...@@ -1294,11 +1326,17 @@ class WSGIServer(StreamServer):
error_log = None error_log = None
#: The class of environ objects passed to the handlers. #: The class of environ objects passed to the handlers.
#: Must be a dict subclass. By default this will be :class:`SecureEnviron`, #: Must be a dict subclass. For compliance with :pep:`3333`
#: but this can be customized in a subclass or per-instance. #: and libraries like WebOb, this is simply :class:`dict`
#: but this can be customized in a subclass or per-instance
#: (probably to :class:`WSGISecureEnviron`).
#: #:
#: .. versionadded:: 1.2a1 #: .. versionadded:: 1.2a1
environ_class = SecureEnviron environ_class = dict
# Undocumented internal detail: the class that WSGIHandler._log_error
# will cast to before passing to the loop.
secure_environ_class = WSGISecureEnviron
base_env = {'GATEWAY_INTERFACE': 'CGI/1.1', base_env = {'GATEWAY_INTERFACE': 'CGI/1.1',
'SERVER_SOFTWARE': 'gevent/%d.%d Python/%d.%d' % (gevent.version_info[:2] + sys.version_info[:2]), 'SERVER_SOFTWARE': 'gevent/%d.%d Python/%d.%d' % (gevent.version_info[:2] + sys.version_info[:2]),
...@@ -1354,7 +1392,7 @@ class WSGIServer(StreamServer): ...@@ -1354,7 +1392,7 @@ class WSGIServer(StreamServer):
self.max_accept = 1 self.max_accept = 1
def get_environ(self): def get_environ(self):
return self.environ.copy() return self.environ_class(self.environ)
def init_socket(self): def init_socket(self):
StreamServer.init_socket(self) StreamServer.init_socket(self)
......
...@@ -397,7 +397,7 @@ class TestCase(TestCaseMetaClass("NewBase", (BaseTestCase,), {})): ...@@ -397,7 +397,7 @@ class TestCase(TestCaseMetaClass("NewBase", (BaseTestCase,), {})):
finally: finally:
self._error = self._none self._error = self._none
def assert_error(self, type=None, value=None, error=None): def assert_error(self, type=None, value=None, error=None, where_type=None):
if error is None: if error is None:
error = self.get_error() error = self.get_error()
if type is not None: if type is not None:
...@@ -407,6 +407,8 @@ class TestCase(TestCaseMetaClass("NewBase", (BaseTestCase,), {})): ...@@ -407,6 +407,8 @@ class TestCase(TestCaseMetaClass("NewBase", (BaseTestCase,), {})):
assert str(error[2]) == value, error assert str(error[2]) == value, error
else: else:
assert error[2] is value, error assert error[2] is value, error
if where_type is not None:
self.assertIsInstance(error[0], where_type)
return error return error
if RUNNING_ON_APPVEYOR: if RUNNING_ON_APPVEYOR:
......
...@@ -39,27 +39,7 @@ except ImportError: ...@@ -39,27 +39,7 @@ except ImportError:
from io import BytesIO as StringIO from io import BytesIO as StringIO
import weakref import weakref
import wsgiref.validate from wsgiref.validate import validator
def validator(application):
# The wsgiref validator wants to enforce that the
# type(environ) is dict (which is specified in the
# PEP). But we use a subclass by default.
# Override this check.
valid_application = wsgiref.validate.validator(application)
def dict_env_application(environ, start_response):
ce = wsgiref.validate.check_environ
def check_environ(environ):
return ce(dict(environ))
wsgiref.validate.check_environ = check_environ
try:
return valid_application(environ, start_response)
finally:
wsgiref.validate.check_environ = ce
return dict_env_application
import greentest import greentest
import gevent import gevent
...@@ -798,12 +778,14 @@ class TestNonLatin1HeaderFromApplication(TestCase): ...@@ -798,12 +778,14 @@ class TestNonLatin1HeaderFromApplication(TestCase):
def test(self): def test(self):
sock = self.connect() sock = self.connect()
self.expect_one_error()
sock.sendall(b'''GET / HTTP/1.1\r\n\r\n''') sock.sendall(b'''GET / HTTP/1.1\r\n\r\n''')
if self.should_error: if self.should_error:
read_http(sock.makefile(), code=500, reason='Internal Server Error') read_http(sock.makefile(), code=500, reason='Internal Server Error')
self.assert_error(where_type=pywsgi.SecureEnviron)
self.assertEqual(len(self.errors), 1) self.assertEqual(len(self.errors), 1)
t, v = self.errors[0] t, v = self.errors[0]
self.assertTrue(isinstance(v, UnicodeError)) self.assertIsInstance(v, UnicodeError)
else: else:
read_http(sock.makefile(), code=200, reason='PASSED') read_http(sock.makefile(), code=200, reason='PASSED')
self.assertEqual(len(self.errors), 0) self.assertEqual(len(self.errors), 0)
...@@ -952,7 +934,7 @@ class TestFirstEmptyYield(TestCase): ...@@ -952,7 +934,7 @@ class TestFirstEmptyYield(TestCase):
read_http(fd, body='hello', chunks=chunks) read_http(fd, body='hello', chunks=chunks)
garbage = fd.read() garbage = fd.read()
self.assert_(garbage == b"", "got garbage: %r" % garbage) self.assertTrue(garbage == b"", "got garbage: %r" % garbage)
class TestEmptyYield304(TestCase): class TestEmptyYield304(TestCase):
...@@ -1604,6 +1586,14 @@ class TestLogging(TestCase): ...@@ -1604,6 +1586,14 @@ class TestLogging(TestCase):
class TestEnviron(TestCase): class TestEnviron(TestCase):
# The wsgiref validator asserts type(environ) is dict.
# https://mail.python.org/pipermail/web-sig/2016-March/005455.html
validator = None
def init_server(self, application):
super(TestEnviron, self).init_server(application)
self.server.environ_class = pywsgi.SecureEnviron
def application(self, env, start_response): def application(self, env, start_response):
self.assertIsInstance(env, pywsgi.SecureEnviron) self.assertIsInstance(env, pywsgi.SecureEnviron)
start_response('200 OK', [('Content-Type', 'text/plain')]) start_response('200 OK', [('Content-Type', 'text/plain')])
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment