Merge pull request #734 from gevent/issue-289

Fix #289. Avoid OverflowError in Py2 for large WSGI uploads.

Merge pull request #734 from gevent/issue-289
Fix #289. Avoid OverflowError in Py2 for large WSGI uploads.
a65501a1 · Jason Madden · 0fcb1ea5 · c551fb8e · a65501a1 · a65501a1
Commit a65501a1 authored Feb 08, 2016 by Jason Madden
Show whitespace changes
Inline Side-by-side

Showing with 68 additions and 3 deletions

changelog.rst changelog.rst +5 -0

gevent/pywsgi.py gevent/pywsgi.py +26 -1

greentest/test__pywsgi.py greentest/test__pywsgi.py +37 -2

No files found.
--- a/changelog.rst
+++ b/changelog.rst
@@ -10,6 +10,11 @@
 - Python 2: Using the blocking API at import time when multiple
  greenlets are also importing should not lead to ``LoopExit``.
  Reported in :issue:`798` by Garrett Heel.
+- Python 2: Don't raise ``OverflowError`` when using the ``readline``
+  method of the WSGI input stream without a size hint or with a large
+  size hint when the client is uploading a large amount of data.
+  Reported in :issue:`289` by ggjjlldd, with contributions by Nathan
+  Hoad.

 1.1rc3 (Jan 04, 2016)
 =====================

--- a/gevent/pywsgi.py
+++ b/gevent/pywsgi.py
@@ -145,6 +145,7 @@ class Input(object):
            # Either Content-Length or "Transfer-Encoding: chunked" must be present in a request with a body
            # if it was chunked, then this function would have not been called
            return b''
+
        self._send_100_continue()
        left = content_length - self.position
        if length is None:
@@ -153,7 +154,31 @@ class Input(object):
            length = left
        if not length:
            return b''
+
+        # On Python 2, self.rfile is usually socket.makefile(), which
+        # uses cStringIO.StringIO. If *length* does not fit in a C
+        # int (typically 32 bits signed), parsing the argument to
+        # readline raises OverflowError. StringIO.read(), OTOH, uses
+        # Py_ssize_t, typically a long (64 bits). In a bare readline()
+        # case, because the header lines we're trying to read with
+        # readline are typically expected to be small, we can correct
+        # that failure by simply doing a smaller call to readline and
+        # appending; failures in read we let propagate.
+        try:
            read = reader(length)
+        except OverflowError:
+            if not use_readline:
+                # The requested length doesn't even fit in 64 bits. Ouch!
+                raise
+            # Loop on calls to readline() with a smaller size, appending
+            # the results until we actually get a newline. For uses in this
+            # module, we expect the actual length to be small, but WSGI applications
+            # are allowed to pass in an arbitrary length. (This loop isn't optimal,
+            # but even client applications *probably* have short lines.)
+            read = b''
+            while len(read) < length and not read.endswith(b'\n'):
+                read += reader(MAX_REQUEST_LINE)
+
        self.position += len(read)
        if len(read) < length:
            if (use_readline and not read.endswith(b"\n")) or not use_readline:

--- a/greentest/test__pywsgi.py
+++ b/greentest/test__pywsgi.py
@@ -29,7 +29,11 @@ except ImportError:
 import os
 import sys
 try:
-    from StringIO import StringIO
+    # On Python 2, we want the C-optimized version if
+    # available; it has different corner-case behaviour than
+    # the Python implementation, and it is used by socket.makefile
+    # by default.
+    from cStringIO import StringIO
 except ImportError:
    from io import BytesIO as StringIO
 import weakref
@@ -42,7 +46,7 @@ except ImportError:

 import greentest
 import gevent
-from gevent.hub import PY3
+from gevent.hub import PY3, PYPY
 from gevent import socket
 from gevent import pywsgi
 from gevent.pywsgi import Input
@@ -1432,6 +1436,37 @@ class TestInputRaw(greentest.BaseTestCase):
        i = self.make_input("2\r\n1", chunked_input=True)
        self.assertRaises(IOError, i.readline)

+    def test_32bit_overflow(self):
+        # https://github.com/gevent/gevent/issues/289
+        # Should not raise an OverflowError on Python 2
+        data = b'asdf\nghij\n'
+        long_data = b'a' * (pywsgi.MAX_REQUEST_LINE + 10)
+        long_data += b'\n'
+        data = data + long_data
+        partial_data = b'qjk\n' # Note terminating \n
+        n = 25 * 1000000000
+        if hasattr(n, 'bit_length'):
+            self.assertEqual(n.bit_length(), 35)
+        if not PY3 and not PYPY:
+            # Make sure we have the impl we think we do
+            self.assertRaises(OverflowError, StringIO(data).readline, n)
+
+        i = self.make_input(data, content_length=n)
+        # No size hint, but content_length is too large to fit in a C int
+        self.assertEqual(i.readline(), b'asdf\n')
+        # Large size hint
+        self.assertEqual(i.readline(n), b'ghij\n')
+        self.assertEqual(i.readline(n), long_data)
+
+        # Now again with the real content length, ensuring we can't read past it
+        i = self.make_input(data + partial_data, content_length=len(data) + 1)
+        self.assertEqual(i.readline(), b'asdf\n')
+        self.assertEqual(i.readline(n), b'ghij\n')
+        self.assertEqual(i.readline(n), long_data)
+        # Now we've nearly reached content_length, so we shouldn't be able
+        # to read any more than the one byte remaining
+        self.assertEqual(i.readline(n), b'q')
+

 class Test414(TestCase):