Commit 6d43a1f4 authored by Jason Madden

Make universal newlines with FileObjectPosix return str on Python 2 instead of unicode. Fixes #1039

parent 0933fdf8
......@@ -59,6 +59,16 @@
an encoding has been specified. Initial patch in :pr:`939` by
William Grzybowski.
- :meth:`gevent.subprocess.Popen.communicate` (and in general,
accessing ``Popen.stdout`` and ``Popen.stderr``) returns the correct
type of str (bytes) in universal newline mode under Python 2.
Previously it always returned unicode strings. Reported in
:issue:`1039` by Michal Petrucha.
- :class:`gevent.fileobject.FileObjectPosix` returns native strings in
universal newline mode on Python 2. This is consistent with what
:class:`.FileObjectThread` does. See :issue:`1039`.
- Monkey-patching after the :mod:`ssl` module has been imported now
prints a warning because this can produce ``RecursionError``.
......
......@@ -53,6 +53,9 @@ class FileObjectBase(object):
# Whether we are translating universal newlines or not.
_translate = False
_translate_encoding = None
_translate_errors = None
def __init__(self, io, closefd):
"""
:param io: An io.IOBase-like object.
......@@ -63,8 +66,9 @@ class FileObjectBase(object):
self._close = closefd
if self._translate:
# This automatically handles delegation.
self.translate_newlines(None)
# This automatically handles delegation by assigning to
# self.io
self.translate_newlines(None, self._translate_encoding, self._translate_errors)
else:
self._do_delegate_methods()
......
......@@ -206,12 +206,20 @@ class FileObjectPosix(FileObjectBase):
put in non-blocking mode using :func:`gevent.os.make_nonblocking`.
:keyword str mode: The manner of access to the file, one of "rb", "rU" or "wb"
(where the "b" or "U" can be omitted).
If "U" is part of the mode, IO will be done on text, otherwise bytes.
If "U" is part of the mode, universal newlines will be used. On Python 2,
if 't' is not in the mode, this will result in returning byte (native) strings;
putting 't' in the mode will return text strings instead, in which case
decoding the data that was read may raise :exc:`UnicodeDecodeError`.
:keyword int bufsize: If given, the size of the buffer to use. The default
value means to use a platform-specific default.
Other values are interpreted as for the :mod:`io` package.
Buffering is ignored in text mode.
.. versionchanged:: 1.3a1
On Python 2, enabling universal newlines no longer forces unicode
IO.
.. versionchanged:: 1.2a1
A bufsize of 0 in write mode is no longer forced to be 1.
......@@ -234,10 +242,30 @@ class FileObjectPosix(FileObjectBase):
mode = (mode or 'rb').replace('b', '')
if 'U' in mode:
self._translate = True
if bytes is str and 't' not in mode:
# The text wrapper that performs newline translation produces
# unicode objects, but universal newlines in the Python 2 stdlib
# returns str objects, so fix that to return str objects.
# The fix is two parts: first, set an encoding on the stream
# that can round-trip all bytes, and second, encode the text
# back to bytes (str) with that same encoding once it has been read.
self._translate_encoding = 'latin-1'
import functools
def wrap_method(m):
    """
    Wrap a delegated ``read*`` method so it returns native (byte) strings.

    Methods whose name starts with ``read`` are wrapped so that any text
    they return is encoded with latin-1; every other method is handed
    back untouched.

    Not everything named ``read*`` returns a single text string:
    ``readlines`` returns a list and ``readable`` returns a bool. Text
    inside a list is encoded element by element, and any non-text result
    is passed through unchanged instead of tripping an assertion.

    :param m: The method being delegated to; only ``m.__name__`` and
        its return value are inspected.
    :return: *m* itself, or a wrapper around it preserving its metadata.
    """
    if not m.__name__.startswith("read"):
        return m

    # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3,
    # so no name that exists on only one major version is needed.
    text_type = type(u'')

    def to_native(value):
        # Encode text; recurse into lists (e.g. from ``readlines``);
        # leave anything else (bool, None, bytes, ...) as it is.
        if isinstance(value, text_type):
            return value.encode('latin-1')
        if isinstance(value, list):
            return [to_native(item) for item in value]
        return value

    @functools.wraps(m)
    def wrapped(*args, **kwargs):
        return to_native(m(*args, **kwargs))
    return wrapped
self._wrap_method = wrap_method
mode = mode.replace('U', '')
else:
self._translate = False
mode = mode.replace('t', '')
if len(mode) != 1 and mode not in 'rw': # pragma: no cover
# Python 3 builtin `open` raises a ValueError for invalid modes;
# Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was
......
......@@ -85,6 +85,28 @@ class Test(greentest.TestCase):
else:
self.assertEqual(stderr, b"pineapple")
def test_communicate_universal(self):
    """
    ``communicate()`` in universal-newline mode yields native strings.

    The child process writes a line of text plus a line of high bytes
    to stderr and echoes its stdin to stdout. Both captured streams
    must be ``str`` and have every ``\\r\\n`` translated to ``\\n``.
    """
    # Native string all the things. See https://github.com/gevent/gevent/issues/1039
    child_script = (
        'import sys,os;'
        'sys.stderr.write("pineapple\\r\\n\\xff\\xff\\xf2\\xf9\\r\\n");'
        'sys.stdout.write(sys.stdin.read())'
    )
    popen_kwargs = {
        'stdin': subprocess.PIPE,
        'stdout': subprocess.PIPE,
        'stderr': subprocess.PIPE,
        'universal_newlines': True,
    }
    child = subprocess.Popen([sys.executable, "-c", child_script], **popen_kwargs)

    stdout, stderr = child.communicate('banana\r\n\xff\xff\xf2\xf9\r\n')

    # Type checks first, for both streams, then the translated contents.
    for captured in (stdout, stderr):
        self.assertIsInstance(captured, str)
    self.assertEqual(stdout, 'banana\n\xff\xff\xf2\xf9\n')
    self.assertEqual(stderr, 'pineapple\n\xff\xff\xf2\xf9\n')
def test_universal1(self):
p = subprocess.Popen([sys.executable, "-c",
'import sys,os;' + SETBINARY +
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment