Commit be540604 authored by Jason Madden, committed by GitHub

Merge pull request #1040 from gevent/issue1039

Make universal newlines with FileObjectPosix return str on Python 2 i…
parents 0933fdf8 9f28ebf4
...@@ -59,6 +59,16 @@ ...@@ -59,6 +59,16 @@
an encoding has been specified. Initial patch in :pr:`939` by an encoding has been specified. Initial patch in :pr:`939` by
William Grzybowski. William Grzybowski.
- :meth:`gevent.subprocess.Popen.communicate` (and in general,
accessing ``Popen.stdout`` and ``Popen.stderr``) returns the correct
type of str (bytes) in universal newline mode under Python 2.
Previously it always returned unicode strings. Reported in
:issue:`1039` by Michal Petrucha.
- :class:`gevent.fileobject.FileObjectPosix` returns native strings in
universal newline mode on Python 2. This is consistent with what
:class:`.FileObjectThread` does. See :issue:`1039`.
- Monkey-patching after the :mod:`ssl` module has been imported now - Monkey-patching after the :mod:`ssl` module has been imported now
prints a warning because this can produce ``RecursionError``. prints a warning because this can produce ``RecursionError``.
......
...@@ -53,6 +53,9 @@ class FileObjectBase(object): ...@@ -53,6 +53,9 @@ class FileObjectBase(object):
# Whether we are translating universal newlines or not. # Whether we are translating universal newlines or not.
_translate = False _translate = False
_translate_encoding = None
_translate_errors = None
def __init__(self, io, closefd): def __init__(self, io, closefd):
""" """
:param io: An io.IOBase-like object. :param io: An io.IOBase-like object.
...@@ -63,8 +66,9 @@ class FileObjectBase(object): ...@@ -63,8 +66,9 @@ class FileObjectBase(object):
self._close = closefd self._close = closefd
if self._translate: if self._translate:
# This automatically handles delegation. # This automatically handles delegation by assigning to
self.translate_newlines(None) # self.io
self.translate_newlines(None, self._translate_encoding, self._translate_errors)
else: else:
self._do_delegate_methods() self._do_delegate_methods()
......
...@@ -206,12 +206,20 @@ class FileObjectPosix(FileObjectBase): ...@@ -206,12 +206,20 @@ class FileObjectPosix(FileObjectBase):
put in non-blocking mode using :func:`gevent.os.make_nonblocking`. put in non-blocking mode using :func:`gevent.os.make_nonblocking`.
:keyword str mode: The manner of access to the file, one of "rb", "rU" or "wb" :keyword str mode: The manner of access to the file, one of "rb", "rU" or "wb"
(where the "b" or "U" can be omitted). (where the "b" or "U" can be omitted).
If "U" is part of the mode, IO will be done on text, otherwise bytes. If "U" is part of the mode, universal newlines will be used. On Python 2,
if 't' is not in the mode, this will result in returning byte (native) strings;
putting 't' in the mode will return text (unicode) strings instead, and
decoding them may cause :exc:`UnicodeDecodeError` to be raised.
:keyword int bufsize: If given, the size of the buffer to use. The default :keyword int bufsize: If given, the size of the buffer to use. The default
value means to use a platform-specific default value means to use a platform-specific default
Other values are interpreted as for the :mod:`io` package. Other values are interpreted as for the :mod:`io` package.
Buffering is ignored in text mode. Buffering is ignored in text mode.
.. versionchanged:: 1.3a1
On Python 2, enabling universal newlines no longer forces unicode
IO.
.. versionchanged:: 1.2a1 .. versionchanged:: 1.2a1
A bufsize of 0 in write mode is no longer forced to be 1. A bufsize of 0 in write mode is no longer forced to be 1.
...@@ -234,10 +242,31 @@ class FileObjectPosix(FileObjectBase): ...@@ -234,10 +242,31 @@ class FileObjectPosix(FileObjectBase):
mode = (mode or 'rb').replace('b', '') mode = (mode or 'rb').replace('b', '')
if 'U' in mode: if 'U' in mode:
self._translate = True self._translate = True
if bytes is str and 't' not in mode:
# We're going to be producing unicode objects, but
# universal newlines doesn't do that in the stdlib,
# so fix that to return str objects. The fix is two parts:
# first, set an encoding on the stream that can round-trip
# all bytes, and second, encode the text back to bytes (str) once it has been read.
self._translate_encoding = 'latin-1'
import functools
def wrap_method(m):
if m.__name__.startswith("read"):
@functools.wraps(m)
def wrapped(*args, **kwargs):
result = m(*args, **kwargs)
assert isinstance(result, unicode)
return result.encode('latin-1')
return wrapped
return m
self._wrap_method = wrap_method
mode = mode.replace('U', '') mode = mode.replace('U', '')
else: else:
self._translate = False self._translate = False
mode = mode.replace('t', '')
if len(mode) != 1 and mode not in 'rw': # pragma: no cover if len(mode) != 1 and mode not in 'rw': # pragma: no cover
# Python 3 builtin `open` raises a ValueError for invalid modes; # Python 3 builtin `open` raises a ValueError for invalid modes;
# Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was # Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was
......
...@@ -85,6 +85,30 @@ class Test(greentest.TestCase): ...@@ -85,6 +85,30 @@ class Test(greentest.TestCase):
else: else:
self.assertEqual(stderr, b"pineapple") self.assertEqual(stderr, b"pineapple")
@greentest.skipIf(subprocess.mswindows,
"Windows does weird things here")
def test_communicate_universal(self):
# Native string all the things. See https://github.com/gevent/gevent/issues/1039
p = subprocess.Popen(
[
sys.executable, "-c",
'import sys,os;'
'sys.stderr.write("pineapple\\r\\n\\xff\\xff\\xf2\\xf9\\r\\n");'
'sys.stdout.write(sys.stdin.read())'
],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True)
(stdout, stderr) = p.communicate('banana\r\n\xff\xff\xf2\xf9\r\n')
self.assertIsInstance(stdout, str)
self.assertIsInstance(stderr, str)
self.assertEqual(stdout,
'banana\n\xff\xff\xf2\xf9\n')
self.assertEqual(stderr,
'pineapple\n\xff\xff\xf2\xf9\n')
def test_universal1(self): def test_universal1(self):
p = subprocess.Popen([sys.executable, "-c", p = subprocess.Popen([sys.executable, "-c",
'import sys,os;' + SETBINARY + 'import sys,os;' + SETBINARY +
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment