Commit b2ac4d69 authored by Gregory P. Smith's avatar Gregory P. Smith

Fixes issue #12268 for file readline, readlines and read() and readinto methods.

They no longer lose data when an underlying read system call is interrupted.
IOError is no longer raised due to a read system call returning EINTR from
within these methods.
parent ed04f42b
...@@ -2,6 +2,9 @@ import sys ...@@ -2,6 +2,9 @@ import sys
import os import os
import unittest import unittest
import itertools import itertools
import select
import signal
import subprocess
import time import time
from array import array from array import array
from weakref import proxy from weakref import proxy
...@@ -602,6 +605,148 @@ class FileThreadingTests(unittest.TestCase): ...@@ -602,6 +605,148 @@ class FileThreadingTests(unittest.TestCase):
self._test_close_open_io(io_func) self._test_close_open_io(io_func)
@unittest.skipUnless(os.name == 'posix', 'test requires a posix system.')
class TestFileSignalEINTR(unittest.TestCase):
def _test_reading(self, data_to_write, read_and_verify_code, method_name,
universal_newlines=False):
"""Generic buffered read method test harness to verify EINTR behavior.
Also validates that Python signal handlers are run during the read.
Args:
data_to_write: String to write to the child process for reading
before sending it a signal, confirming the signal was handled,
writing a final newline char and closing the infile pipe.
read_and_verify_code: Single "line" of code to read from a file
object named 'infile' and validate the result. This will be
executed as part of a python subprocess fed data_to_write.
method_name: The name of the read method being tested, for use in
an error message on failure.
universal_newlines: If True, infile will be opened in universal
newline mode in the child process.
"""
if universal_newlines:
# Test the \r\n -> \n conversion while we're at it.
data_to_write = data_to_write.replace('\n', '\r\n')
infile_setup_code = 'infile = os.fdopen(sys.stdin.fileno(), "rU")'
else:
infile_setup_code = 'infile = sys.stdin'
# Total pipe IO in this function is smaller than the minimum posix OS
# pipe buffer size of 512 bytes. No writer should block.
assert len(data_to_write) < 512, 'data_to_write must fit in pipe buf.'
child_code = (
'import os, signal, sys ;'
'signal.signal('
'signal.SIGINT, lambda s, f: sys.stderr.write("$\\n")) ;'
+ infile_setup_code + ' ;' +
'assert isinstance(infile, file) ;'
'sys.stderr.write("Go.\\n") ;'
+ read_and_verify_code)
reader_process = subprocess.Popen(
[sys.executable, '-c', child_code],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
# Wait for the signal handler to be installed.
go = reader_process.stderr.read(4)
if go != 'Go.\n':
reader_process.kill()
self.fail('Error from %s process while awaiting "Go":\n%s' % (
method_name, go+reader_process.stderr.read()))
reader_process.stdin.write(data_to_write)
signals_sent = 0
rlist = []
# We don't know when the read_and_verify_code in our child is actually
# executing within the read system call we want to interrupt. This
# loop waits for a bit before sending the first signal to increase
# the likelihood of that. Implementations without correct EINTR
# and signal handling usually fail this test.
while not rlist:
rlist, _, _ = select.select([reader_process.stderr], (), (), 0.05)
reader_process.send_signal(signal.SIGINT)
# Give the subprocess time to handle it before we loop around and
# send another one. On OSX the second signal happening close to
# immediately after the first was causing the subprocess to crash
# via the OS's default SIGINT handler.
time.sleep(0.1)
signals_sent += 1
if signals_sent > 200:
reader_process.kill()
self.fail("failed to handle signal during %s." % method_name)
# This assumes anything unexpected that writes to stderr will also
# write a newline. That is true of the traceback printing code.
signal_line = reader_process.stderr.readline()
if signal_line != '$\n':
reader_process.kill()
self.fail('Error from %s process while awaiting signal:\n%s' % (
method_name, signal_line+reader_process.stderr.read()))
# We append a newline to our input so that a readline call can
# end on its own before the EOF is seen.
stdout, stderr = reader_process.communicate(input='\n')
if reader_process.returncode != 0:
self.fail('%s() process exited rc=%d.\nSTDOUT:\n%s\nSTDERR:\n%s' % (
method_name, reader_process.returncode, stdout, stderr))
def test_readline(self, universal_newlines=False):
"""file.readline must handle signals and not lose data."""
self._test_reading(
data_to_write='hello, world!',
read_and_verify_code=(
'line = infile.readline() ;'
'expected_line = "hello, world!\\n" ;'
'assert line == expected_line, ('
'"read %r expected %r" % (line, expected_line))'
),
method_name='readline',
universal_newlines=universal_newlines)
def test_readline_with_universal_newlines(self):
self.test_readline(universal_newlines=True)
def test_readlines(self, universal_newlines=False):
"""file.readlines must handle signals and not lose data."""
self._test_reading(
data_to_write='hello\nworld!',
read_and_verify_code=(
'lines = infile.readlines() ;'
'expected_lines = ["hello\\n", "world!\\n"] ;'
'assert lines == expected_lines, ('
'"readlines returned wrong data.\\n" '
'"got lines %r\\nexpected %r" '
'% (lines, expected_lines))'
),
method_name='readlines',
universal_newlines=universal_newlines)
def test_readlines_with_universal_newlines(self):
self.test_readlines(universal_newlines=True)
def test_readall(self):
"""Unbounded file.read() must handle signals and not lose data."""
self._test_reading(
data_to_write='hello, world!abcdefghijklm',
read_and_verify_code=(
'data = infile.read() ;'
'expected_data = "hello, world!abcdefghijklm\\n";'
'assert data == expected_data, ('
'"read %r expected %r" % (data, expected_data))'
),
method_name='unbounded read')
def test_readinto(self):
"""file.readinto must handle signals and not lose data."""
self._test_reading(
data_to_write='hello, world!',
read_and_verify_code=(
'data = bytearray(50) ;'
'num_read = infile.readinto(data) ;'
'expected_data = "hello, world!\\n";'
'assert data[:num_read] == expected_data, ('
'"read %r expected %r" % (data, expected_data))'
),
method_name='readinto')
class StdoutTests(unittest.TestCase): class StdoutTests(unittest.TestCase):
def test_move_stdout_on_write(self): def test_move_stdout_on_write(self):
...@@ -678,7 +823,7 @@ def test_main(): ...@@ -678,7 +823,7 @@ def test_main():
# So get rid of it no matter what. # So get rid of it no matter what.
try: try:
run_unittest(AutoFileTests, OtherFileTests, FileSubclassTests, run_unittest(AutoFileTests, OtherFileTests, FileSubclassTests,
FileThreadingTests, StdoutTests) FileThreadingTests, TestFileSignalEINTR, StdoutTests)
finally: finally:
if os.path.exists(TESTFN): if os.path.exists(TESTFN):
os.unlink(TESTFN) os.unlink(TESTFN)
......
...@@ -9,6 +9,11 @@ What's New in Python 2.7.4 ...@@ -9,6 +9,11 @@ What's New in Python 2.7.4
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #12268: File readline, readlines and read() methods no longer lose
data when an underlying read system call is interrupted. IOError is no
longer raised due to a read system call returning EINTR from within these
methods.
- Issue #10053: Don't close FDs when FileIO.__init__ fails. Loosely based on - Issue #10053: Don't close FDs when FileIO.__init__ fails. Loosely based on
the work by Hirokazu Yamamoto. the work by Hirokazu Yamamoto.
......
...@@ -1080,12 +1080,23 @@ file_read(PyFileObject *f, PyObject *args) ...@@ -1080,12 +1080,23 @@ file_read(PyFileObject *f, PyObject *args)
return NULL; return NULL;
bytesread = 0; bytesread = 0;
for (;;) { for (;;) {
int interrupted;
FILE_BEGIN_ALLOW_THREADS(f) FILE_BEGIN_ALLOW_THREADS(f)
errno = 0; errno = 0;
chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread, chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
buffersize - bytesread, f->f_fp, (PyObject *)f); buffersize - bytesread, f->f_fp, (PyObject *)f);
interrupted = ferror(f->f_fp) && errno == EINTR;
FILE_END_ALLOW_THREADS(f) FILE_END_ALLOW_THREADS(f)
if (interrupted) {
clearerr(f->f_fp);
if (PyErr_CheckSignals()) {
Py_DECREF(v);
return NULL;
}
}
if (chunksize == 0) { if (chunksize == 0) {
if (interrupted)
continue;
if (!ferror(f->f_fp)) if (!ferror(f->f_fp))
break; break;
clearerr(f->f_fp); clearerr(f->f_fp);
...@@ -1100,7 +1111,7 @@ file_read(PyFileObject *f, PyObject *args) ...@@ -1100,7 +1111,7 @@ file_read(PyFileObject *f, PyObject *args)
return NULL; return NULL;
} }
bytesread += chunksize; bytesread += chunksize;
if (bytesread < buffersize) { if (bytesread < buffersize && !interrupted) {
clearerr(f->f_fp); clearerr(f->f_fp);
break; break;
} }
...@@ -1141,12 +1152,23 @@ file_readinto(PyFileObject *f, PyObject *args) ...@@ -1141,12 +1152,23 @@ file_readinto(PyFileObject *f, PyObject *args)
ntodo = pbuf.len; ntodo = pbuf.len;
ndone = 0; ndone = 0;
while (ntodo > 0) { while (ntodo > 0) {
int interrupted;
FILE_BEGIN_ALLOW_THREADS(f) FILE_BEGIN_ALLOW_THREADS(f)
errno = 0; errno = 0;
nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp, nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
(PyObject *)f); (PyObject *)f);
interrupted = ferror(f->f_fp) && errno == EINTR;
FILE_END_ALLOW_THREADS(f) FILE_END_ALLOW_THREADS(f)
if (interrupted) {
clearerr(f->f_fp);
if (PyErr_CheckSignals()) {
PyBuffer_Release(&pbuf);
return NULL;
}
}
if (nnow == 0) { if (nnow == 0) {
if (interrupted)
continue;
if (!ferror(f->f_fp)) if (!ferror(f->f_fp))
break; break;
PyErr_SetFromErrno(PyExc_IOError); PyErr_SetFromErrno(PyExc_IOError);
...@@ -1434,8 +1456,25 @@ get_line(PyFileObject *f, int n) ...@@ -1434,8 +1456,25 @@ get_line(PyFileObject *f, int n)
*buf++ = c; *buf++ = c;
if (c == '\n') break; if (c == '\n') break;
} }
if ( c == EOF && skipnextlf ) if (c == EOF) {
newlinetypes |= NEWLINE_CR; if (ferror(fp) && errno == EINTR) {
FUNLOCKFILE(fp);
FILE_ABORT_ALLOW_THREADS(f)
f->f_newlinetypes = newlinetypes;
f->f_skipnextlf = skipnextlf;
if (PyErr_CheckSignals()) {
Py_DECREF(v);
return NULL;
}
/* We executed Python signal handlers and got no exception.
* Now back to reading the line where we left off. */
clearerr(fp);
continue;
}
if (skipnextlf)
newlinetypes |= NEWLINE_CR;
}
} else /* If not universal newlines use the normal loop */ } else /* If not universal newlines use the normal loop */
while ((c = GETC(fp)) != EOF && while ((c = GETC(fp)) != EOF &&
(*buf++ = c) != '\n' && (*buf++ = c) != '\n' &&
...@@ -1449,6 +1488,16 @@ get_line(PyFileObject *f, int n) ...@@ -1449,6 +1488,16 @@ get_line(PyFileObject *f, int n)
break; break;
if (c == EOF) { if (c == EOF) {
if (ferror(fp)) { if (ferror(fp)) {
if (errno == EINTR) {
if (PyErr_CheckSignals()) {
Py_DECREF(v);
return NULL;
}
/* We executed Python signal handlers and got no exception.
* Now back to reading the line where we left off. */
clearerr(fp);
continue;
}
PyErr_SetFromErrno(PyExc_IOError); PyErr_SetFromErrno(PyExc_IOError);
clearerr(fp); clearerr(fp);
Py_DECREF(v); Py_DECREF(v);
...@@ -1624,7 +1673,7 @@ file_readlines(PyFileObject *f, PyObject *args) ...@@ -1624,7 +1673,7 @@ file_readlines(PyFileObject *f, PyObject *args)
size_t totalread = 0; size_t totalread = 0;
char *p, *q, *end; char *p, *q, *end;
int err; int err;
int shortread = 0; int shortread = 0; /* bool, did the previous read come up short? */
if (f->f_fp == NULL) if (f->f_fp == NULL)
return err_closed(); return err_closed();
...@@ -1654,6 +1703,14 @@ file_readlines(PyFileObject *f, PyObject *args) ...@@ -1654,6 +1703,14 @@ file_readlines(PyFileObject *f, PyObject *args)
sizehint = 0; sizehint = 0;
if (!ferror(f->f_fp)) if (!ferror(f->f_fp))
break; break;
if (errno == EINTR) {
if (PyErr_CheckSignals()) {
goto error;
}
clearerr(f->f_fp);
shortread = 0;
continue;
}
PyErr_SetFromErrno(PyExc_IOError); PyErr_SetFromErrno(PyExc_IOError);
clearerr(f->f_fp); clearerr(f->f_fp);
goto error; goto error;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment