Commit 711af3ae authored by Antoine Pitrou

#5502: accelerate binary buffered IO (especially small operations).

On a suggestion by Victor Stinner.
parent aa4398b6
...@@ -174,7 +174,7 @@ PyTypeObject PyBufferedIOBase_Type = { ...@@ -174,7 +174,7 @@ PyTypeObject PyBufferedIOBase_Type = {
0, /* tp_alloc */ 0, /* tp_alloc */
0, /* tp_new */ 0, /* tp_new */
}; };
typedef struct { typedef struct {
PyObject_HEAD PyObject_HEAD
...@@ -184,6 +184,10 @@ typedef struct { ...@@ -184,6 +184,10 @@ typedef struct {
int readable; int readable;
int writable; int writable;
/* True if this is a vanilla Buffered object (rather than a user derived
class) *and* the raw stream is a vanilla FileIO object. */
int fast_closed_checks;
/* Absolute position inside the raw stream (-1 if unknown). */ /* Absolute position inside the raw stream (-1 if unknown). */
Py_off_t abs_pos; Py_off_t abs_pos;
...@@ -268,6 +272,18 @@ typedef struct { ...@@ -268,6 +272,18 @@ typedef struct {
return -1; \ return -1; \
} }
/* Closed-state test used by the buffered objects.
   When self->fast_closed_checks is set, the raw stream is queried directly
   through _PyFileIO_closed(self->raw); otherwise the check goes through
   BufferedIOMixin_closed(self).
   NOTE: `self` is expanded unparenthesized and more than once, so pass a
   plain identifier. */
#define IS_CLOSED(self) \
(self->fast_closed_checks \
? _PyFileIO_closed(self->raw) \
: BufferedIOMixin_closed(self))
/* If IS_CLOSED(self) is true, set a ValueError carrying error_msg and
   return NULL from the enclosing function.
   The expansion is a bare `if` block containing `return NULL;`, so call
   sites write it without a trailing semicolon. A caller that must release
   resources before returning should test IS_CLOSED() itself instead of
   using this macro. */
#define CHECK_CLOSED(self, error_msg) \
if (IS_CLOSED(self)) { \
PyErr_SetString(PyExc_ValueError, error_msg); \
return NULL; \
}
#define VALID_READ_BUFFER(self) \ #define VALID_READ_BUFFER(self) \
(self->readable && self->read_end != -1) (self->readable && self->read_end != -1)
...@@ -466,8 +482,8 @@ BufferedIOMixin_isatty(BufferedObject *self, PyObject *args) ...@@ -466,8 +482,8 @@ BufferedIOMixin_isatty(BufferedObject *self, PyObject *args)
CHECK_INITIALIZED(self) CHECK_INITIALIZED(self)
return PyObject_CallMethodObjArgs(self->raw, _PyIO_str_isatty, NULL); return PyObject_CallMethodObjArgs(self->raw, _PyIO_str_isatty, NULL);
} }
/* Forward decls */ /* Forward decls */
static PyObject * static PyObject *
_BufferedWriter_flush_unlocked(BufferedObject *, int); _BufferedWriter_flush_unlocked(BufferedObject *, int);
...@@ -480,7 +496,11 @@ _BufferedWriter_reset_buf(BufferedObject *self); ...@@ -480,7 +496,11 @@ _BufferedWriter_reset_buf(BufferedObject *self);
static PyObject * static PyObject *
_BufferedReader_peek_unlocked(BufferedObject *self, Py_ssize_t); _BufferedReader_peek_unlocked(BufferedObject *self, Py_ssize_t);
static PyObject * static PyObject *
_BufferedReader_read_unlocked(BufferedObject *self, Py_ssize_t); _BufferedReader_read_all(BufferedObject *self);
static PyObject *
_BufferedReader_read_fast(BufferedObject *self, Py_ssize_t);
static PyObject *
_BufferedReader_read_generic(BufferedObject *self, Py_ssize_t);
/* /*
...@@ -509,8 +529,8 @@ _Buffered_check_blocking_error(void) ...@@ -509,8 +529,8 @@ _Buffered_check_blocking_error(void)
static Py_off_t static Py_off_t
_Buffered_raw_tell(BufferedObject *self) _Buffered_raw_tell(BufferedObject *self)
{ {
PyObject *res;
Py_off_t n; Py_off_t n;
PyObject *res;
res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_tell, NULL); res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_tell, NULL);
if (res == NULL) if (res == NULL)
return -1; return -1;
...@@ -604,10 +624,7 @@ Buffered_flush(BufferedObject *self, PyObject *args) ...@@ -604,10 +624,7 @@ Buffered_flush(BufferedObject *self, PyObject *args)
PyObject *res; PyObject *res;
CHECK_INITIALIZED(self) CHECK_INITIALIZED(self)
if (BufferedIOMixin_closed(self)) { CHECK_CLOSED(self, "flush of closed file")
PyErr_SetString(PyExc_ValueError, "flush of closed file");
return NULL;
}
ENTER_BUFFERED(self) ENTER_BUFFERED(self)
res = _BufferedWriter_flush_unlocked(self, 0); res = _BufferedWriter_flush_unlocked(self, 0);
...@@ -667,14 +684,23 @@ Buffered_read(BufferedObject *self, PyObject *args) ...@@ -667,14 +684,23 @@ Buffered_read(BufferedObject *self, PyObject *args)
return NULL; return NULL;
} }
if (BufferedIOMixin_closed(self)) { CHECK_CLOSED(self, "read of closed file")
PyErr_SetString(PyExc_ValueError, "read of closed file");
return NULL;
}
if (n == -1) {
/* The number of bytes is unspecified, read until the end of stream */
ENTER_BUFFERED(self)
res = _BufferedReader_read_all(self);
LEAVE_BUFFERED(self)
}
else {
res = _BufferedReader_read_fast(self, n);
if (res == Py_None) {
Py_DECREF(res);
ENTER_BUFFERED(self) ENTER_BUFFERED(self)
res = _BufferedReader_read_unlocked(self, n); res = _BufferedReader_read_generic(self, n);
LEAVE_BUFFERED(self) LEAVE_BUFFERED(self)
}
}
return res; return res;
} }
...@@ -775,35 +801,31 @@ _Buffered_readline(BufferedObject *self, Py_ssize_t limit) ...@@ -775,35 +801,31 @@ _Buffered_readline(BufferedObject *self, Py_ssize_t limit)
Py_ssize_t n, written = 0; Py_ssize_t n, written = 0;
const char *start, *s, *end; const char *start, *s, *end;
if (BufferedIOMixin_closed(self)) { CHECK_CLOSED(self, "readline of closed file")
PyErr_SetString(PyExc_ValueError, "readline of closed file");
return NULL;
}
ENTER_BUFFERED(self)
/* First, try to find a line in the buffer */ /* First, try to find a line in the buffer. This can run unlocked because
the calls to the C API are simple enough that they can't trigger
any thread switch. */
n = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); n = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t);
if (limit >= 0 && n > limit) if (limit >= 0 && n > limit)
n = limit; n = limit;
start = self->buffer + self->pos; start = self->buffer + self->pos;
end = start + n; s = memchr(start, '\n', n);
s = start; if (s != NULL) {
while (s < end) { res = PyBytes_FromStringAndSize(start, s - start + 1);
if (*s++ == '\n') {
res = PyBytes_FromStringAndSize(start, s - start);
if (res != NULL) if (res != NULL)
self->pos += s - start; self->pos += s - start + 1;
goto end; goto end_unlocked;
}
} }
if (n == limit) { if (n == limit) {
res = PyBytes_FromStringAndSize(start, n); res = PyBytes_FromStringAndSize(start, n);
if (res != NULL) if (res != NULL)
self->pos += n; self->pos += n;
goto end; goto end_unlocked;
} }
ENTER_BUFFERED(self)
/* Now we try to get some more from the raw stream */ /* Now we try to get some more from the raw stream */
if (self->writable) { if (self->writable) {
res = _BufferedWriter_flush_unlocked(self, 1); res = _BufferedWriter_flush_unlocked(self, 1);
...@@ -875,6 +897,7 @@ found: ...@@ -875,6 +897,7 @@ found:
end: end:
LEAVE_BUFFERED(self) LEAVE_BUFFERED(self)
end_unlocked:
Py_XDECREF(chunks); Py_XDECREF(chunks);
return res; return res;
} }
...@@ -918,23 +941,26 @@ Buffered_seek(BufferedObject *self, PyObject *args) ...@@ -918,23 +941,26 @@ Buffered_seek(BufferedObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "O|i:seek", &targetobj, &whence)) { if (!PyArg_ParseTuple(args, "O|i:seek", &targetobj, &whence)) {
return NULL; return NULL;
} }
if (whence < 0 || whence > 2) { if (whence < 0 || whence > 2) {
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"whence must be between 0 and 2, not %d", whence); "whence must be between 0 and 2, not %d", whence);
return NULL; return NULL;
} }
CHECK_CLOSED(self, "seek of closed file")
target = PyNumber_AsOff_t(targetobj, PyExc_ValueError); target = PyNumber_AsOff_t(targetobj, PyExc_ValueError);
if (target == -1 && PyErr_Occurred()) if (target == -1 && PyErr_Occurred())
return NULL; return NULL;
ENTER_BUFFERED(self)
if (whence != 2 && self->readable) { if (whence != 2 && self->readable) {
Py_off_t current, avail; Py_off_t current, avail;
/* Check if seeking leaves us inside the current buffer, /* Check if seeking leaves us inside the current buffer,
so as to return quickly if possible. so as to return quickly if possible. Also, we needn't take the
lock in this fast path.
Don't know how to do that when whence == 2, though. */ Don't know how to do that when whence == 2, though. */
/* NOTE: RAW_TELL() can release the GIL but the object is in a stable
state at this point. */
current = RAW_TELL(self); current = RAW_TELL(self);
avail = READAHEAD(self); avail = READAHEAD(self);
if (avail > 0) { if (avail > 0) {
...@@ -945,12 +971,13 @@ Buffered_seek(BufferedObject *self, PyObject *args) ...@@ -945,12 +971,13 @@ Buffered_seek(BufferedObject *self, PyObject *args)
offset = target; offset = target;
if (offset >= -self->pos && offset <= avail) { if (offset >= -self->pos && offset <= avail) {
self->pos += offset; self->pos += offset;
res = PyLong_FromOff_t(current - avail + offset); return PyLong_FromOff_t(current - avail + offset);
goto end;
} }
} }
} }
ENTER_BUFFERED(self)
/* Fallback: invoke raw seek() method and clear buffer */ /* Fallback: invoke raw seek() method and clear buffer */
if (self->writable) { if (self->writable) {
res = _BufferedWriter_flush_unlocked(self, 0); res = _BufferedWriter_flush_unlocked(self, 0);
...@@ -1094,6 +1121,9 @@ BufferedReader_init(BufferedObject *self, PyObject *args, PyObject *kwds) ...@@ -1094,6 +1121,9 @@ BufferedReader_init(BufferedObject *self, PyObject *args, PyObject *kwds)
return -1; return -1;
_BufferedReader_reset_buf(self); _BufferedReader_reset_buf(self);
self->fast_closed_checks = (Py_TYPE(self) == &PyBufferedReader_Type &&
Py_TYPE(raw) == &PyFileIO_Type);
self->ok = 1; self->ok = 1;
return 0; return 0;
} }
...@@ -1150,21 +1180,17 @@ _BufferedReader_fill_buffer(BufferedObject *self) ...@@ -1150,21 +1180,17 @@ _BufferedReader_fill_buffer(BufferedObject *self)
} }
static PyObject * static PyObject *
_BufferedReader_read_unlocked(BufferedObject *self, Py_ssize_t n) _BufferedReader_read_all(BufferedObject *self)
{ {
PyObject *data, *res = NULL; Py_ssize_t current_size;
Py_ssize_t current_size, remaining, written; PyObject *res, *data = NULL;
char *out;
/* Special case for when the number of bytes to read is unspecified. */
if (n == -1) {
PyObject *chunks = PyList_New(0); PyObject *chunks = PyList_New(0);
if (chunks == NULL) if (chunks == NULL)
return NULL; return NULL;
/* First copy what we have in the current buffer. */ /* First copy what we have in the current buffer. */
current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t);
data = NULL;
if (current_size) { if (current_size) {
data = PyBytes_FromStringAndSize( data = PyBytes_FromStringAndSize(
self->buffer + self->pos, current_size); self->buffer + self->pos, current_size);
...@@ -1221,22 +1247,40 @@ _BufferedReader_read_unlocked(BufferedObject *self, Py_ssize_t n) ...@@ -1221,22 +1247,40 @@ _BufferedReader_read_unlocked(BufferedObject *self, Py_ssize_t n)
if (self->abs_pos != -1) if (self->abs_pos != -1)
self->abs_pos += PyBytes_GET_SIZE(data); self->abs_pos += PyBytes_GET_SIZE(data);
} }
} }
/* Read n bytes from the buffer if it can, otherwise return None.
This function is simple enough that it can run unlocked. */
static PyObject *
_BufferedReader_read_fast(BufferedObject *self, Py_ssize_t n)
{
Py_ssize_t current_size;
/* The number of bytes to read is specified, return at most n bytes. */
current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t);
if (n <= current_size) { if (n <= current_size) {
/* Fast path: the data to read is fully buffered. */ /* Fast path: the data to read is fully buffered. */
res = PyBytes_FromStringAndSize(self->buffer + self->pos, n); PyObject *res = PyBytes_FromStringAndSize(self->buffer + self->pos, n);
if (res == NULL) if (res != NULL)
goto error;
self->pos += n; self->pos += n;
return res; return res;
} }
Py_RETURN_NONE;
}
/* Slow path: read from the stream until enough bytes are read, /* Generic read function: read from the stream until enough bytes are read,
* or until an EOF occurs or until read() would block. * or until an EOF occurs or until read() would block.
*/ */
static PyObject *
_BufferedReader_read_generic(BufferedObject *self, Py_ssize_t n)
{
PyObject *res = NULL;
Py_ssize_t current_size, remaining, written;
char *out;
current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t);
if (n <= current_size)
return _BufferedReader_read_fast(self, n);
res = PyBytes_FromStringAndSize(NULL, n); res = PyBytes_FromStringAndSize(NULL, n);
if (res == NULL) if (res == NULL)
goto error; goto error;
...@@ -1479,6 +1523,9 @@ BufferedWriter_init(BufferedObject *self, PyObject *args, PyObject *kwds) ...@@ -1479,6 +1523,9 @@ BufferedWriter_init(BufferedObject *self, PyObject *args, PyObject *kwds)
_BufferedWriter_reset_buf(self); _BufferedWriter_reset_buf(self);
self->pos = 0; self->pos = 0;
self->fast_closed_checks = (Py_TYPE(self) == &PyBufferedWriter_Type &&
Py_TYPE(raw) == &PyFileIO_Type);
self->ok = 1; self->ok = 1;
return 0; return 0;
} }
...@@ -1583,7 +1630,7 @@ BufferedWriter_write(BufferedObject *self, PyObject *args) ...@@ -1583,7 +1630,7 @@ BufferedWriter_write(BufferedObject *self, PyObject *args)
return NULL; return NULL;
} }
if (BufferedIOMixin_closed(self)) { if (IS_CLOSED(self)) {
PyErr_SetString(PyExc_ValueError, "write to closed file"); PyErr_SetString(PyExc_ValueError, "write to closed file");
PyBuffer_Release(&buf); PyBuffer_Release(&buf);
return NULL; return NULL;
...@@ -2066,6 +2113,9 @@ BufferedRandom_init(BufferedObject *self, PyObject *args, PyObject *kwds) ...@@ -2066,6 +2113,9 @@ BufferedRandom_init(BufferedObject *self, PyObject *args, PyObject *kwds)
_BufferedWriter_reset_buf(self); _BufferedWriter_reset_buf(self);
self->pos = 0; self->pos = 0;
self->fast_closed_checks = (Py_TYPE(self) == &PyBufferedRandom_Type &&
Py_TYPE(raw) == &PyFileIO_Type);
self->ok = 1; self->ok = 1;
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment