Commit 972ee13e authored by Antoine Pitrou

Issue #5506: BytesIO objects now have a getbuffer() method exporting a
view of their contents without duplicating them.  The view is both readable
and writable.
parent 6285774f
...@@ -518,6 +518,24 @@ In many situations, buffered I/O streams will provide higher performance ...@@ -518,6 +518,24 @@ In many situations, buffered I/O streams will provide higher performance
:class:`BytesIO` provides or overrides these methods in addition to those :class:`BytesIO` provides or overrides these methods in addition to those
from :class:`BufferedIOBase` and :class:`IOBase`: from :class:`BufferedIOBase` and :class:`IOBase`:
.. method:: getbuffer()
Return a readable and writable view over the contents of the buffer
without copying them. Also, mutating the view will transparently
update the contents of the buffer::
>>> b = io.BytesIO(b"abcdef")
>>> view = b.getbuffer()
>>> view[2:4] = b"56"
>>> b.getvalue()
b'ab56ef'
.. note::
As long as the view exists, the :class:`BytesIO` object cannot be
resized.
.. versionadded:: 3.2
.. method:: getvalue() .. method:: getvalue()
Return ``bytes`` containing the entire contents of the buffer. Return ``bytes`` containing the entire contents of the buffer.
......
...@@ -785,6 +785,11 @@ class BytesIO(BufferedIOBase): ...@@ -785,6 +785,11 @@ class BytesIO(BufferedIOBase):
raise ValueError("getvalue on closed file") raise ValueError("getvalue on closed file")
return bytes(self._buffer) return bytes(self._buffer)
def getbuffer(self):
    """Return a readable and writable view of the buffer.

    The returned memoryview wraps the underlying buffer directly, so
    writing through the view changes the stream contents in place and no
    copy is made.

    Raises:
        ValueError: if the stream has been closed.
    """
    # Reject closed streams, like getvalue() and read() do.
    if self.closed:
        raise ValueError("getbuffer on closed file")
    return memoryview(self._buffer)
def read(self, n=None): def read(self, n=None):
if self.closed: if self.closed:
raise ValueError("read from closed file") raise ValueError("read from closed file")
......
...@@ -384,7 +384,31 @@ class MemoryTestMixin: ...@@ -384,7 +384,31 @@ class MemoryTestMixin:
del __main__.PickleTestMemIO del __main__.PickleTestMemIO
class PyBytesIOTest(MemoryTestMixin, MemorySeekTestMixin, unittest.TestCase): class BytesIOMixin:
def test_getbuffer(self):
    """getbuffer() exposes a live, writable view that pins the size."""
    initial = b"1234567890"
    memio = self.ioclass(initial)
    view = memio.getbuffer()
    self.assertEqual(bytes(view), initial)
    # The view spans the whole contents whatever the stream position is.
    memio.seek(5)
    view = memio.getbuffer()
    self.assertEqual(bytes(view), initial)
    # Trying to change the size of the BytesIO while a buffer is exported
    # raises a BufferError.
    with self.assertRaises(BufferError):
        memio.write(b'x' * 100)
    with self.assertRaises(BufferError):
        memio.truncate()
    # Writes made through the view are visible from the BytesIO.
    view[3:6] = b"abc"
    expected = b"123abc7890"
    self.assertEqual(bytes(view), expected)
    self.assertEqual(memio.getvalue(), expected)
    # Once the view is released, the BytesIO can be resized again.
    del view
    support.gc_collect()
    memio.truncate()
class PyBytesIOTest(MemoryTestMixin, MemorySeekTestMixin,
BytesIOMixin, unittest.TestCase):
UnsupportedOperation = pyio.UnsupportedOperation UnsupportedOperation = pyio.UnsupportedOperation
......
...@@ -13,6 +13,10 @@ Core and Builtins ...@@ -13,6 +13,10 @@ Core and Builtins
Library Library
------- -------
- Issue #5506: BytesIO objects now have a getbuffer() method exporting a
view of their contents without duplicating them. The view is both readable
and writable.
- Issue #7566: Implement os.path.sameopenfile for Windows. - Issue #7566: Implement os.path.sameopenfile for Windows.
- Issue #9293: I/O streams now raise ``io.UnsupportedOperation`` when an - Issue #9293: I/O streams now raise ``io.UnsupportedOperation`` when an
......
...@@ -710,6 +710,8 @@ PyInit__io(void) ...@@ -710,6 +710,8 @@ PyInit__io(void)
/* BytesIO */ /* BytesIO */
PyBytesIO_Type.tp_base = &PyBufferedIOBase_Type; PyBytesIO_Type.tp_base = &PyBufferedIOBase_Type;
ADD_TYPE(&PyBytesIO_Type, "BytesIO"); ADD_TYPE(&PyBytesIO_Type, "BytesIO");
if (PyType_Ready(&_PyBytesIOBuffer_Type) < 0)
goto fail;
/* StringIO */ /* StringIO */
PyStringIO_Type.tp_base = &PyTextIOBase_Type; PyStringIO_Type.tp_base = &PyTextIOBase_Type;
......
...@@ -169,3 +169,5 @@ extern PyObject *_PyIO_str_write; ...@@ -169,3 +169,5 @@ extern PyObject *_PyIO_str_write;
extern PyObject *_PyIO_empty_str; extern PyObject *_PyIO_empty_str;
extern PyObject *_PyIO_empty_bytes; extern PyObject *_PyIO_empty_bytes;
extern PyObject *_PyIO_zero; extern PyObject *_PyIO_zero;
extern PyTypeObject _PyBytesIOBuffer_Type;
...@@ -10,8 +10,15 @@ typedef struct { ...@@ -10,8 +10,15 @@ typedef struct {
size_t buf_size; size_t buf_size;
PyObject *dict; PyObject *dict;
PyObject *weakreflist; PyObject *weakreflist;
Py_ssize_t exports;
} bytesio; } bytesio;
/* Small intermediate object that exports the buffer of a BytesIO object;
   bytesio_getbuffer() wraps an instance of it in a memoryview. */
typedef struct {
PyObject_HEAD
bytesio *source; /* owned reference to the BytesIO being exported */
} bytesiobuf;
#define CHECK_CLOSED(self) \ #define CHECK_CLOSED(self) \
if ((self)->buf == NULL) { \ if ((self)->buf == NULL) { \
PyErr_SetString(PyExc_ValueError, \ PyErr_SetString(PyExc_ValueError, \
...@@ -19,6 +26,14 @@ typedef struct { ...@@ -19,6 +26,14 @@ typedef struct {
return NULL; \ return NULL; \
} }
/* Make the calling function fail with BufferError (returning NULL) while
   the BytesIO object has at least one outstanding buffer export. */
#define CHECK_EXPORTS(self) \
if ((self)->exports > 0) { \
PyErr_SetString(PyExc_BufferError, \
"Existing exports of data: object cannot be re-sized"); \
return NULL; \
}
/* Internal routine to get a line from the buffer of a BytesIO /* Internal routine to get a line from the buffer of a BytesIO
object. Returns the length between the current position to the object. Returns the length between the current position to the
next newline character. */ next newline character. */
...@@ -173,6 +188,30 @@ bytesio_flush(bytesio *self) ...@@ -173,6 +188,30 @@ bytesio_flush(bytesio *self)
Py_RETURN_NONE; Py_RETURN_NONE;
} }
PyDoc_STRVAR(getbuffer_doc,
"getbuffer() -> memoryview.\n"
"\n"
"Get a read-write view over the contents of the BytesIO object.");

/* Return a memoryview over the contents of the BytesIO object.
 *
 * The view is not taken on the BytesIO itself but on a freshly allocated
 * bytesiobuf object that holds a strong reference to it.
 *
 * Returns a new reference, or NULL with an exception set if the object is
 * closed, the allocation fails, or the memoryview cannot be created.
 */
static PyObject *
bytesio_getbuffer(bytesio *self)
{
    PyTypeObject *type = &_PyBytesIOBuffer_Type;
    bytesiobuf *buf;
    PyObject *view;

    CHECK_CLOSED(self);

    buf = (bytesiobuf *) type->tp_alloc(type, 0);
    if (buf == NULL)
        return NULL;
    /* The intermediate object owns a reference to the BytesIO; it is
       dropped again in bytesiobuf_dealloc(). */
    Py_INCREF(self);
    buf->source = self;
    view = PyMemoryView_FromObject((PyObject *) buf);
    /* Drop our own reference whether or not the view was created. */
    Py_DECREF(buf);
    return view;
}
PyDoc_STRVAR(getval_doc, PyDoc_STRVAR(getval_doc,
"getvalue() -> bytes.\n" "getvalue() -> bytes.\n"
"\n" "\n"
...@@ -422,6 +461,7 @@ bytesio_truncate(bytesio *self, PyObject *args) ...@@ -422,6 +461,7 @@ bytesio_truncate(bytesio *self, PyObject *args)
PyObject *arg = Py_None; PyObject *arg = Py_None;
CHECK_CLOSED(self); CHECK_CLOSED(self);
CHECK_EXPORTS(self);
if (!PyArg_ParseTuple(args, "|O:truncate", &arg)) if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
return NULL; return NULL;
...@@ -543,6 +583,7 @@ bytesio_write(bytesio *self, PyObject *obj) ...@@ -543,6 +583,7 @@ bytesio_write(bytesio *self, PyObject *obj)
PyObject *result = NULL; PyObject *result = NULL;
CHECK_CLOSED(self); CHECK_CLOSED(self);
CHECK_EXPORTS(self);
if (PyObject_GetBuffer(obj, &buf, PyBUF_CONTIG_RO) < 0) if (PyObject_GetBuffer(obj, &buf, PyBUF_CONTIG_RO) < 0)
return NULL; return NULL;
...@@ -664,6 +705,7 @@ bytesio_setstate(bytesio *self, PyObject *state) ...@@ -664,6 +705,7 @@ bytesio_setstate(bytesio *self, PyObject *state)
Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name); Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
return NULL; return NULL;
} }
CHECK_EXPORTS(self);
/* Reset the object to its default state. This is only needed to handle /* Reset the object to its default state. This is only needed to handle
the case of repeated calls to __setstate__. */ the case of repeated calls to __setstate__. */
self->string_size = 0; self->string_size = 0;
...@@ -724,6 +766,11 @@ static void ...@@ -724,6 +766,11 @@ static void
bytesio_dealloc(bytesio *self) bytesio_dealloc(bytesio *self)
{ {
_PyObject_GC_UNTRACK(self); _PyObject_GC_UNTRACK(self);
if (self->exports > 0) {
PyErr_SetString(PyExc_SystemError,
"deallocated BytesIO object has exported buffers");
PyErr_Print();
}
if (self->buf != NULL) { if (self->buf != NULL) {
PyMem_Free(self->buf); PyMem_Free(self->buf);
self->buf = NULL; self->buf = NULL;
...@@ -818,6 +865,7 @@ static struct PyMethodDef bytesio_methods[] = { ...@@ -818,6 +865,7 @@ static struct PyMethodDef bytesio_methods[] = {
{"readline", (PyCFunction)bytesio_readline, METH_VARARGS, readline_doc}, {"readline", (PyCFunction)bytesio_readline, METH_VARARGS, readline_doc},
{"readlines", (PyCFunction)bytesio_readlines, METH_VARARGS, readlines_doc}, {"readlines", (PyCFunction)bytesio_readlines, METH_VARARGS, readlines_doc},
{"read", (PyCFunction)bytesio_read, METH_VARARGS, read_doc}, {"read", (PyCFunction)bytesio_read, METH_VARARGS, read_doc},
{"getbuffer", (PyCFunction)bytesio_getbuffer, METH_NOARGS, getbuffer_doc},
{"getvalue", (PyCFunction)bytesio_getvalue, METH_NOARGS, getval_doc}, {"getvalue", (PyCFunction)bytesio_getvalue, METH_NOARGS, getval_doc},
{"seek", (PyCFunction)bytesio_seek, METH_VARARGS, seek_doc}, {"seek", (PyCFunction)bytesio_seek, METH_VARARGS, seek_doc},
{"truncate", (PyCFunction)bytesio_truncate, METH_VARARGS, truncate_doc}, {"truncate", (PyCFunction)bytesio_truncate, METH_VARARGS, truncate_doc},
...@@ -873,3 +921,96 @@ PyTypeObject PyBytesIO_Type = { ...@@ -873,3 +921,96 @@ PyTypeObject PyBytesIO_Type = {
0, /*tp_alloc*/ 0, /*tp_alloc*/
bytesio_new, /*tp_new*/ bytesio_new, /*tp_new*/
}; };
/*
* Implementation of the small intermediate object used by getbuffer().
* getbuffer() returns a memoryview over this object, which should make it
* invisible from Python code.
*/
/* bf_getbuffer slot of the intermediate object.
 *
 * Fills `view` with a writable, one-dimensional description of the source
 * BytesIO's buffer (b->buf, b->string_size bytes) and counts the export on
 * the BytesIO so that CHECK_EXPORTS() can refuse to resize it.
 *
 * A NULL `view` only records the export.  Returns 0 on success, or the
 * negative result of PyBuffer_FillInfo() on failure, in which case the
 * export count is left unchanged.
 */
static int
bytesiobuf_getbuffer(bytesiobuf *obj, Py_buffer *view, int flags)
{
    int ret;
    bytesio *b = obj->source;

    if (view == NULL) {
        b->exports++;
        return 0;
    }
    ret = PyBuffer_FillInfo(view, (PyObject*)obj, b->buf, b->string_size,
                            0, flags);
    if (ret >= 0) {
        b->exports++;
    }
    return ret;
}
/* bf_releasebuffer slot: undo the export recorded on the source BytesIO
   by bytesiobuf_getbuffer(). */
static void
bytesiobuf_releasebuffer(bytesiobuf *obj, Py_buffer *view)
{
    obj->source->exports--;
}
/* tp_traverse slot: report the referenced BytesIO to the garbage
   collector.  Always returns 0 unless the visit callback fails. */
static int
bytesiobuf_traverse(bytesiobuf *self, visitproc visit, void *arg)
{
Py_VISIT(self->source);
return 0;
}
/* tp_dealloc slot: release the reference to the source BytesIO and free
   the intermediate object. */
static void
bytesiobuf_dealloc(bytesiobuf *self)
{
    /* The type is declared with Py_TPFLAGS_HAVE_GC, so the object must be
       untracked before any of its fields is invalidated: clearing
       `source` may deallocate the BytesIO and run arbitrary code. */
    PyObject_GC_UnTrack(self);
    Py_CLEAR(self->source);
    Py_TYPE(self)->tp_free(self);
}
/* Buffer protocol slots of the intermediate object: acquire and release
   handlers that keep the source BytesIO's export count up to date. */
static PyBufferProcs bytesiobuf_as_buffer = {
(getbufferproc) bytesiobuf_getbuffer,
(releasebufferproc) bytesiobuf_releasebuffer,
};
/* Type object of the intermediate buffer exporter.  Only deallocation,
   the buffer protocol and GC traversal are filled in; tp_new is left
   empty and instances are allocated directly in bytesio_getbuffer(). */
PyTypeObject _PyBytesIOBuffer_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
"_io._BytesIOBuffer", /*tp_name*/
sizeof(bytesiobuf), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)bytesiobuf_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_reserved*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash*/
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
&bytesiobuf_as_buffer, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
0, /*tp_doc*/
(traverseproc)bytesiobuf_traverse, /*tp_traverse*/
0, /*tp_clear*/
0, /*tp_richcompare*/
0, /*tp_weaklistoffset*/
0, /*tp_iter*/
0, /*tp_iternext*/
0, /*tp_methods*/
0, /*tp_members*/
0, /*tp_getset*/
0, /*tp_base*/
0, /*tp_dict*/
0, /*tp_descr_get*/
0, /*tp_descr_set*/
0, /*tp_dictoffset*/
0, /*tp_init*/
0, /*tp_alloc*/
0, /*tp_new*/
};
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment