Commit 276f1d51 authored by Serhiy Storchaka

Issue #7358: cStringIO.StringIO now supports writing to and reading from
a stream larger than 2 GiB on 64-bit systems.
parent beaa3ada
...@@ -5,6 +5,7 @@ import StringIO ...@@ -5,6 +5,7 @@ import StringIO
import cStringIO import cStringIO
import types import types
import array import array
import sys
from test import test_support from test import test_support
...@@ -105,6 +106,45 @@ class TestGenericStringIO(unittest.TestCase): ...@@ -105,6 +106,45 @@ class TestGenericStringIO(unittest.TestCase):
self._fp.close() self._fp.close()
self.assertRaises(ValueError, self._fp.getvalue) self.assertRaises(ValueError, self._fp.getvalue)
@test_support.bigmemtest(test_support._2G + 2**26, memuse=2.001)
def test_reads_from_large_stream(self, size):
    # Exercise read(), readline() and readlines() on a stream of `size`
    # characters built from 64 MiB newline-terminated lines plus an
    # optional unterminated tail.
    linesize = 2**26 # 64 MiB
    lines = ['x' * (linesize - 1) + '\n'] * (size // linesize)
    # Only keep the tail when it is non-empty.  readlines() never yields
    # an empty string, so an empty trailing entry would make the
    # readlines() comparisons below fail whenever size is an exact
    # multiple of linesize.  NOTE(review): the decorator's minimum size,
    # _2G + 2**26, is such a multiple if _2G is 2**31 -- confirm.
    tail = 'y' * (size % linesize)
    if tail:
        lines.append(tail)
    f = self.MODULE.StringIO(''.join(lines))

    # Sized reads: each read(len(expected)) must return exactly one line.
    for expected in lines:
        line = f.read(len(expected))
        # Length is checked before content; presumably so that a size
        # mismatch is reported without comparing the huge strings.
        self.assertEqual(len(line), len(expected))
        self.assertEqual(line, expected)
    self.assertEqual(f.read(), '')

    # readline(): must split on the same boundaries, then report EOF.
    f.seek(0)
    for expected in lines:
        line = f.readline()
        self.assertEqual(len(line), len(expected))
        self.assertEqual(line, expected)
    self.assertEqual(f.readline(), '')

    # readlines() without a hint returns everything, then nothing.
    f.seek(0)
    self.assertEqual(f.readlines(), lines)
    self.assertEqual(f.readlines(), [])

    # readlines() with a hint equal to the total size must also return
    # every line.
    f.seek(0)
    self.assertEqual(f.readlines(size), lines)
    self.assertEqual(f.readlines(), [])
# In worst case cStringIO requires 2 + 1 + 1/2 + 1/2**2 + ... = 4
# bytes per input character.
@test_support.bigmemtest(test_support._2G, memuse=4)
def test_writes_to_large_stream(self, size):
    # Fill a stream with `size` characters using 64 MiB writes, then
    # check the total length reported by getvalue().
    chunk = 'x' * 2**26 # 64 MiB
    stream = self.MODULE.StringIO()
    remaining = size
    while remaining > len(chunk):
        stream.write(chunk)
        remaining -= len(chunk)
    # Drop the reference to the chunk before building the final piece.
    chunk = None
    stream.write('x' * remaining)
    self.assertEqual(len(stream.getvalue()), size)
class TestStringIO(TestGenericStringIO): class TestStringIO(TestGenericStringIO):
MODULE = StringIO MODULE = StringIO
......
...@@ -202,6 +202,9 @@ Core and Builtins ...@@ -202,6 +202,9 @@ Core and Builtins
Library Library
------- -------
- Issue #7358: cStringIO.StringIO now supports writing to and reading from
a stream larger than 2 GiB on 64-bit systems.
- Issue #10355: In SpooledTemporaryFile class mode and name properties and - Issue #10355: In SpooledTemporaryFile class mode and name properties and
xreadlines method now work for unrolled files. encoding and newlines xreadlines method now work for unrolled files. encoding and newlines
properties now removed as they have no sense and always produced properties now removed as they have no sense and always produced
......
...@@ -170,10 +170,15 @@ IO_cread(PyObject *self, char **output, Py_ssize_t n) { ...@@ -170,10 +170,15 @@ IO_cread(PyObject *self, char **output, Py_ssize_t n) {
n = l; n = l;
if (n < 0) n=0; if (n < 0) n=0;
} }
if (n > INT_MAX) {
PyErr_SetString(PyExc_OverflowError,
"length too large");
return -1;
}
*output=((IOobject*)self)->buf + ((IOobject*)self)->pos; *output=((IOobject*)self)->buf + ((IOobject*)self)->pos;
((IOobject*)self)->pos += n; ((IOobject*)self)->pos += n;
return n; return (int)n;
} }
static PyObject * static PyObject *
...@@ -192,26 +197,33 @@ PyDoc_STRVAR(IO_readline__doc__, "readline() -- Read one line"); ...@@ -192,26 +197,33 @@ PyDoc_STRVAR(IO_readline__doc__, "readline() -- Read one line");
static int static int
IO_creadline(PyObject *self, char **output) { IO_creadline(PyObject *self, char **output) {
char *n, *s; char *n, *start, *end;
Py_ssize_t l; Py_ssize_t len;
if (!IO__opencheck(IOOOBJECT(self))) return -1; if (!IO__opencheck(IOOOBJECT(self))) return -1;
for (n = ((IOobject*)self)->buf + ((IOobject*)self)->pos, n = start = ((IOobject*)self)->buf + ((IOobject*)self)->pos;
s = ((IOobject*)self)->buf + ((IOobject*)self)->string_size; end = ((IOobject*)self)->buf + ((IOobject*)self)->string_size;
n < s && *n != '\n'; n++); while (n < end && *n != '\n')
n++;
if (n < s) n++; if (n < end) n++;
*output=((IOobject*)self)->buf + ((IOobject*)self)->pos; len = n - start;
l = n - ((IOobject*)self)->buf - ((IOobject*)self)->pos; if (len > INT_MAX) {
PyErr_SetString(PyExc_OverflowError,
"length too large");
return -1;
}
*output=start;
assert(IOOOBJECT(self)->pos <= PY_SSIZE_T_MAX - l); assert(IOOOBJECT(self)->pos <= PY_SSIZE_T_MAX - len);
assert(IOOOBJECT(self)->pos >= 0); assert(IOOOBJECT(self)->pos >= 0);
assert(IOOOBJECT(self)->string_size >= 0); assert(IOOOBJECT(self)->string_size >= 0);
((IOobject*)self)->pos += l; ((IOobject*)self)->pos += len;
return (int)l; return (int)len;
} }
static PyObject * static PyObject *
...@@ -239,9 +251,9 @@ IO_readlines(IOobject *self, PyObject *args) { ...@@ -239,9 +251,9 @@ IO_readlines(IOobject *self, PyObject *args) {
int n; int n;
char *output; char *output;
PyObject *result, *line; PyObject *result, *line;
int hint = 0, length = 0; Py_ssize_t hint = 0, length = 0;
if (!PyArg_ParseTuple(args, "|i:readlines", &hint)) return NULL; if (!PyArg_ParseTuple(args, "|n:readlines", &hint)) return NULL;
result = PyList_New(0); result = PyList_New(0);
if (!result) if (!result)
...@@ -377,31 +389,41 @@ PyDoc_STRVAR(O_write__doc__, ...@@ -377,31 +389,41 @@ PyDoc_STRVAR(O_write__doc__,
static int static int
O_cwrite(PyObject *self, const char *c, Py_ssize_t l) { O_cwrite(PyObject *self, const char *c, Py_ssize_t len) {
Py_ssize_t newl; Py_ssize_t newpos;
Oobject *oself; Oobject *oself;
char *newbuf; char *newbuf;
if (!IO__opencheck(IOOOBJECT(self))) return -1; if (!IO__opencheck(IOOOBJECT(self))) return -1;
oself = (Oobject *)self; oself = (Oobject *)self;
newl = oself->pos+l; if (len > INT_MAX) {
if (newl >= oself->buf_size) { PyErr_SetString(PyExc_OverflowError,
oself->buf_size *= 2; "length too large");
if (oself->buf_size <= newl) { return -1;
assert(newl + 1 < INT_MAX); }
oself->buf_size = (int)(newl+1); assert(len >= 0);
if (oself->pos >= PY_SSIZE_T_MAX - len) {
PyErr_SetString(PyExc_OverflowError,
"new position too large");
return -1;
}
newpos = oself->pos + len;
if (newpos >= oself->buf_size) {
size_t newsize = oself->buf_size;
newsize *= 2;
if (newsize <= (size_t)newpos || newsize > PY_SSIZE_T_MAX) {
assert(newpos < PY_SSIZE_T_MAX - 1);
newsize = newpos + 1;
} }
newbuf = (char*)realloc(oself->buf, oself->buf_size); newbuf = (char*)realloc(oself->buf, newsize);
if (!newbuf) { if (!newbuf) {
PyErr_SetString(PyExc_MemoryError,"out of memory"); PyErr_SetString(PyExc_MemoryError,"out of memory");
free(oself->buf);
oself->buf = 0;
oself->buf_size = oself->pos = 0;
return -1; return -1;
} }
oself->buf_size = (Py_ssize_t)newsize;
oself->buf = newbuf; oself->buf = newbuf;
} }
if (oself->string_size < oself->pos) { if (oself->string_size < oself->pos) {
/* In case of overseek, pad with null bytes the buffer region between /* In case of overseek, pad with null bytes the buffer region between
...@@ -416,16 +438,15 @@ O_cwrite(PyObject *self, const char *c, Py_ssize_t l) { ...@@ -416,16 +438,15 @@ O_cwrite(PyObject *self, const char *c, Py_ssize_t l) {
(oself->pos - oself->string_size) * sizeof(char)); (oself->pos - oself->string_size) * sizeof(char));
} }
memcpy(oself->buf+oself->pos,c,l); memcpy(oself->buf + oself->pos, c, len);
assert(oself->pos + l < INT_MAX); oself->pos = newpos;
oself->pos += (int)l;
if (oself->string_size < oself->pos) { if (oself->string_size < oself->pos) {
oself->string_size = oself->pos; oself->string_size = oself->pos;
} }
return (int)l; return (int)len;
} }
static PyObject * static PyObject *
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment