Commit 95bc0e47 authored by Benjamin Peterson

use Py_ssize_t for file offset and length computations in iteration (closes #22526)

parent ece9d5a9
...@@ -436,6 +436,18 @@ class OtherFileTests(unittest.TestCase): ...@@ -436,6 +436,18 @@ class OtherFileTests(unittest.TestCase):
finally: finally:
f.close() f.close()
@test_support.precisionbigmemtest(2**31, 1)
def test_very_long_line(self, maxsize):
    # Issue #22526: iterating over a file must cope with a single line
    # that is 2**31 bytes long.
    # NOTE(review): maxsize is not used in the body; presumably it is
    # passed in by the bigmem decorator -- confirm against test_support.
    line_length = 2**31

    # The file holds line_length NUL bytes and no newline, so iteration
    # yields exactly one line.
    with open(TESTFN, "wb") as out:
        out.write("\0" * line_length)

    # Run the iterator to exhaustion; afterwards `line` is the last
    # (and only) line that was produced.
    with open(TESTFN, "rb") as src:
        for line in src:
            pass

    self.assertEqual(len(line), line_length)
    self.assertEqual(line.count("\0"), line_length)
    # Drop the local reference to the very large string.
    line = None
class FileSubclassTests(unittest.TestCase): class FileSubclassTests(unittest.TestCase):
def testExit(self): def testExit(self):
......
...@@ -10,6 +10,8 @@ What's New in Python 2.7.9? ...@@ -10,6 +10,8 @@ What's New in Python 2.7.9?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #22526: Fix iterating through files with lines longer than 2^31 bytes.
- Issue #22519: Fix overflow checking in PyString_Repr. - Issue #22519: Fix overflow checking in PyString_Repr.
- Issue #22518: Fix integer overflow issues in latin-1 encoding. - Issue #22518: Fix integer overflow issues in latin-1 encoding.
......
...@@ -2236,7 +2236,7 @@ drop_readahead(PyFileObject *f) ...@@ -2236,7 +2236,7 @@ drop_readahead(PyFileObject *f)
(unless at EOF) and no more than bufsize. Returns negative value on (unless at EOF) and no more than bufsize. Returns negative value on
error, will set MemoryError if bufsize bytes cannot be allocated. */ error, will set MemoryError if bufsize bytes cannot be allocated. */
static int static int
readahead(PyFileObject *f, int bufsize) readahead(PyFileObject *f, Py_ssize_t bufsize)
{ {
Py_ssize_t chunksize; Py_ssize_t chunksize;
...@@ -2274,7 +2274,7 @@ readahead(PyFileObject *f, int bufsize) ...@@ -2274,7 +2274,7 @@ readahead(PyFileObject *f, int bufsize)
logarithmic buffer growth to about 50 even when reading a 1gb line. */ logarithmic buffer growth to about 50 even when reading a 1gb line. */
static PyStringObject * static PyStringObject *
readahead_get_line_skip(PyFileObject *f, int skip, int bufsize) readahead_get_line_skip(PyFileObject *f, Py_ssize_t skip, Py_ssize_t bufsize)
{ {
PyStringObject* s; PyStringObject* s;
char *bufptr; char *bufptr;
...@@ -2294,10 +2294,10 @@ readahead_get_line_skip(PyFileObject *f, int skip, int bufsize) ...@@ -2294,10 +2294,10 @@ readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
bufptr++; /* Count the '\n' */ bufptr++; /* Count the '\n' */
len = bufptr - f->f_bufptr; len = bufptr - f->f_bufptr;
s = (PyStringObject *) s = (PyStringObject *)
PyString_FromStringAndSize(NULL, skip+len); PyString_FromStringAndSize(NULL, skip + len);
if (s == NULL) if (s == NULL)
return NULL; return NULL;
memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len); memcpy(PyString_AS_STRING(s) + skip, f->f_bufptr, len);
f->f_bufptr = bufptr; f->f_bufptr = bufptr;
if (bufptr == f->f_bufend) if (bufptr == f->f_bufend)
drop_readahead(f); drop_readahead(f);
...@@ -2305,14 +2305,13 @@ readahead_get_line_skip(PyFileObject *f, int skip, int bufsize) ...@@ -2305,14 +2305,13 @@ readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
bufptr = f->f_bufptr; bufptr = f->f_bufptr;
buf = f->f_buf; buf = f->f_buf;
f->f_buf = NULL; /* Force new readahead buffer */ f->f_buf = NULL; /* Force new readahead buffer */
assert(skip+len < INT_MAX); assert(len <= PY_SSIZE_T_MAX - skip);
s = readahead_get_line_skip( s = readahead_get_line_skip(f, skip + len, bufsize + (bufsize>>2));
f, (int)(skip+len), bufsize + (bufsize>>2) );
if (s == NULL) { if (s == NULL) {
PyMem_Free(buf); PyMem_Free(buf);
return NULL; return NULL;
} }
memcpy(PyString_AS_STRING(s)+skip, bufptr, len); memcpy(PyString_AS_STRING(s) + skip, bufptr, len);
PyMem_Free(buf); PyMem_Free(buf);
} }
return s; return s;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment