Commit ecc26923 authored by Antoine Pitrou

Issue #4681: Allow mmap() to work on file sizes and offsets larger than
4GB, even on 32-bit builds.  Initial patch by Ross Lagerwall, adapted for
32-bit Windows.
parent ce58dc7b
from test.support import TESTFN, run_unittest, import_module from test.support import TESTFN, run_unittest, import_module, unlink, requires
import unittest import unittest
import os import os
import re import re
...@@ -646,9 +646,56 @@ class MmapTests(unittest.TestCase): ...@@ -646,9 +646,56 @@ class MmapTests(unittest.TestCase):
"wrong exception raised in context manager") "wrong exception raised in context manager")
self.assertTrue(m.closed, "context manager failed") self.assertTrue(m.closed, "context manager failed")
class LargeMmapTests(unittest.TestCase):
def setUp(self):
unlink(TESTFN)
def tearDown(self):
unlink(TESTFN)
def _working_largefile(self):
# Only run if the current filesystem supports large files.
f = open(TESTFN, 'wb', buffering=0)
try:
f.seek(0x80000001)
f.write(b'x')
f.flush()
except (IOError, OverflowError):
raise unittest.SkipTest("filesystem does not have largefile support")
finally:
f.close()
unlink(TESTFN)
def test_large_offset(self):
if sys.platform[:3] == 'win' or sys.platform == 'darwin':
requires('largefile',
'test requires %s bytes and a long time to run' % str(0x180000000))
self._working_largefile()
with open(TESTFN, 'wb') as f:
f.seek(0x14FFFFFFF)
f.write(b" ")
with open(TESTFN, 'rb') as f:
with mmap.mmap(f.fileno(), 0, offset=0x140000000, access=mmap.ACCESS_READ) as m:
self.assertEqual(m[0xFFFFFFF], 32)
def test_large_filesize(self):
if sys.platform[:3] == 'win' or sys.platform == 'darwin':
requires('largefile',
'test requires %s bytes and a long time to run' % str(0x180000000))
self._working_largefile()
with open(TESTFN, 'wb') as f:
f.seek(0x17FFFFFFF)
f.write(b" ")
with open(TESTFN, 'rb') as f:
with mmap.mmap(f.fileno(), 0x10000, access=mmap.ACCESS_READ) as m:
self.assertEqual(m.size(), 0x180000000)
def test_main(): def test_main():
run_unittest(MmapTests) run_unittest(MmapTests, LargeMmapTests)
if __name__ == '__main__': if __name__ == '__main__':
test_main() test_main()
...@@ -27,6 +27,10 @@ Core and Builtins ...@@ -27,6 +27,10 @@ Core and Builtins
Library Library
------- -------
- Issue #4681: Allow mmap() to work on file sizes and offsets larger than
4GB, even on 32-bit builds. Initial patch by Ross Lagerwall, adapted for
32-bit Windows.
- Issue #11169: compileall module uses repr() to format filenames and paths to - Issue #11169: compileall module uses repr() to format filenames and paths to
escape surrogate characters and show spaces. escape surrogate characters and show spaces.
......
...@@ -90,7 +90,11 @@ typedef struct { ...@@ -90,7 +90,11 @@ typedef struct {
char * data; char * data;
size_t size; size_t size;
size_t pos; /* relative to offset */ size_t pos; /* relative to offset */
size_t offset; #ifdef MS_WINDOWS
PY_LONG_LONG offset;
#else
off_t offset;
#endif
int exports; int exports;
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
...@@ -433,7 +437,11 @@ mmap_size_method(mmap_object *self, ...@@ -433,7 +437,11 @@ mmap_size_method(mmap_object *self,
PyErr_SetFromErrno(mmap_module_error); PyErr_SetFromErrno(mmap_module_error);
return NULL; return NULL;
} }
return PyLong_FromSsize_t(buf.st_size); #ifdef HAVE_LARGEFILE_SUPPORT
return PyLong_FromLongLong(buf.st_size);
#else
return PyLong_FromLong(buf.st_size);
#endif
} }
#endif /* UNIX */ #endif /* UNIX */
} }
...@@ -467,17 +475,10 @@ mmap_resize_method(mmap_object *self, ...@@ -467,17 +475,10 @@ mmap_resize_method(mmap_object *self,
CloseHandle(self->map_handle); CloseHandle(self->map_handle);
self->map_handle = NULL; self->map_handle = NULL;
/* Move to the desired EOF position */ /* Move to the desired EOF position */
#if SIZEOF_SIZE_T > 4
newSizeHigh = (DWORD)((self->offset + new_size) >> 32); newSizeHigh = (DWORD)((self->offset + new_size) >> 32);
newSizeLow = (DWORD)((self->offset + new_size) & 0xFFFFFFFF); newSizeLow = (DWORD)((self->offset + new_size) & 0xFFFFFFFF);
off_hi = (DWORD)(self->offset >> 32); off_hi = (DWORD)(self->offset >> 32);
off_lo = (DWORD)(self->offset & 0xFFFFFFFF); off_lo = (DWORD)(self->offset & 0xFFFFFFFF);
#else
newSizeHigh = 0;
newSizeLow = (DWORD)(self->offset + new_size);
off_hi = 0;
off_lo = (DWORD)self->offset;
#endif
SetFilePointer(self->file_handle, SetFilePointer(self->file_handle,
newSizeLow, &newSizeHigh, FILE_BEGIN); newSizeLow, &newSizeHigh, FILE_BEGIN);
/* Change the size of the file */ /* Change the size of the file */
...@@ -1051,6 +1052,12 @@ _GetMapSize(PyObject *o, const char* param) ...@@ -1051,6 +1052,12 @@ _GetMapSize(PyObject *o, const char* param)
} }
#ifdef UNIX #ifdef UNIX
#ifdef HAVE_LARGEFILE_SUPPORT
#define _Py_PARSE_OFF_T "L"
#else
#define _Py_PARSE_OFF_T "l"
#endif
static PyObject * static PyObject *
new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict)
{ {
...@@ -1058,8 +1065,9 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) ...@@ -1058,8 +1065,9 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict)
struct stat st; struct stat st;
#endif #endif
mmap_object *m_obj; mmap_object *m_obj;
PyObject *map_size_obj = NULL, *offset_obj = NULL; PyObject *map_size_obj = NULL;
Py_ssize_t map_size, offset; Py_ssize_t map_size;
off_t offset = 0;
int fd, flags = MAP_SHARED, prot = PROT_WRITE | PROT_READ; int fd, flags = MAP_SHARED, prot = PROT_WRITE | PROT_READ;
int devzero = -1; int devzero = -1;
int access = (int)ACCESS_DEFAULT; int access = (int)ACCESS_DEFAULT;
...@@ -1067,16 +1075,18 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) ...@@ -1067,16 +1075,18 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict)
"flags", "prot", "flags", "prot",
"access", "offset", NULL}; "access", "offset", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iO|iiiO", keywords, if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iO|iii" _Py_PARSE_OFF_T, keywords,
&fd, &map_size_obj, &flags, &prot, &fd, &map_size_obj, &flags, &prot,
&access, &offset_obj)) &access, &offset))
return NULL; return NULL;
map_size = _GetMapSize(map_size_obj, "size"); map_size = _GetMapSize(map_size_obj, "size");
if (map_size < 0) if (map_size < 0)
return NULL; return NULL;
offset = _GetMapSize(offset_obj, "offset"); if (offset < 0) {
if (offset < 0) PyErr_SetString(PyExc_OverflowError,
"memory mapped offset must be positive");
return NULL; return NULL;
}
if ((access != (int)ACCESS_DEFAULT) && if ((access != (int)ACCESS_DEFAULT) &&
((flags != MAP_SHARED) || (prot != (PROT_WRITE | PROT_READ)))) ((flags != MAP_SHARED) || (prot != (PROT_WRITE | PROT_READ))))
...@@ -1121,8 +1131,14 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) ...@@ -1121,8 +1131,14 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict)
"mmap offset is greater than file size"); "mmap offset is greater than file size");
return NULL; return NULL;
} }
map_size = st.st_size - offset; off_t calc_size = st.st_size - offset;
} else if ((size_t)offset + (size_t)map_size > st.st_size) { map_size = calc_size;
if (map_size != calc_size) {
PyErr_SetString(PyExc_ValueError,
"mmap length is too large");
return NULL;
}
} else if (offset + (size_t)map_size > st.st_size) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"mmap length is greater than file size"); "mmap length is greater than file size");
return NULL; return NULL;
...@@ -1183,12 +1199,19 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) ...@@ -1183,12 +1199,19 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict)
#endif /* UNIX */ #endif /* UNIX */
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
/* A note on sizes and offsets: while the actual map size must hold in a
Py_ssize_t, both the total file size and the start offset can be longer
than a Py_ssize_t, so we use PY_LONG_LONG which is always 64-bit.
*/
static PyObject * static PyObject *
new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict)
{ {
mmap_object *m_obj; mmap_object *m_obj;
PyObject *map_size_obj = NULL, *offset_obj = NULL; PyObject *map_size_obj = NULL;
Py_ssize_t map_size, offset; Py_ssize_t map_size;
PY_LONG_LONG offset = 0, size;
DWORD off_hi; /* upper 32 bits of offset */ DWORD off_hi; /* upper 32 bits of offset */
DWORD off_lo; /* lower 32 bits of offset */ DWORD off_lo; /* lower 32 bits of offset */
DWORD size_hi; /* upper 32 bits of size */ DWORD size_hi; /* upper 32 bits of size */
...@@ -1203,9 +1226,9 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) ...@@ -1203,9 +1226,9 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict)
"tagname", "tagname",
"access", "offset", NULL }; "access", "offset", NULL };
if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iO|ziO", keywords, if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iO|ziL", keywords,
&fileno, &map_size_obj, &fileno, &map_size_obj,
&tagname, &access, &offset_obj)) { &tagname, &access, &offset)) {
return NULL; return NULL;
} }
...@@ -1230,9 +1253,11 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) ...@@ -1230,9 +1253,11 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict)
map_size = _GetMapSize(map_size_obj, "size"); map_size = _GetMapSize(map_size_obj, "size");
if (map_size < 0) if (map_size < 0)
return NULL; return NULL;
offset = _GetMapSize(offset_obj, "offset"); if (offset < 0) {
if (offset < 0) PyErr_SetString(PyExc_OverflowError,
"memory mapped offset must be positive");
return NULL; return NULL;
}
/* assume -1 and 0 both mean invalid filedescriptor /* assume -1 and 0 both mean invalid filedescriptor
to 'anonymously' map memory. to 'anonymously' map memory.
...@@ -1296,28 +1321,26 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) ...@@ -1296,28 +1321,26 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict)
return PyErr_SetFromWindowsErr(dwErr); return PyErr_SetFromWindowsErr(dwErr);
} }
#if SIZEOF_SIZE_T > 4 size = (((PY_LONG_LONG) high) << 32) + low;
m_obj->size = (((size_t)high)<<32) + low; if (offset >= size) {
#else
if (high)
/* File is too large to map completely */
m_obj->size = (size_t)-1;
else
m_obj->size = low;
#endif
if (offset >= m_obj->size) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"mmap offset is greater than file size"); "mmap offset is greater than file size");
Py_DECREF(m_obj); Py_DECREF(m_obj);
return NULL; return NULL;
} }
m_obj->size -= offset; if (offset - size > PY_SSIZE_T_MAX)
/* Map area too large to fit in memory */
m_obj->size = (Py_ssize_t) -1;
else
m_obj->size = (Py_ssize_t) (size - offset);
} else { } else {
m_obj->size = map_size; m_obj->size = map_size;
size = offset + map_size;
} }
} }
else { else {
m_obj->size = map_size; m_obj->size = map_size;
size = offset + map_size;
} }
/* set the initial position */ /* set the initial position */
...@@ -1338,22 +1361,10 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) ...@@ -1338,22 +1361,10 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict)
m_obj->tagname = NULL; m_obj->tagname = NULL;
m_obj->access = (access_mode)access; m_obj->access = (access_mode)access;
/* DWORD is a 4-byte int. If we're on a box where size_t consumes size_hi = (DWORD)(size >> 32);
* more than 4 bytes, we need to break it apart. Else (size_t size_lo = (DWORD)(size & 0xFFFFFFFF);
* consumes 4 bytes), C doesn't define what happens if we shift
* right by 32, so we need different code.
*/
#if SIZEOF_SIZE_T > 4
size_hi = (DWORD)((offset + m_obj->size) >> 32);
size_lo = (DWORD)((offset + m_obj->size) & 0xFFFFFFFF);
off_hi = (DWORD)(offset >> 32); off_hi = (DWORD)(offset >> 32);
off_lo = (DWORD)(offset & 0xFFFFFFFF); off_lo = (DWORD)(offset & 0xFFFFFFFF);
#else
size_hi = 0;
size_lo = (DWORD)(offset + m_obj->size);
off_hi = 0;
off_lo = (DWORD)offset;
#endif
/* For files, it would be sufficient to pass 0 as size. /* For files, it would be sufficient to pass 0 as size.
For anonymous maps, we have to pass the size explicitly. */ For anonymous maps, we have to pass the size explicitly. */
m_obj->map_handle = CreateFileMapping(m_obj->file_handle, m_obj->map_handle = CreateFileMapping(m_obj->file_handle,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment