Commit e099b374 authored by Benjamin Peterson

#5391 make mmap work exclusively with bytes

parent f3abcc9d
...@@ -6,13 +6,13 @@ ...@@ -6,13 +6,13 @@
:synopsis: Interface to memory-mapped files for Unix and Windows. :synopsis: Interface to memory-mapped files for Unix and Windows.
Memory-mapped file objects behave like both strings and like file objects. Memory-mapped file objects behave like both :class:`bytes` and like file
Unlike normal string objects, however, these are mutable. You can use mmap objects. Unlike normal :class:`bytes` objects, however, these are mutable.
objects in most places where strings are expected; for example, you can use You can use mmap objects in most places where :class:`bytes` are expected; for
the :mod:`re` module to search through a memory-mapped file. Since they're example, you can use the :mod:`re` module to search through a memory-mapped file.
mutable, you can change a single character by doing ``obj[index] = 'a'``, or Since they're mutable, you can change a single byte by doing ``obj[index] = 97``,
change a substring by assigning to a slice: ``obj[i1:i2] = '...'``. You can or change a subsequence by assigning to a slice: ``obj[i1:i2] = b'...'``.
also read and write data starting at the current file position, and You can also read and write data starting at the current file position, and
:meth:`seek` through the file to different positions. :meth:`seek` through the file to different positions.
A memory-mapped file is created by the :class:`mmap` constructor, which is A memory-mapped file is created by the :class:`mmap` constructor, which is
...@@ -94,21 +94,21 @@ To map anonymous memory, -1 should be passed as the fileno along with the length ...@@ -94,21 +94,21 @@ To map anonymous memory, -1 should be passed as the fileno along with the length
# write a simple example file # write a simple example file
with open("hello.txt", "wb") as f: with open("hello.txt", "wb") as f:
f.write("Hello Python!\n") f.write(b"Hello Python!\n")
with open("hello.txt", "r+b") as f: with open("hello.txt", "r+b") as f:
# memory-map the file, size 0 means whole file # memory-map the file, size 0 means whole file
map = mmap.mmap(f.fileno(), 0) map = mmap.mmap(f.fileno(), 0)
# read content via standard file methods # read content via standard file methods
print(map.readline()) # prints "Hello Python!" print(map.readline()) # prints b"Hello Python!\n"
# read content via slice notation # read content via slice notation
print(map[:5]) # prints "Hello" print(map[:5]) # prints b"Hello"
# update content using slice notation; # update content using slice notation;
# note that new content must have same size # note that new content must have same size
map[6:] = " world!\n" map[6:] = b" world!\n"
# ... and read again using standard file methods # ... and read again using standard file methods
map.seek(0) map.seek(0)
print(map.readline()) # prints "Hello world!" print(map.readline()) # prints b"Hello world!\n"
# close the map # close the map
map.close() map.close()
...@@ -120,7 +120,7 @@ To map anonymous memory, -1 should be passed as the fileno along with the length ...@@ -120,7 +120,7 @@ To map anonymous memory, -1 should be passed as the fileno along with the length
import os import os
map = mmap.mmap(-1, 13) map = mmap.mmap(-1, 13)
map.write("Hello world!") map.write(b"Hello world!")
pid = os.fork() pid = os.fork()
...@@ -140,10 +140,10 @@ To map anonymous memory, -1 should be passed as the fileno along with the length ...@@ -140,10 +140,10 @@ To map anonymous memory, -1 should be passed as the fileno along with the length
result in an exception being raised. result in an exception being raised.
.. method:: find(string[, start[, end]]) .. method:: find(sub[, start[, end]])
Returns the lowest index in the object where the substring *string* is Returns the lowest index in the object where the subsequence *sub* is
found, such that *string* is contained in the range [*start*, *end*]. found, such that *sub* is contained in the range [*start*, *end*].
Optional arguments *start* and *end* are interpreted as in slice notation. Optional arguments *start* and *end* are interpreted as in slice notation.
Returns ``-1`` on failure. Returns ``-1`` on failure.
...@@ -172,15 +172,15 @@ To map anonymous memory, -1 should be passed as the fileno along with the length ...@@ -172,15 +172,15 @@ To map anonymous memory, -1 should be passed as the fileno along with the length
.. method:: read(num) .. method:: read(num)
Return a string containing up to *num* bytes starting from the current Return a :class:`bytes` containing up to *num* bytes starting from the
file position; the file position is updated to point after the bytes that current file position; the file position is updated to point after the
were returned. bytes that were returned.
.. method:: read_byte() .. method:: read_byte()
Returns a string of length 1 containing the character at the current file Returns a byte at the current file position as an integer, and advances
position, and advances the file position by 1. the file position by 1.
.. method:: readline() .. method:: readline()
...@@ -196,10 +196,10 @@ To map anonymous memory, -1 should be passed as the fileno along with the length ...@@ -196,10 +196,10 @@ To map anonymous memory, -1 should be passed as the fileno along with the length
throw a :exc:`TypeError` exception. throw a :exc:`TypeError` exception.
.. method:: rfind(string[, start[, end]]) .. method:: rfind(sub[, start[, end]])
Returns the highest index in the object where the substring *string* is Returns the highest index in the object where the subsequence *sub* is
found, such that *string* is contained in the range [*start*, *end*]. found, such that *sub* is contained in the range [*start*, *end*].
Optional arguments *start* and *end* are interpreted as in slice notation. Optional arguments *start* and *end* are interpreted as in slice notation.
Returns ``-1`` on failure. Returns ``-1`` on failure.
...@@ -223,9 +223,9 @@ To map anonymous memory, -1 should be passed as the fileno along with the length ...@@ -223,9 +223,9 @@ To map anonymous memory, -1 should be passed as the fileno along with the length
Returns the current position of the file pointer. Returns the current position of the file pointer.
.. method:: write(string) .. method:: write(bytes)
Write the bytes in *string* into memory at the current position of the Write the bytes in *bytes* into memory at the current position of the
file pointer; the file position is updated to point after the bytes that file pointer; the file position is updated to point after the bytes that
were written. If the mmap was created with :const:`ACCESS_READ`, then were written. If the mmap was created with :const:`ACCESS_READ`, then
writing to it will throw a :exc:`TypeError` exception. writing to it will throw a :exc:`TypeError` exception.
...@@ -233,7 +233,7 @@ To map anonymous memory, -1 should be passed as the fileno along with the length ...@@ -233,7 +233,7 @@ To map anonymous memory, -1 should be passed as the fileno along with the length
.. method:: write_byte(byte) .. method:: write_byte(byte)
Write the single-character string *byte* into memory at the current Write the integer *byte* into memory at the current
position of the file pointer; the file position is advanced by ``1``. If position of the file pointer; the file position is advanced by ``1``. If
the mmap was created with :const:`ACCESS_READ`, then writing to it will the mmap was created with :const:`ACCESS_READ`, then writing to it will
throw a :exc:`TypeError` exception. throw a :exc:`TypeError` exception.
......
...@@ -37,7 +37,7 @@ class MmapTests(unittest.TestCase): ...@@ -37,7 +37,7 @@ class MmapTests(unittest.TestCase):
# Simple sanity checks # Simple sanity checks
tp = str(type(m)) # SF bug 128713: segfaulted on Linux tp = str(type(m)) # SF bug 128713: segfaulted on Linux
self.assertEqual(m.find('foo'), PAGESIZE) self.assertEqual(m.find(b'foo'), PAGESIZE)
self.assertEqual(len(m), 2*PAGESIZE) self.assertEqual(len(m), 2*PAGESIZE)
...@@ -262,38 +262,38 @@ class MmapTests(unittest.TestCase): ...@@ -262,38 +262,38 @@ class MmapTests(unittest.TestCase):
def test_find_end(self): def test_find_end(self):
# test the new 'end' parameter works as expected # test the new 'end' parameter works as expected
f = open(TESTFN, 'w+') f = open(TESTFN, 'wb+')
data = 'one two ones' data = b'one two ones'
n = len(data) n = len(data)
f.write(data) f.write(data)
f.flush() f.flush()
m = mmap.mmap(f.fileno(), n) m = mmap.mmap(f.fileno(), n)
f.close() f.close()
self.assertEqual(m.find('one'), 0) self.assertEqual(m.find(b'one'), 0)
self.assertEqual(m.find('ones'), 8) self.assertEqual(m.find(b'ones'), 8)
self.assertEqual(m.find('one', 0, -1), 0) self.assertEqual(m.find(b'one', 0, -1), 0)
self.assertEqual(m.find('one', 1), 8) self.assertEqual(m.find(b'one', 1), 8)
self.assertEqual(m.find('one', 1, -1), 8) self.assertEqual(m.find(b'one', 1, -1), 8)
self.assertEqual(m.find('one', 1, -2), -1) self.assertEqual(m.find(b'one', 1, -2), -1)
def test_rfind(self): def test_rfind(self):
# test the new 'end' parameter works as expected # test the new 'end' parameter works as expected
f = open(TESTFN, 'w+') f = open(TESTFN, 'wb+')
data = 'one two ones' data = b'one two ones'
n = len(data) n = len(data)
f.write(data) f.write(data)
f.flush() f.flush()
m = mmap.mmap(f.fileno(), n) m = mmap.mmap(f.fileno(), n)
f.close() f.close()
self.assertEqual(m.rfind('one'), 8) self.assertEqual(m.rfind(b'one'), 8)
self.assertEqual(m.rfind('one '), 0) self.assertEqual(m.rfind(b'one '), 0)
self.assertEqual(m.rfind('one', 0, -1), 8) self.assertEqual(m.rfind(b'one', 0, -1), 8)
self.assertEqual(m.rfind('one', 0, -2), 0) self.assertEqual(m.rfind(b'one', 0, -2), 0)
self.assertEqual(m.rfind('one', 1, -1), 8) self.assertEqual(m.rfind(b'one', 1, -1), 8)
self.assertEqual(m.rfind('one', 1, -2), -1) self.assertEqual(m.rfind(b'one', 1, -2), -1)
def test_double_close(self): def test_double_close(self):
...@@ -506,21 +506,15 @@ class MmapTests(unittest.TestCase): ...@@ -506,21 +506,15 @@ class MmapTests(unittest.TestCase):
# Test write_byte() # Test write_byte()
for i in range(len(data)): for i in range(len(data)):
self.assertEquals(m.tell(), i) self.assertEquals(m.tell(), i)
m.write_byte(data[i:i+1]) m.write_byte(data[i])
self.assertEquals(m.tell(), i+1) self.assertEquals(m.tell(), i+1)
self.assertRaises(ValueError, m.write_byte, b"x") self.assertRaises(ValueError, m.write_byte, b"x"[0])
self.assertEquals(m[:], data) self.assertEquals(m[:], data)
# Test read_byte() # Test read_byte()
m.seek(0) m.seek(0)
for i in range(len(data)): for i in range(len(data)):
self.assertEquals(m.tell(), i) self.assertEquals(m.tell(), i)
# XXX: Disable this test for now because it's not clear self.assertEquals(m.read_byte(), data[i])
# which type of object m.read_byte returns. Currently, it
# returns 1-length str (unicode).
if 0:
self.assertEquals(m.read_byte(), data[i:i+1])
else:
m.read_byte()
self.assertEquals(m.tell(), i+1) self.assertEquals(m.tell(), i+1)
self.assertRaises(ValueError, m.read_byte) self.assertRaises(ValueError, m.read_byte)
# Test read() # Test read()
......
...@@ -11,6 +11,7 @@ What's New in Python 3.1 alpha 2? ...@@ -11,6 +11,7 @@ What's New in Python 3.1 alpha 2?
Core and Builtins Core and Builtins
----------------- -----------------
- Implement PEP 378, Format Specifier for Thousands Separator, for - Implement PEP 378, Format Specifier for Thousands Separator, for
integers. integers.
...@@ -128,6 +129,8 @@ Library ...@@ -128,6 +129,8 @@ Library
Extension Modules Extension Modules
----------------- -----------------
- Issue #5391: mmap now deals exclusively with bytes.
- Issue #5463: In struct module, remove deprecated overflow wrapping - Issue #5463: In struct module, remove deprecated overflow wrapping
when packing an integer: struct.pack('=L', -1) now raises when packing an integer: struct.pack('=L', -1) now raises
struct.error instead of returning b'\xff\xff\xff\xff'. The struct.error instead of returning b'\xff\xff\xff\xff'. The
......
...@@ -204,7 +204,7 @@ mmap_read_byte_method(mmap_object *self, ...@@ -204,7 +204,7 @@ mmap_read_byte_method(mmap_object *self,
if (self->pos < self->size) { if (self->pos < self->size) {
char value = self->data[self->pos]; char value = self->data[self->pos];
self->pos += 1; self->pos += 1;
return Py_BuildValue("c", value); return Py_BuildValue("b", value);
} else { } else {
PyErr_SetString(PyExc_ValueError, "read byte out of range"); PyErr_SetString(PyExc_ValueError, "read byte out of range");
return NULL; return NULL;
...@@ -264,7 +264,7 @@ mmap_gfind(mmap_object *self, ...@@ -264,7 +264,7 @@ mmap_gfind(mmap_object *self,
Py_ssize_t len; Py_ssize_t len;
CHECK_VALID(NULL); CHECK_VALID(NULL);
if (!PyArg_ParseTuple(args, reverse ? "s#|nn:rfind" : "s#|nn:find", if (!PyArg_ParseTuple(args, reverse ? "y#|nn:rfind" : "y#|nn:find",
&needle, &len, &start, &end)) { &needle, &len, &start, &end)) {
return NULL; return NULL;
} else { } else {
...@@ -348,7 +348,7 @@ mmap_write_method(mmap_object *self, ...@@ -348,7 +348,7 @@ mmap_write_method(mmap_object *self,
char *data; char *data;
CHECK_VALID(NULL); CHECK_VALID(NULL);
if (!PyArg_ParseTuple(args, "s#:write", &data, &length)) if (!PyArg_ParseTuple(args, "y#:write", &data, &length))
return(NULL); return(NULL);
if (!is_writable(self)) if (!is_writable(self))
...@@ -371,7 +371,7 @@ mmap_write_byte_method(mmap_object *self, ...@@ -371,7 +371,7 @@ mmap_write_byte_method(mmap_object *self,
char value; char value;
CHECK_VALID(NULL); CHECK_VALID(NULL);
if (!PyArg_ParseTuple(args, "c:write_byte", &value)) if (!PyArg_ParseTuple(args, "b:write_byte", &value))
return(NULL); return(NULL);
if (!is_writable(self)) if (!is_writable(self))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment