Commit f5f20ef0 authored by David Howells's avatar David Howells Committed by Linus Torvalds

[PATCH] Permit nommu MAP_SHARED of memory backed files

The attached patch applies some further fixes and extensions to the nommu mmap
implementation:

 (1) /proc/maps distinguishes shareable private mappings and real shared
     mappings by marking the former with 's' and the latter with 'S'.

 (2) Rearrange and optimise the checking portion of do_mmap_pgoff() to make it
     easier to follow.

 (3) Only set VM_SHARED on MAP_SHARED mappings. Its presence indicates that the
     backing memory is supplied by the underlying file or chardev.

     VM_MAYSHARE indicates that a VMA may be shared if it's a private VMA. The
     memory for a private VMA is allocated by do_mmap_pgoff() from a kmalloc
     slab and then the file contents are read into it before returning.

 (4) Permit MAP_SHARED + PROT_WRITE on memory-backed files[*] and chardevs to
     indicate a contiguous area of memory when its get_unmapped_area() is
     called if the backing fs/chardev is willing.

     [*] file->f_mapping->backing_dev_info->memory_backed == 1

 (5) Require chardevs and files that support to provide a get_unmapped_area()
     file operation.

 (6) Made sure a private mapping of /dev/zero is possible. Shared mappings of
     /dev/zero are not currently supported because this'd need greater
     interaction of mmap with the chardev driver than is currently supported.

 (7) Add in some extra checks from mm/mmap.c: security, file having write
     access for a writable shared mapping, file not being in append mode.

 (8) Only account the mapping memory if it's allocated here; memory belonging
     to a shared chardev or file is not accounted.

With this patch it should be possible to map contiguous flash files directly
out of ROM simply by providing get_unmapped_area() for a read-only/shared
mapping.

I think that it might be worth splitting do_mmap_pgoff() up into smaller
subfunctions: one to handle the checking, one to handle shared mappings and
one to handle private mappings.
Signed-Off-By: default avatarDavid Howells <dhowells@redhat.com>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent 6fc96ef2
......@@ -62,7 +62,7 @@ static int nommu_vma_list_show(struct seq_file *m, void *v)
flags & VM_READ ? 'r' : '-',
flags & VM_WRITE ? 'w' : '-',
flags & VM_EXEC ? 'x' : '-',
flags & VM_MAYSHARE ? 's' : 'p',
flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
vma->vm_pgoff << PAGE_SHIFT,
MAJOR(dev), MINOR(dev), ino, &len);
......
......@@ -21,6 +21,9 @@
#include <linux/ptrace.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/mount.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <asm/uaccess.h>
......@@ -372,31 +375,15 @@ unsigned long do_mmap_pgoff(struct file *file,
struct rb_node *rb;
unsigned int vm_flags;
void *result;
int ret, chrdev;
/*
* Get the !CONFIG_MMU specific checks done first
*/
chrdev = 0;
if (file)
chrdev = S_ISCHR(file->f_dentry->d_inode->i_mode);
if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && file && !chrdev) {
printk("MAP_SHARED not completely supported (cannot detect page dirtying)\n");
return -EINVAL;
}
int ret, membacked;
/* do the simple checks first */
if (flags & MAP_FIXED || addr) {
/* printk("can't do fixed-address/overlay mmap of RAM\n"); */
printk(KERN_DEBUG "%d: Can't do fixed-address/overlay mmap of RAM\n",
current->pid);
return -EINVAL;
}
/*
* now all the standard checks
*/
if (file && (!file->f_op || !file->f_op->mmap))
return -ENODEV;
if (PAGE_ALIGN(len) == 0)
return addr;
......@@ -407,55 +394,129 @@ unsigned long do_mmap_pgoff(struct file *file,
if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
return -EINVAL;
/* we're going to need to record the mapping if it works */
vml = kmalloc(sizeof(struct vm_list_struct), GFP_KERNEL);
if (!vml)
goto error_getting_vml;
memset(vml, 0, sizeof(*vml));
/* validate file mapping requests */
membacked = 0;
if (file) {
/* files must support mmap */
if (!file->f_op || !file->f_op->mmap)
return -ENODEV;
if ((prot & PROT_EXEC) &&
(file->f_vfsmnt->mnt_flags & MNT_NOEXEC))
return -EPERM;
/* work out if what we've got could possibly be shared
* - we support chardevs that provide their own "memory"
* - we support files/blockdevs that are memory backed
*/
if (S_ISCHR(file->f_dentry->d_inode->i_mode)) {
membacked = 1;
}
else {
struct address_space *mapping = file->f_mapping;
if (!mapping)
mapping = file->f_dentry->d_inode->i_mapping;
if (mapping && mapping->backing_dev_info)
membacked = mapping->backing_dev_info->memory_backed;
}
if (flags & MAP_SHARED) {
/* do checks for writing, appending and locking */
if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE))
return -EACCES;
if (IS_APPEND(file->f_dentry->d_inode) &&
(file->f_mode & FMODE_WRITE))
return -EACCES;
if (locks_verify_locked(file->f_dentry->d_inode))
return -EAGAIN;
/* Do simple checking here so the lower-level routines won't have
if (!membacked) {
printk("MAP_SHARED not completely supported on !MMU\n");
return -EINVAL;
}
/* we require greater support from the driver or
* filesystem - we ask it to tell us what memory to
* use */
if (!file->f_op->get_unmapped_area)
return -ENODEV;
}
else {
/* we read private files into memory we allocate */
if (!file->f_op->read)
return -ENODEV;
}
}
/* handle PROT_EXEC implication by PROT_READ */
if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
if (!(file && (file->f_vfsmnt->mnt_flags & MNT_NOEXEC)))
prot |= PROT_EXEC;
/* do simple checking here so the lower-level routines won't have
* to. we assume access permissions have been handled by the open
* of the memory object, so we don't do any here.
*/
vm_flags = calc_vm_flags(prot,flags) /* | mm->def_flags */
| VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
if (!chrdev) {
if (!membacked) {
/* share any file segment that's mapped read-only */
if (((flags & MAP_PRIVATE) && !(prot & PROT_WRITE) && file) ||
((flags & MAP_SHARED) && !(prot & PROT_WRITE) && file))
vm_flags |= VM_SHARED | VM_MAYSHARE;
vm_flags |= VM_MAYSHARE;
/* refuse to let anyone share files with this process if it's being traced -
* otherwise breakpoints set in it may interfere with another untraced process
*/
if (!chrdev && current->ptrace & PT_PTRACED)
if (current->ptrace & PT_PTRACED)
vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
}
else {
/* permit sharing of character devices at any time */
/* permit sharing of character devices and ramfs files at any time for
* anything other than a privately writable mapping
*/
if (!(flags & MAP_PRIVATE) || !(prot & PROT_WRITE)) {
vm_flags |= VM_MAYSHARE;
if (flags & MAP_SHARED)
vm_flags |= VM_SHARED;
}
}
/* allow the security API to have its say */
ret = security_file_mmap(file, prot, flags);
if (ret)
return ret;
/* we're going to need to record the mapping if it works */
vml = kmalloc(sizeof(struct vm_list_struct), GFP_KERNEL);
if (!vml)
goto error_getting_vml;
memset(vml, 0, sizeof(*vml));
/* if we want to share, we need to search for VMAs created by another mmap() call that
* overlap with our proposed mapping
* - we can only share with an exact match on regular files
* - shared mappings on character devices are permitted to overlap inexactly as far as we
* are concerned, but in that case, sharing is handled in the driver rather than here
*/
down_write(&nommu_vma_sem);
if (!chrdev && vm_flags & VM_SHARED) {
/* if we want to share, we need to search for VMAs created by another
* mmap() call that overlap with our proposed mapping
* - we can only share with an exact match on most regular files
* - shared mappings on character devices and memory backed files are
* permitted to overlap inexactly as far as we are concerned for in
* these cases, sharing is handled in the driver or filesystem rather
* than here
*/
if (vm_flags & VM_MAYSHARE) {
unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
unsigned long vmpglen;
for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) {
vma = rb_entry(rb, struct vm_area_struct, vm_rb);
if (!(vma->vm_flags & VM_SHARED))
if (!(vma->vm_flags & VM_MAYSHARE))
continue;
/* search for overlapping mappings on the same file */
if (vma->vm_file->f_dentry->d_inode != file->f_dentry->d_inode)
continue;
......@@ -466,8 +527,9 @@ unsigned long do_mmap_pgoff(struct file *file,
if (pgoff >= vma->vm_pgoff + vmpglen)
continue;
/* handle inexact matches between mappings */
if (vmpglen != pglen || vma->vm_pgoff != pgoff) {
if (flags & MAP_SHARED)
if (!membacked)
goto sharing_violation;
continue;
}
......@@ -481,11 +543,13 @@ unsigned long do_mmap_pgoff(struct file *file,
}
}
vma = NULL;
/* obtain the address to map to. we verify (or select) it and ensure
* that it represents a valid section of the address space
* - this is the hook for quasi-memory character devices
*/
if (file && file->f_op && file->f_op->get_unmapped_area)
if (file && file->f_op->get_unmapped_area)
addr = file->f_op->get_unmapped_area(file, addr, len, pgoff, flags);
if (IS_ERR((void *) addr)) {
......@@ -511,18 +575,12 @@ unsigned long do_mmap_pgoff(struct file *file,
vml->vma = vma;
/*
* determine the object being mapped and call the appropriate
* specific mapper.
/* determine the object being mapped and call the appropriate specific
* mapper.
*/
if (file) {
ret = -ENODEV;
if (!file->f_op)
goto error;
#ifdef MAGIC_ROM_PTR
/* First, try simpler routine designed to give us a ROM pointer. */
if (file->f_op->romptr && !(prot & PROT_WRITE)) {
ret = file->f_op->romptr(file, vma);
#ifdef DEBUG
......@@ -536,9 +594,9 @@ unsigned long do_mmap_pgoff(struct file *file,
goto error;
} else
#endif /* MAGIC_ROM_PTR */
/* Then try full mmap routine, which might return a RAM pointer,
or do something truly complicated. */
/* Then try full mmap routine, which might return a RAM
* pointer, or do something truly complicated
*/
if (file->f_op->mmap) {
ret = file->f_op->mmap(file, vma);
......@@ -556,11 +614,15 @@ unsigned long do_mmap_pgoff(struct file *file,
goto error;
}
/* An ENOSYS error indicates that mmap isn't possible (as opposed to
tried but failed) so we'll fall through to the copy. */
/* An ENOSYS error indicates that mmap isn't possible (as
* opposed to tried but failed) so we'll fall through to the
* copy. */
}
/* allocate some memory to hold the mapping */
/* allocate some memory to hold the mapping
* - note that this may not return a page-aligned address if the object
* we're allocating is smaller than a page
*/
ret = -ENOMEM;
result = kmalloc(len, GFP_KERNEL);
if (!result) {
......@@ -602,8 +664,10 @@ unsigned long do_mmap_pgoff(struct file *file,
flush_icache_range((unsigned long) result, (unsigned long) result + len);
done:
if (!(vma->vm_flags & VM_SHARED)) {
realalloc += kobjsize(result);
askedalloc += len;
}
realalloc += kobjsize(vma);
askedalloc += sizeof(*vma);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment