Commit f5f20ef0 authored by David Howells, committed by Linus Torvalds

[PATCH] Permit nommu MAP_SHARED of memory backed files

The attached patch applies some further fixes and extensions to the nommu mmap
implementation:

 (1) /proc/maps distinguishes shareable private mappings and real shared
     mappings by marking the former with 's' and the latter with 'S'.

 (2) Rearrange and optimise the checking portion of do_mmap_pgoff() to make it
     easier to follow.

 (3) Only set VM_SHARED on MAP_SHARED mappings. Its presence indicates that the
     backing memory is supplied by the underlying file or chardev.

     VM_MAYSHARE indicates that a VMA may be shared if it's a private VMA. The
     memory for a private VMA is allocated by do_mmap_pgoff() from a kmalloc
     slab and then the file contents are read into it before returning.

 (4) Permit MAP_SHARED + PROT_WRITE on memory-backed files[*] and chardevs.
     The backing fs/chardev, if it is willing, indicates the contiguous area
     of memory to use when its get_unmapped_area() is called.

     [*] file->f_mapping->backing_dev_info->memory_backed == 1

 (5) Require chardevs and files that support shared mappings to provide a
     get_unmapped_area() file operation.

 (6) Made sure a private mapping of /dev/zero is possible. Shared mappings of
     /dev/zero are not currently supported because this'd need greater
     interaction of mmap with the chardev driver than is currently supported.

 (7) Add in some extra checks from mm/mmap.c: security, file having write
     access for a writable shared mapping, file not being in append mode.

 (8) Only account the mapping memory if it's allocated here; memory belonging
     to a shared chardev or file is not accounted.

With this patch it should be possible to map contiguous flash files directly
out of ROM simply by providing get_unmapped_area() for a read-only/shared
mapping.

I think that it might be worth splitting do_mmap_pgoff() up into smaller
subfunctions: one to handle the checking, one to handle shared mappings and
one to handle private mappings.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 6fc96ef2
...@@ -62,7 +62,7 @@ static int nommu_vma_list_show(struct seq_file *m, void *v) ...@@ -62,7 +62,7 @@ static int nommu_vma_list_show(struct seq_file *m, void *v)
flags & VM_READ ? 'r' : '-', flags & VM_READ ? 'r' : '-',
flags & VM_WRITE ? 'w' : '-', flags & VM_WRITE ? 'w' : '-',
flags & VM_EXEC ? 'x' : '-', flags & VM_EXEC ? 'x' : '-',
flags & VM_MAYSHARE ? 's' : 'p', flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
vma->vm_pgoff << PAGE_SHIFT, vma->vm_pgoff << PAGE_SHIFT,
MAJOR(dev), MINOR(dev), ino, &len); MAJOR(dev), MINOR(dev), ino, &len);
......
...@@ -21,6 +21,9 @@ ...@@ -21,6 +21,9 @@
#include <linux/ptrace.h> #include <linux/ptrace.h>
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/backing-dev.h> #include <linux/backing-dev.h>
#include <linux/mount.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/syscalls.h> #include <linux/syscalls.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
...@@ -372,31 +375,15 @@ unsigned long do_mmap_pgoff(struct file *file, ...@@ -372,31 +375,15 @@ unsigned long do_mmap_pgoff(struct file *file,
struct rb_node *rb; struct rb_node *rb;
unsigned int vm_flags; unsigned int vm_flags;
void *result; void *result;
int ret, chrdev; int ret, membacked;
/*
* Get the !CONFIG_MMU specific checks done first
*/
chrdev = 0;
if (file)
chrdev = S_ISCHR(file->f_dentry->d_inode->i_mode);
if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && file && !chrdev) {
printk("MAP_SHARED not completely supported (cannot detect page dirtying)\n");
return -EINVAL;
}
/* do the simple checks first */
if (flags & MAP_FIXED || addr) { if (flags & MAP_FIXED || addr) {
/* printk("can't do fixed-address/overlay mmap of RAM\n"); */ printk(KERN_DEBUG "%d: Can't do fixed-address/overlay mmap of RAM\n",
current->pid);
return -EINVAL; return -EINVAL;
} }
/*
* now all the standard checks
*/
if (file && (!file->f_op || !file->f_op->mmap))
return -ENODEV;
if (PAGE_ALIGN(len) == 0) if (PAGE_ALIGN(len) == 0)
return addr; return addr;
...@@ -407,55 +394,129 @@ unsigned long do_mmap_pgoff(struct file *file, ...@@ -407,55 +394,129 @@ unsigned long do_mmap_pgoff(struct file *file,
if ((pgoff + (len >> PAGE_SHIFT)) < pgoff) if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
return -EINVAL; return -EINVAL;
/* we're going to need to record the mapping if it works */ /* validate file mapping requests */
vml = kmalloc(sizeof(struct vm_list_struct), GFP_KERNEL); membacked = 0;
if (!vml) if (file) {
goto error_getting_vml; /* files must support mmap */
memset(vml, 0, sizeof(*vml)); if (!file->f_op || !file->f_op->mmap)
return -ENODEV;
if ((prot & PROT_EXEC) &&
(file->f_vfsmnt->mnt_flags & MNT_NOEXEC))
return -EPERM;
/* work out if what we've got could possibly be shared
* - we support chardevs that provide their own "memory"
* - we support files/blockdevs that are memory backed
*/
if (S_ISCHR(file->f_dentry->d_inode->i_mode)) {
membacked = 1;
}
else {
struct address_space *mapping = file->f_mapping;
if (!mapping)
mapping = file->f_dentry->d_inode->i_mapping;
if (mapping && mapping->backing_dev_info)
membacked = mapping->backing_dev_info->memory_backed;
}
if (flags & MAP_SHARED) {
/* do checks for writing, appending and locking */
if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE))
return -EACCES;
if (IS_APPEND(file->f_dentry->d_inode) &&
(file->f_mode & FMODE_WRITE))
return -EACCES;
if (locks_verify_locked(file->f_dentry->d_inode))
return -EAGAIN;
/* Do simple checking here so the lower-level routines won't have if (!membacked) {
printk("MAP_SHARED not completely supported on !MMU\n");
return -EINVAL;
}
/* we require greater support from the driver or
* filesystem - we ask it to tell us what memory to
* use */
if (!file->f_op->get_unmapped_area)
return -ENODEV;
}
else {
/* we read private files into memory we allocate */
if (!file->f_op->read)
return -ENODEV;
}
}
/* handle PROT_EXEC implication by PROT_READ */
if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
if (!(file && (file->f_vfsmnt->mnt_flags & MNT_NOEXEC)))
prot |= PROT_EXEC;
/* do simple checking here so the lower-level routines won't have
* to. we assume access permissions have been handled by the open * to. we assume access permissions have been handled by the open
* of the memory object, so we don't do any here. * of the memory object, so we don't do any here.
*/ */
vm_flags = calc_vm_flags(prot,flags) /* | mm->def_flags */ vm_flags = calc_vm_flags(prot,flags) /* | mm->def_flags */
| VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
if (!chrdev) { if (!membacked) {
/* share any file segment that's mapped read-only */ /* share any file segment that's mapped read-only */
if (((flags & MAP_PRIVATE) && !(prot & PROT_WRITE) && file) || if (((flags & MAP_PRIVATE) && !(prot & PROT_WRITE) && file) ||
((flags & MAP_SHARED) && !(prot & PROT_WRITE) && file)) ((flags & MAP_SHARED) && !(prot & PROT_WRITE) && file))
vm_flags |= VM_SHARED | VM_MAYSHARE; vm_flags |= VM_MAYSHARE;
/* refuse to let anyone share files with this process if it's being traced - /* refuse to let anyone share files with this process if it's being traced -
* otherwise breakpoints set in it may interfere with another untraced process * otherwise breakpoints set in it may interfere with another untraced process
*/ */
if (!chrdev && current->ptrace & PT_PTRACED) if (current->ptrace & PT_PTRACED)
vm_flags &= ~(VM_SHARED | VM_MAYSHARE); vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
} }
else { else {
/* permit sharing of character devices at any time */ /* permit sharing of character devices and ramfs files at any time for
* anything other than a privately writable mapping
*/
if (!(flags & MAP_PRIVATE) || !(prot & PROT_WRITE)) {
vm_flags |= VM_MAYSHARE; vm_flags |= VM_MAYSHARE;
if (flags & MAP_SHARED) if (flags & MAP_SHARED)
vm_flags |= VM_SHARED; vm_flags |= VM_SHARED;
} }
}
/* allow the security API to have its say */
ret = security_file_mmap(file, prot, flags);
if (ret)
return ret;
/* we're going to need to record the mapping if it works */
vml = kmalloc(sizeof(struct vm_list_struct), GFP_KERNEL);
if (!vml)
goto error_getting_vml;
memset(vml, 0, sizeof(*vml));
/* if we want to share, we need to search for VMAs created by another mmap() call that
* overlap with our proposed mapping
* - we can only share with an exact match on regular files
* - shared mappings on character devices are permitted to overlap inexactly as far as we
* are concerned, but in that case, sharing is handled in the driver rather than here
*/
down_write(&nommu_vma_sem); down_write(&nommu_vma_sem);
if (!chrdev && vm_flags & VM_SHARED) {
/* if we want to share, we need to search for VMAs created by another
* mmap() call that overlap with our proposed mapping
* - we can only share with an exact match on most regular files
* - shared mappings on character devices and memory backed files are
* permitted to overlap inexactly as far as we are concerned for in
* these cases, sharing is handled in the driver or filesystem rather
* than here
*/
if (vm_flags & VM_MAYSHARE) {
unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
unsigned long vmpglen; unsigned long vmpglen;
for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) { for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) {
vma = rb_entry(rb, struct vm_area_struct, vm_rb); vma = rb_entry(rb, struct vm_area_struct, vm_rb);
if (!(vma->vm_flags & VM_SHARED)) if (!(vma->vm_flags & VM_MAYSHARE))
continue; continue;
/* search for overlapping mappings on the same file */
if (vma->vm_file->f_dentry->d_inode != file->f_dentry->d_inode) if (vma->vm_file->f_dentry->d_inode != file->f_dentry->d_inode)
continue; continue;
...@@ -466,8 +527,9 @@ unsigned long do_mmap_pgoff(struct file *file, ...@@ -466,8 +527,9 @@ unsigned long do_mmap_pgoff(struct file *file,
if (pgoff >= vma->vm_pgoff + vmpglen) if (pgoff >= vma->vm_pgoff + vmpglen)
continue; continue;
/* handle inexact matches between mappings */
if (vmpglen != pglen || vma->vm_pgoff != pgoff) { if (vmpglen != pglen || vma->vm_pgoff != pgoff) {
if (flags & MAP_SHARED) if (!membacked)
goto sharing_violation; goto sharing_violation;
continue; continue;
} }
...@@ -481,11 +543,13 @@ unsigned long do_mmap_pgoff(struct file *file, ...@@ -481,11 +543,13 @@ unsigned long do_mmap_pgoff(struct file *file,
} }
} }
vma = NULL;
/* obtain the address to map to. we verify (or select) it and ensure /* obtain the address to map to. we verify (or select) it and ensure
* that it represents a valid section of the address space * that it represents a valid section of the address space
* - this is the hook for quasi-memory character devices * - this is the hook for quasi-memory character devices
*/ */
if (file && file->f_op && file->f_op->get_unmapped_area) if (file && file->f_op->get_unmapped_area)
addr = file->f_op->get_unmapped_area(file, addr, len, pgoff, flags); addr = file->f_op->get_unmapped_area(file, addr, len, pgoff, flags);
if (IS_ERR((void *) addr)) { if (IS_ERR((void *) addr)) {
...@@ -511,18 +575,12 @@ unsigned long do_mmap_pgoff(struct file *file, ...@@ -511,18 +575,12 @@ unsigned long do_mmap_pgoff(struct file *file,
vml->vma = vma; vml->vma = vma;
/* /* determine the object being mapped and call the appropriate specific
* determine the object being mapped and call the appropriate * mapper.
* specific mapper.
*/ */
if (file) { if (file) {
ret = -ENODEV;
if (!file->f_op)
goto error;
#ifdef MAGIC_ROM_PTR #ifdef MAGIC_ROM_PTR
/* First, try simpler routine designed to give us a ROM pointer. */ /* First, try simpler routine designed to give us a ROM pointer. */
if (file->f_op->romptr && !(prot & PROT_WRITE)) { if (file->f_op->romptr && !(prot & PROT_WRITE)) {
ret = file->f_op->romptr(file, vma); ret = file->f_op->romptr(file, vma);
#ifdef DEBUG #ifdef DEBUG
...@@ -536,9 +594,9 @@ unsigned long do_mmap_pgoff(struct file *file, ...@@ -536,9 +594,9 @@ unsigned long do_mmap_pgoff(struct file *file,
goto error; goto error;
} else } else
#endif /* MAGIC_ROM_PTR */ #endif /* MAGIC_ROM_PTR */
/* Then try full mmap routine, which might return a RAM pointer, /* Then try full mmap routine, which might return a RAM
or do something truly complicated. */ * pointer, or do something truly complicated
*/
if (file->f_op->mmap) { if (file->f_op->mmap) {
ret = file->f_op->mmap(file, vma); ret = file->f_op->mmap(file, vma);
...@@ -556,11 +614,15 @@ unsigned long do_mmap_pgoff(struct file *file, ...@@ -556,11 +614,15 @@ unsigned long do_mmap_pgoff(struct file *file,
goto error; goto error;
} }
/* An ENOSYS error indicates that mmap isn't possible (as opposed to /* An ENOSYS error indicates that mmap isn't possible (as
tried but failed) so we'll fall through to the copy. */ * opposed to tried but failed) so we'll fall through to the
* copy. */
} }
/* allocate some memory to hold the mapping */ /* allocate some memory to hold the mapping
* - note that this may not return a page-aligned address if the object
* we're allocating is smaller than a page
*/
ret = -ENOMEM; ret = -ENOMEM;
result = kmalloc(len, GFP_KERNEL); result = kmalloc(len, GFP_KERNEL);
if (!result) { if (!result) {
...@@ -602,8 +664,10 @@ unsigned long do_mmap_pgoff(struct file *file, ...@@ -602,8 +664,10 @@ unsigned long do_mmap_pgoff(struct file *file,
flush_icache_range((unsigned long) result, (unsigned long) result + len); flush_icache_range((unsigned long) result, (unsigned long) result + len);
done: done:
if (!(vma->vm_flags & VM_SHARED)) {
realalloc += kobjsize(result); realalloc += kobjsize(result);
askedalloc += len; askedalloc += len;
}
realalloc += kobjsize(vma); realalloc += kobjsize(vma);
askedalloc += sizeof(*vma); askedalloc += sizeof(*vma);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment