Commit c7a491f0 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] rmap 20 i_mmap_shared into i_mmap

From: Hugh Dickins <hugh@veritas.com>

Why should struct address_space have separate i_mmap and i_mmap_shared
prio_trees (separating !VM_SHARED and VM_SHARED vmas)?  There is no good reason:
the same processing is usually needed on both.  Merge i_mmap_shared into i_mmap,
but keep an i_mmap_writable count of VM_SHARED vmas (those capable of dirtying
the underlying file) for the mapping_writably_mapped test.
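
As a rough illustration of the bookkeeping this implies, here is a minimal
user-space sketch (not the kernel code: the helper names link_file_vma() and
unlink_file_vma() and the flag value are invented here, standing in for what
__vma_link_file() and __remove_shared_vm_struct() do in the hunks below):

    #include <stdio.h>

    #define VM_SHARED 0x1   /* illustrative value, not the kernel's */

    struct vm_area_struct {
        unsigned long vm_flags;
    };

    struct address_space {
        /* struct prio_tree_root i_mmap; -- the single tree, elided here */
        unsigned int i_mmap_writable;   /* count of VM_SHARED vmas */
    };

    /* Roughly what __vma_link_file() does after this patch. */
    static void link_file_vma(struct address_space *mapping,
                              struct vm_area_struct *vma)
    {
        if (vma->vm_flags & VM_SHARED)
            mapping->i_mmap_writable++;
        /* ...then the vma is inserted into the single i_mmap tree */
    }

    /* Roughly what __remove_shared_vm_struct() does after this patch. */
    static void unlink_file_vma(struct address_space *mapping,
                                struct vm_area_struct *vma)
    {
        if (vma->vm_flags & VM_SHARED)
            mapping->i_mmap_writable--;
        /* ...then the vma is removed from the single i_mmap tree */
    }

    /* The new mapping_writably_mapped() test: just check the counter. */
    static int mapping_writably_mapped(struct address_space *mapping)
    {
        return mapping->i_mmap_writable != 0;
    }

    int main(void)
    {
        struct address_space mapping = { .i_mmap_writable = 0 };
        struct vm_area_struct shared = { .vm_flags = VM_SHARED };

        link_file_vma(&mapping, &shared);
        printf("writably mapped: %d\n", mapping_writably_mapped(&mapping));
        unlink_file_vma(&mapping, &shared);
        printf("writably mapped: %d\n", mapping_writably_mapped(&mapping));
        return 0;
    }

In the real patch the counter lives in struct address_space and is maintained
under mapping->i_mmap_lock, which is why the i_mmap_lock comment in the hunks
below now reads "protect tree, count, list".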

The VM_MAYSHARE test in the arm and parisc loops is not necessarily what they
will want to use in the end: it's provided as a harmless example of what might
be appropriate, but maintainers are likely to revise it later (that parisc
loop is currently being changed in the parisc tree anyway).
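
For reference, the shape of that check is sketched below in stand-alone
user-space C (this is not the arm/parisc code: the array walk stands in for
the vma_prio_tree_next() iteration over mapping->i_mmap, and the constants
are illustrative only):

    #include <stdio.h>

    #define VM_MAYSHARE 0x2 /* illustrative value, not the kernel's */
    #define PAGE_SHIFT  12

    struct vm_area_struct {
        unsigned long vm_flags;
        unsigned long vm_start;
        unsigned long vm_pgoff;
    };

    /* Visit every vma mapping pgoff; skip those that cannot share the page. */
    static void flush_user_aliases(struct vm_area_struct *vmas, int n,
                                   unsigned long pgoff)
    {
        int i;

        for (i = 0; i < n; i++) {
            struct vm_area_struct *mpnt = &vmas[i];
            unsigned long offset, addr;

            if (!(mpnt->vm_flags & VM_MAYSHARE))
                continue;   /* the filter this patch adds */
            offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
            addr = mpnt->vm_start + offset;
            printf("would flush user alias at 0x%lx\n", addr);
        }
    }

    int main(void)
    {
        struct vm_area_struct vmas[] = {
            { 0, 0x10000, 0 },              /* private mapping: skipped */
            { VM_MAYSHARE, 0x20000, 0 },    /* shared mapping: visited */
        };

        flush_user_aliases(vmas, 2, 0);
        return 0;
    }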

On the way, remove the now out-of-date comments on vm_area_struct size.
parent b1efdc30
@@ -322,10 +322,10 @@ maps this page at its virtual address.
 		about doing this.
 		The idea is, first at flush_dcache_page() time, if
-		page->mapping->i_mmap{,_shared} are empty lists, just mark the
-		architecture private page flag bit. Later, in
-		update_mmu_cache(), a check is made of this flag bit, and if
-		set the flush is done and the flag bit is cleared.
+		page->mapping->i_mmap is an empty tree and ->i_mmap_nonlinear
+		an empty list, just mark the architecture private page flag bit.
+		Later, in update_mmu_cache(), a check is made of this flag bit,
+		and if set the flush is done and the flag bit is cleared.
 		IMPORTANT NOTE: It is often important, if you defer the flush,
 			that the actual flush occurs on the same CPU
......
@@ -94,13 +94,15 @@ void __flush_dcache_page(struct page *page)
 	 * and invalidate any user data.
 	 */
 	pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-	while ((mpnt = vma_prio_tree_next(mpnt, &mapping->i_mmap_shared,
+	while ((mpnt = vma_prio_tree_next(mpnt, &mapping->i_mmap,
 					&iter, pgoff, pgoff)) != NULL) {
 		/*
 		 * If this VMA is not in our MM, we can ignore it.
 		 */
 		if (mpnt->vm_mm != mm)
 			continue;
+		if (!(mpnt->vm_flags & VM_MAYSHARE))
+			continue;
 		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
 		flush_cache_page(mpnt, mpnt->vm_start + offset);
 	}
@@ -127,7 +129,7 @@ make_coherent(struct vm_area_struct *vma, unsigned long addr, struct page *page,
 	 * space, then we need to handle them specially to maintain
 	 * cache coherency.
 	 */
-	while ((mpnt = vma_prio_tree_next(mpnt, &mapping->i_mmap_shared,
+	while ((mpnt = vma_prio_tree_next(mpnt, &mapping->i_mmap,
 					&iter, pgoff, pgoff)) != NULL) {
 		/*
 		 * If this VMA is not in our MM, we can ignore it.
@@ -136,6 +138,8 @@ make_coherent(struct vm_area_struct *vma, unsigned long addr, struct page *page,
 		 */
 		if (mpnt->vm_mm != mm || mpnt == vma)
 			continue;
+		if (!(mpnt->vm_flags & VM_MAYSHARE))
+			continue;
 		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
 		aliases += adjust_pte(mpnt, mpnt->vm_start + offset);
 	}
......
@@ -244,46 +244,24 @@ void __flush_dcache_page(struct page *page)
 	pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-	/* We have ensured in arch_get_unmapped_area() that all shared
-	 * mappings are mapped at equivalent addresses, so we only need
-	 * to flush one for them all to become coherent */
-	while ((mpnt = vma_prio_tree_next(mpnt, &mapping->i_mmap_shared,
-					&iter, pgoff, pgoff)) != NULL) {
-		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
-		addr = mpnt->vm_start + offset;
-		/* flush instructions produce non access tlb misses.
-		 * On PA, we nullify these instructions rather than
-		 * taking a page fault if the pte doesn't exist, so we
-		 * have to find a congruent address with an existing
-		 * translation */
-		if (!translation_exists(mpnt, addr))
-			continue;
-		__flush_cache_page(mpnt, addr);
-		/* If we find an address to flush, that will also
-		 * bring all the private mappings up to date (see
-		 * comment below) */
-		return;
-	}
-	/* we have carefully arranged in arch_get_unmapped_area() that
+	/* We have carefully arranged in arch_get_unmapped_area() that
 	 * *any* mappings of a file are always congruently mapped (whether
 	 * declared as MAP_PRIVATE or MAP_SHARED), so we only need
-	 * to flush one address here too */
+	 * to flush one address here for them all to become coherent */
 	while ((mpnt = vma_prio_tree_next(mpnt, &mapping->i_mmap,
 					&iter, pgoff, pgoff)) != NULL) {
 		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
 		addr = mpnt->vm_start + offset;
-		/* This is just for speed. If the page translation isn't
-		 * there there's no point exciting the nadtlb handler into
-		 * a nullification frenzy */
-		if(!translation_exists(mpnt, addr))
+		/* Flush instructions produce non access tlb misses.
+		 * On PA, we nullify these instructions rather than
+		 * taking a page fault if the pte doesn't exist.
+		 * This is just for speed. If the page translation
+		 * isn't there, there's no point exciting the
+		 * nadtlb handler into a nullification frenzy */
+		if (!translation_exists(mpnt, addr))
 			continue;
 		__flush_cache_page(mpnt, addr);
......
@@ -266,7 +266,7 @@ static void hugetlbfs_drop_inode(struct inode *inode)
  * h_pgoff is in HPAGE_SIZE units.
  * vma->vm_pgoff is in PAGE_SIZE units.
  */
-static void
+static inline void
 hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
 {
 	struct vm_area_struct *vma = NULL;
@@ -312,8 +312,6 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 	spin_lock(&mapping->i_mmap_lock);
 	if (!prio_tree_empty(&mapping->i_mmap))
 		hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
-	if (!prio_tree_empty(&mapping->i_mmap_shared))
-		hugetlb_vmtruncate_list(&mapping->i_mmap_shared, pgoff);
 	spin_unlock(&mapping->i_mmap_lock);
 	truncate_hugepages(mapping, offset);
 	return 0;
......
@@ -201,7 +201,6 @@ void inode_init_once(struct inode *inode)
 	INIT_LIST_HEAD(&inode->i_data.private_list);
 	spin_lock_init(&inode->i_data.private_lock);
 	INIT_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
-	INIT_PRIO_TREE_ROOT(&inode->i_data.i_mmap_shared);
 	INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
 	spin_lock_init(&inode->i_lock);
 	i_size_ordered_init(inode);
......
@@ -331,9 +331,9 @@ struct address_space {
 	pgoff_t writeback_index;/* writeback starts here */
 	struct address_space_operations *a_ops; /* methods */
 	struct prio_tree_root i_mmap; /* tree of private mappings */
-	struct prio_tree_root i_mmap_shared; /* tree of shared mappings */
-	struct list_head i_mmap_nonlinear;/*list of nonlinear mappings */
-	spinlock_t i_mmap_lock; /* protect trees & list above */
+	unsigned int i_mmap_writable;/* count VM_SHARED mappings */
+	struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
+	spinlock_t i_mmap_lock; /* protect tree, count, list */
 	atomic_t truncate_count; /* Cover race condition with truncate */
 	unsigned long flags; /* error bits/gfp mask */
 	struct backing_dev_info *backing_dev_info; /* device readahead, etc */
@@ -383,20 +383,18 @@ int mapping_tagged(struct address_space *mapping, int tag);
 static inline int mapping_mapped(struct address_space *mapping)
 {
 	return !prio_tree_empty(&mapping->i_mmap) ||
-		!prio_tree_empty(&mapping->i_mmap_shared) ||
 		!list_empty(&mapping->i_mmap_nonlinear);
 }
 
 /*
  * Might pages of this file have been modified in userspace?
- * Note that i_mmap_shared holds all the VM_SHARED vmas: do_mmap_pgoff
+ * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff
  * marks vma as VM_SHARED if it is shared, and the file was opened for
  * writing i.e. vma may be mprotected writable even if now readonly.
  */
 static inline int mapping_writably_mapped(struct address_space *mapping)
 {
-	return !prio_tree_empty(&mapping->i_mmap_shared) ||
-		!list_empty(&mapping->i_mmap_nonlinear);
+	return mapping->i_mmap_writable != 0;
 }
 
 /*
......
@@ -64,7 +64,9 @@ struct vm_area_struct {
 
 	/*
 	 * For areas with an address space and backing store,
-	 * one of the address_space->i_mmap{,shared} trees.
+	 * linkage into the address_space->i_mmap prio tree, or
+	 * linkage to the list of like vmas hanging off its node, or
+	 * linkage of vma in the address_space->i_mmap_nonlinear list.
 	 */
 	union {
 		struct {
......
@@ -203,7 +203,7 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
 			mapping = vma->vm_file->f_mapping;
 			spin_lock(&mapping->i_mmap_lock);
 			vma->vm_flags |= VM_NONLINEAR;
-			vma_prio_tree_remove(vma, &mapping->i_mmap_shared);
+			vma_prio_tree_remove(vma, &mapping->i_mmap);
 			vma_prio_tree_init(vma);
 			list_add_tail(&vma->shared.vm_set.list,
 					&mapping->i_mmap_nonlinear);
......
@@ -1107,7 +1107,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 /*
  * Helper function for unmap_mapping_range().
  */
-static void unmap_mapping_range_list(struct prio_tree_root *root,
+static inline void unmap_mapping_range_list(struct prio_tree_root *root,
 					    struct zap_details *details)
 {
 	struct vm_area_struct *vma = NULL;
@@ -1177,12 +1177,6 @@ void unmap_mapping_range(struct address_space *mapping,
 	if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
 		unmap_mapping_range_list(&mapping->i_mmap, &details);
-
-	/* Don't waste time to check mapping on fully shared vmas */
-	details.check_mapping = NULL;
-
-	if (unlikely(!prio_tree_empty(&mapping->i_mmap_shared)))
-		unmap_mapping_range_list(&mapping->i_mmap_shared, &details);
 
 	/*
 	 * In nonlinear VMAs there is no correspondence between virtual address
 	 * offset and file offset. So we must perform an exhaustive search
......
@@ -71,11 +71,11 @@ static inline void __remove_shared_vm_struct(struct vm_area_struct *vma,
 {
 	if (vma->vm_flags & VM_DENYWRITE)
 		atomic_inc(&file->f_dentry->d_inode->i_writecount);
+	if (vma->vm_flags & VM_SHARED)
+		mapping->i_mmap_writable--;
 
 	if (unlikely(vma->vm_flags & VM_NONLINEAR))
 		list_del_init(&vma->shared.vm_set.list);
-	else if (vma->vm_flags & VM_SHARED)
-		vma_prio_tree_remove(vma, &mapping->i_mmap_shared);
 	else
 		vma_prio_tree_remove(vma, &mapping->i_mmap);
 }
@@ -263,12 +263,12 @@ static inline void __vma_link_file(struct vm_area_struct *vma)
 		if (vma->vm_flags & VM_DENYWRITE)
 			atomic_dec(&file->f_dentry->d_inode->i_writecount);
+		if (vma->vm_flags & VM_SHARED)
+			mapping->i_mmap_writable++;
 
 		if (unlikely(vma->vm_flags & VM_NONLINEAR))
 			list_add_tail(&vma->shared.vm_set.list,
 					&mapping->i_mmap_nonlinear);
-		else if (vma->vm_flags & VM_SHARED)
-			vma_prio_tree_insert(vma, &mapping->i_mmap_shared);
 		else
 			vma_prio_tree_insert(vma, &mapping->i_mmap);
 	}
@@ -308,8 +308,8 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 /*
- * Insert vm structure into process list sorted by address and into the inode's
- * i_mmap ring. The caller should hold mm->page_table_lock and
+ * Insert vm structure into process list sorted by address and into the
+ * inode's i_mmap tree. The caller should hold mm->page_table_lock and
  * ->f_mappping->i_mmap_lock if vm_file is non-NULL.
  */
 static void
@@ -328,8 +328,8 @@ __insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
 }
 
 /*
- * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that is
- * already present in an i_mmap{_shared} tree without adjusting the tree.
+ * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
+ * is already present in an i_mmap tree without adjusting the tree.
  * The following helper function should be used when such adjustments
 * are necessary. The "next" vma (if any) is to be removed or inserted
 * before we drop the necessary locks.
@@ -344,10 +344,8 @@ void vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	if (file) {
 		mapping = file->f_mapping;
-		if (!(vma->vm_flags & VM_SHARED))
+		if (!(vma->vm_flags & VM_NONLINEAR))
 			root = &mapping->i_mmap;
-		else if (!(vma->vm_flags & VM_NONLINEAR))
-			root = &mapping->i_mmap_shared;
 		spin_lock(&mapping->i_mmap_lock);
 	}
 	spin_lock(&mm->page_table_lock);
@@ -1516,7 +1514,7 @@ void exit_mmap(struct mm_struct *mm)
 }
 
 /* Insert vm structure into process list sorted by address
- * and into the inode's i_mmap ring. If vm_file is non-NULL
+ * and into the inode's i_mmap tree. If vm_file is non-NULL
 * then i_mmap_lock is taken here.
 */
 void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
......
 /*
- * mm/prio_tree.c - priority search tree for mapping->i_mmap{,_shared}
+ * mm/prio_tree.c - priority search tree for mapping->i_mmap
  *
  * Copyright (C) 2004, Rajesh Venkatasubramanian <vrajesh@umich.edu>
  *
@@ -41,7 +41,7 @@
  */
 
 /*
- * The following macros are used for implementing prio_tree for i_mmap{_shared}
+ * The following macros are used for implementing prio_tree for i_mmap
  */
 
 #define RADIX_INDEX(vma) ((vma)->vm_pgoff)
@@ -491,7 +491,7 @@ static struct prio_tree_node *prio_tree_next(struct prio_tree_root *root,
 }
 
 /*
- * Radix priority search tree for address_space->i_mmap_{_shared}
+ * Radix priority search tree for address_space->i_mmap
  *
 * For each vma that map a unique set of file pages i.e., unique [radix_index,
 * heap_index] value, we have a corresponing priority search tree node. If
......
@@ -328,21 +328,6 @@ static inline int page_referenced_file(struct page *page)
 		}
 	}
-
-	while ((vma = vma_prio_tree_next(vma, &mapping->i_mmap_shared,
-					&iter, pgoff, pgoff)) != NULL) {
-		if (vma->vm_flags & (VM_LOCKED|VM_RESERVED)) {
-			referenced++;
-			goto out;
-		}
-		if (vma->vm_mm->rss) {
-			address = vma_address(vma, pgoff);
-			referenced += page_referenced_one(page,
-				vma->vm_mm, address, &mapcount, &failed);
-			if (!mapcount)
-				goto out;
-		}
-	}
 
 	if (list_empty(&mapping->i_mmap_nonlinear))
 		WARN_ON(!failed);
 out:
@@ -745,17 +730,6 @@ static inline int try_to_unmap_file(struct page *page)
 		}
 	}
-
-	while ((vma = vma_prio_tree_next(vma, &mapping->i_mmap_shared,
-					&iter, pgoff, pgoff)) != NULL) {
-		if (vma->vm_mm->rss) {
-			address = vma_address(vma, pgoff);
-			ret = try_to_unmap_one(page,
-				vma->vm_mm, address, &mapcount, vma);
-			if (ret == SWAP_FAIL || !mapcount)
-				goto out;
-		}
-	}
 
 	if (list_empty(&mapping->i_mmap_nonlinear))
 		goto out;
......