Commit e4cf8264 authored by Hugh Dickins, committed by Linus Torvalds

[PATCH] Fix vma corruption

It occurred to me that if vma and new_vma are one and the same, then
vma_relink_file will not do a good job of linking it after itself - in
that pretty unlikely case when move_page_tables fails.

And more generally, whenever copy_vma's vma_merge succeeds, we have no
guarantee that old vma comes before new_vma in the i_mmap lists, as we
need to satisfy Rajesh's point: that ordering is only guaranteed in the
newly allocated case.

We have to abandon the ordering method when/if we move from lists to
prio_trees, so this patch switches to the less glamorous use of
i_shared_sem exclusion, as in my prio_tree mremap.
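
Concretely, the new scheme is to take the mapping's i_shared_sem before touching any file-backed ptes and to hold it across both the forward move and any error-recovery move back, so vmtruncate is locked out for the whole operation. A condensed sketch of the resulting move_vma() flow, pieced together from the hunks below (variable names as in the patch; accounting and unmap paths trimmed):

        struct address_space *mapping = NULL;

        new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
        if (!new_vma)
                return -ENOMEM;
        if (vma->vm_file) {
                /* lock vmtruncate out for the whole move */
                mapping = vma->vm_file->f_mapping;
                down(&mapping->i_shared_sem);
        }

        moved_len = move_page_tables(vma, new_addr, old_addr, old_len);
        if (moved_len < old_len) {
                /*
                 * Error recovery: move the ptes back.  With i_shared_sem
                 * held, vmtruncate cannot slip in between, so the old
                 * list-ordering trick (vma_relink_file) is not needed.
                 */
                move_page_tables(new_vma, old_addr, new_addr, moved_len);
        }

        if (mapping)
                up(&mapping->i_shared_sem);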
parent 3ba9ac07
@@ -526,9 +526,8 @@ extern void si_meminfo_node(struct sysinfo *val, int nid);
 extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
 extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
         struct rb_node **, struct rb_node *);
-extern struct vm_area_struct *copy_vma(struct vm_area_struct *,
+extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
         unsigned long addr, unsigned long len, unsigned long pgoff);
-extern void vma_relink_file(struct vm_area_struct *, struct vm_area_struct *);
 extern void exit_mmap(struct mm_struct *);
 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
...
@@ -1498,9 +1498,11 @@ void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
  * Copy the vma structure to a new location in the same mm,
  * prior to moving page table entries, to effect an mremap move.
  */
-struct vm_area_struct *copy_vma(struct vm_area_struct *vma,
+struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
         unsigned long addr, unsigned long len, unsigned long pgoff)
 {
+        struct vm_area_struct *vma = *vmap;
+        unsigned long vma_start = vma->vm_start;
         struct mm_struct *mm = vma->vm_mm;
         struct vm_area_struct *new_vma, *prev;
         struct rb_node **rb_link, *rb_parent;
@@ -1508,7 +1510,14 @@ struct vm_area_struct *copy_vma(struct vm_area_struct *vma,
         find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
         new_vma = vma_merge(mm, prev, rb_parent, addr, addr + len,
                         vma->vm_flags, vma->vm_file, pgoff);
-        if (!new_vma) {
+        if (new_vma) {
+                /*
+                 * Source vma may have been merged into new_vma
+                 */
+                if (vma_start >= new_vma->vm_start &&
+                    vma_start < new_vma->vm_end)
+                        *vmap = new_vma;
+        } else {
                 new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
                 if (new_vma) {
                         *new_vma = *vma;
@@ -1525,24 +1534,3 @@ struct vm_area_struct *copy_vma(struct vm_area_struct *vma,
         }
         return new_vma;
 }
-
-/*
- * Position vma after prev in shared file list:
- * for mremap move error recovery racing against vmtruncate.
- */
-void vma_relink_file(struct vm_area_struct *vma, struct vm_area_struct *prev)
-{
-        struct mm_struct *mm = vma->vm_mm;
-        struct address_space *mapping;
-
-        if (vma->vm_file) {
-                mapping = vma->vm_file->f_mapping;
-                if (mapping) {
-                        down(&mapping->i_shared_sem);
-                        spin_lock(&mm->page_table_lock);
-                        list_move(&vma->shared, &prev->shared);
-                        spin_unlock(&mm->page_table_lock);
-                        up(&mapping->i_shared_sem);
-                }
-        }
-}
@@ -169,6 +169,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
                 unsigned long new_len, unsigned long new_addr)
 {
         struct mm_struct *mm = vma->vm_mm;
+        struct address_space *mapping = NULL;
         struct vm_area_struct *new_vma;
         unsigned long vm_flags = vma->vm_flags;
         unsigned long new_pgoff;
@@ -184,30 +185,35 @@ static unsigned long move_vma(struct vm_area_struct *vma,
                 return -ENOMEM;

         new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
-        new_vma = copy_vma(vma, new_addr, new_len, new_pgoff);
+        new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
         if (!new_vma)
                 return -ENOMEM;
+        if (vma->vm_file) {
+                /*
+                 * Subtle point from Rajesh Venkatasubramanian: before
+                 * moving file-based ptes, we must lock vmtruncate out,
+                 * since it might clean the dst vma before the src vma,
+                 * and we propagate stale pages into the dst afterward.
+                 */
+                mapping = vma->vm_file->f_mapping;
+                down(&mapping->i_shared_sem);
+        }
         moved_len = move_page_tables(vma, new_addr, old_addr, old_len);
         if (moved_len < old_len) {
                 /*
                  * On error, move entries back from new area to old,
                  * which will succeed since page tables still there,
                  * and then proceed to unmap new area instead of old.
-                 *
-                 * Subtle point from Rajesh Venkatasubramanian: before
-                 * moving file-based ptes, move new_vma before old vma
-                 * in the i_mmap or i_mmap_shared list, so when racing
-                 * against vmtruncate we cannot propagate pages to be
-                 * truncated back from new_vma into just cleaned old.
                  */
-                vma_relink_file(vma, new_vma);
                 move_page_tables(new_vma, old_addr, new_addr, moved_len);
                 vma = new_vma;
                 old_len = new_len;
                 old_addr = new_addr;
                 new_addr = -ENOMEM;
         }
+        if (mapping)
+                up(&mapping->i_shared_sem);

         /* Conceal VM_ACCOUNT so old reservation is not undone */
         if (vm_flags & VM_ACCOUNT) {
...
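
A note on the copy_vma() interface change visible above: because vma_merge() can absorb the source vma into new_vma, copy_vma() now takes a struct vm_area_struct ** and redirects the caller's pointer when the old start address falls inside new_vma. A minimal caller-side sketch of that contract (names borrowed from move_vma() in this patch):

        struct vm_area_struct *new_vma;

        /* pass &vma: copy_vma() may redirect it to the merged vma */
        new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
        if (!new_vma)
                return -ENOMEM;
        /*
         * From here on, use the possibly-updated vma (for the
         * i_shared_sem locking and move_page_tables() calls); the
         * pre-call pointer may no longer be the vma covering old_addr.
         */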