Commit bc3d0059 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] vma corruption fix

From: Hugh Dickins <hugh@veritas.com>

Fixes bugzilla #2219

fork's dup_mmap leaves the child's mm_rb as copied from the parent mm while
doing all the copy_page_ranges, and then calls build_mmap_rb without holding
page_table_lock.

try_to_unmap_one's find_vma (which holds page_table_lock, not mmap_sem),
coming in on another cpu, may therefore cause mm mayhem: it may leave the
child's mmap_cache pointing to a vma of the parent mm.

When the parent exits and the child faults, quite what happens then depends
on what junk inhabits vm_page_prot, which gets set in the page table, with
page_add_rmap adding the ptep; but the junk pte is likely to fail the tests
in page_remove_rmap.

Eventually the child exits, the page table is freed, and try_to_unmap_one
oopses on a null ptep_to_mm (though in a kernel with rss limiting,
page_referenced usually hits the null ptep_to_mm first).

This took me days and days to unravel!  Big thanks to Matthieu for
reporting it with a good test case.
parent ecad96d5
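
The shape of the fix is easiest to see outside the kernel. What follows is a
minimal userspace sketch of the fixed dup_mmap() flow, not kernel code: a
plain unbalanced binary search tree stands in for the red-black tree, a
pthread mutex stands in for page_table_lock, and the names here (struct vma,
struct mm, vma_link_tree) are illustrative stand-ins rather than the real
kernel API. It demonstrates the ordering the patch establishes: the child's
tree starts empty, and each vma becomes visible in the list and the tree
together, under the lock, before its page tables would be copied.

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

/* Illustrative stand-ins for the kernel structures. */
struct vma {
	unsigned long start;
	struct vma *left, *right;	/* stands in for vm_rb */
	struct vma *next;		/* stands in for vm_next */
};

struct mm {
	struct vma *mmap;		/* address-ordered list, as in the kernel */
	struct vma *root;		/* stands in for mm_rb */
	pthread_mutex_t page_table_lock;
};

/* Analogue of __vma_link_rb(): splice the vma into a precomputed slot. */
static void vma_link_tree(struct vma **link, struct vma *vma)
{
	vma->left = vma->right = NULL;
	*link = vma;
}

/* Analogue of the fixed dup_mmap() loop: the child starts with an empty
 * tree, and every copied vma is linked into both list and tree under the
 * lock, so a concurrent lookup never sees the parent's nodes. */
static void dup_mmap(struct mm *mm, const struct mm *oldmm)
{
	struct vma **pprev = &mm->mmap;
	struct vma **link = &mm->root;	/* like rb_link: next insertion slot */
	const struct vma *mpnt;

	mm->root = NULL;		/* like mm->mm_rb = RB_ROOT */
	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->next) {
		struct vma *tmp = malloc(sizeof(*tmp));
		if (!tmp)
			abort();
		tmp->start = mpnt->start;
		tmp->next = NULL;

		pthread_mutex_lock(&mm->page_table_lock);
		*pprev = tmp;			/* link into the list... */
		pprev = &tmp->next;
		vma_link_tree(link, tmp);	/* ...and into the tree */
		link = &tmp->right;	/* the list is ascending, so the next
					 * vma goes to the right of this one */
		/* the real patch calls copy_page_range() here,
		 * still under page_table_lock */
		pthread_mutex_unlock(&mm->page_table_lock);
	}
}

int main(void)
{
	struct vma b = { .start = 0x2000 };
	struct vma a = { .start = 0x1000, .next = &b };
	struct mm parent = { .mmap = &a };
	struct mm child = { .page_table_lock = PTHREAD_MUTEX_INITIALIZER };
	const struct vma *v;

	dup_mmap(&child, &parent);
	for (v = child.mmap; v; v = v->next)
		printf("child vma at %#lx\n", v->start);
	return 0;
}

Built with cc -pthread, this prints the child's two copied vmas in address
order; the point is the loop body, which mirrors the locking and linking
order of the patched dup_mmap below.
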
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -530,7 +530,8 @@ extern void si_meminfo_node(struct sysinfo *val, int nid);
 
 /* mmap.c */
 extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
-extern void build_mmap_rb(struct mm_struct *);
+extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
+		struct rb_node **, struct rb_node *);
 extern void exit_mmap(struct mm_struct *);
 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -265,6 +265,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
 {
 	struct vm_area_struct * mpnt, *tmp, **pprev;
+	struct rb_node **rb_link, *rb_parent;
 	int retval;
 	unsigned long charge = 0;
@@ -277,6 +278,9 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
 	mm->map_count = 0;
 	mm->rss = 0;
 	cpus_clear(mm->cpu_vm_mask);
+	mm->mm_rb = RB_ROOT;
+	rb_link = &mm->mm_rb.rb_node;
+	rb_parent = NULL;
 	pprev = &mm->mmap;
 	/*
@@ -324,11 +328,17 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
 		/*
 		 * Link in the new vma and copy the page table entries:
-		 * link in first so that swapoff can see swap entries.
+		 * link in first so that swapoff can see swap entries,
+		 * and try_to_unmap_one's find_vma find the new vma.
 		 */
 		spin_lock(&mm->page_table_lock);
 		*pprev = tmp;
 		pprev = &tmp->vm_next;
+
+		__vma_link_rb(mm, tmp, rb_link, rb_parent);
+		rb_link = &tmp->vm_rb.rb_right;
+		rb_parent = &tmp->vm_rb;
+
 		mm->map_count++;
 		retval = copy_page_range(mm, current->mm, tmp);
 		spin_unlock(&mm->page_table_lock);
@@ -340,7 +350,6 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
 			goto fail;
 	}
 	retval = 0;
-	build_mmap_rb(mm);
 out:
 	flush_tlb_mm(current->mm);
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -222,8 +222,8 @@ __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 }
 
-static void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
-		struct rb_node **rb_link, struct rb_node *rb_parent)
+void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
+		struct rb_node **rb_link, struct rb_node *rb_parent)
 {
 	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
 	rb_insert_color(&vma->vm_rb, &mm->mm_rb);
@@ -1404,22 +1404,6 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 EXPORT_SYMBOL(do_brk);
 
-/* Build the RB tree corresponding to the VMA list. */
-void build_mmap_rb(struct mm_struct * mm)
-{
-	struct vm_area_struct * vma;
-	struct rb_node ** rb_link, * rb_parent;
-
-	mm->mm_rb = RB_ROOT;
-	rb_link = &mm->mm_rb.rb_node;
-	rb_parent = NULL;
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		__vma_link_rb(mm, vma, rb_link, rb_parent);
-		rb_parent = &vma->vm_rb;
-		rb_link = &rb_parent->rb_right;
-	}
-}
-
 /* Release all mmaps. */
 void exit_mmap(struct mm_struct *mm)
 {
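
A note on the design choice in the dup_mmap hunk above: the parent's vma
list is kept sorted by address, so each vma linked is the largest key
inserted so far. rb_insert_color() may rotate the tree, but rotations
preserve search order, so the maximum key always remains the rightmost node
and its right child stays empty. Caching rb_link = &tmp->vm_rb.rb_right and
rb_parent = &tmp->vm_rb therefore always yields a valid insertion point for
the next vma, and the loop builds exactly the tree that the removed
build_mmap_rb() used to build afterwards. The difference is that each vma is
now visible in mm_rb, under page_table_lock, before its page tables are
copied, which closes the window that try_to_unmap_one's find_vma could fall
through.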