Commit 119a5fc1, authored by Hugh Dickins, committed by Linus Torvalds

khugepaged: collapse_pte_mapped_thp() protect the pmd lock

When retract_page_tables() removes a page table to make way for a huge
pmd, it holds huge page lock, i_mmap_lock_write, mmap_write_trylock and
pmd lock; but when collapse_pte_mapped_thp() does the same (to handle the
case when the original mmap_write_trylock had failed), only
mmap_write_trylock and pmd lock are held.

That's not enough.  One machine has twice crashed under load, with "BUG:
spinlock bad magic" and GPF on 6b6b6b6b6b6b6b6b.  Examining the second
crash, page_vma_mapped_walk_done()'s spin_unlock of pvmw->ptl (serving
page_referenced() on a file THP, that had found a page table at *pmd)
discovers that the page table page and its lock have already been freed by
the time it comes to unlock.

Follow the example of retract_page_tables(), but we only need one of huge
page lock or i_mmap_lock_write to secure against this: because it's the
narrower lock, and because it simplifies collapse_pte_mapped_thp() to know
the hpage earlier, choose to rely on huge page lock here.

Fixes: 27e1f827 ("khugepaged: enable collapse pmd for pte-mapped THP")
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Song Liu <songliubraving@fb.com>
Cc: <stable@vger.kernel.org>	[5.4+]
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008021213070.27773@eggly.anvils
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 723a80da
...@@ -1412,7 +1412,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) ...@@ -1412,7 +1412,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
{ {
unsigned long haddr = addr & HPAGE_PMD_MASK; unsigned long haddr = addr & HPAGE_PMD_MASK;
struct vm_area_struct *vma = find_vma(mm, haddr); struct vm_area_struct *vma = find_vma(mm, haddr);
struct page *hpage = NULL; struct page *hpage;
pte_t *start_pte, *pte; pte_t *start_pte, *pte;
pmd_t *pmd, _pmd; pmd_t *pmd, _pmd;
spinlock_t *ptl; spinlock_t *ptl;
...@@ -1432,9 +1432,17 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) ...@@ -1432,9 +1432,17 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE)) if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE))
return; return;
hpage = find_lock_page(vma->vm_file->f_mapping,
linear_page_index(vma, haddr));
if (!hpage)
return;
if (!PageHead(hpage))
goto drop_hpage;
pmd = mm_find_pmd(mm, haddr); pmd = mm_find_pmd(mm, haddr);
if (!pmd) if (!pmd)
return; goto drop_hpage;
start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl); start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
...@@ -1453,30 +1461,11 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) ...@@ -1453,30 +1461,11 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
page = vm_normal_page(vma, addr, *pte); page = vm_normal_page(vma, addr, *pte);
if (!page || !PageCompound(page))
goto abort;
if (!hpage) {
hpage = compound_head(page);
/*
* The mapping of the THP should not change.
*
* Note that uprobe, debugger, or MAP_PRIVATE may
* change the page table, but the new page will
* not pass PageCompound() check.
*/
if (WARN_ON(hpage->mapping != vma->vm_file->f_mapping))
goto abort;
}
/* /*
* Confirm the page maps to the correct subpage. * Note that uprobe, debugger, or MAP_PRIVATE may change the
* * page table, but the new page will not be a subpage of hpage.
* Note that uprobe, debugger, or MAP_PRIVATE may change
* the page table, but the new page will not pass
* PageCompound() check.
*/ */
if (WARN_ON(hpage + i != page)) if (hpage + i != page)
goto abort; goto abort;
count++; count++;
} }
...@@ -1495,7 +1484,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) ...@@ -1495,7 +1484,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
pte_unmap_unlock(start_pte, ptl); pte_unmap_unlock(start_pte, ptl);
/* step 3: set proper refcount and mm_counters. */ /* step 3: set proper refcount and mm_counters. */
if (hpage) { if (count) {
page_ref_sub(hpage, count); page_ref_sub(hpage, count);
add_mm_counter(vma->vm_mm, mm_counter_file(hpage), -count); add_mm_counter(vma->vm_mm, mm_counter_file(hpage), -count);
} }
...@@ -1506,10 +1495,15 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) ...@@ -1506,10 +1495,15 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
spin_unlock(ptl); spin_unlock(ptl);
mm_dec_nr_ptes(mm); mm_dec_nr_ptes(mm);
pte_free(mm, pmd_pgtable(_pmd)); pte_free(mm, pmd_pgtable(_pmd));
drop_hpage:
unlock_page(hpage);
put_page(hpage);
return; return;
abort: abort:
pte_unmap_unlock(start_pte, ptl); pte_unmap_unlock(start_pte, ptl);
goto drop_hpage;
} }
static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot) static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment