Commit c7ab0d2f authored by Kirill A. Shutemov, committed by Linus Torvalds

mm: convert try_to_unmap_one() to use page_vma_mapped_walk()

For consistency, it is worth converting all page_check_address() users to
page_vma_mapped_walk(), so that we can drop the former.

It also simplifies freeze_page(): we now walk the rmap only once instead of
once per subpage.

Link: http://lkml.kernel.org/r/20170129173858.45174-8-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent f27176cf
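
The diff below replaces the single page_check_address() lookup in try_to_unmap_one() with a page_vma_mapped_walk() loop that visits every mapping of the page within the VMA. As a minimal sketch of that caller pattern, distilled from the try_to_unmap_one() hunk below (the wrapper name visit_page_in_vma() and its early-exit policy are illustrative only, not part of this patch):

#include <linux/mm.h>
#include <linux/rmap.h>

/*
 * Illustrative helper (not in the patch): walk every mapping of @page
 * inside @vma, the way the converted try_to_unmap_one() below does.
 */
static int visit_page_in_vma(struct page *page, struct vm_area_struct *vma,
			     unsigned long address)
{
	struct page_vma_mapped_walk pvmw = {
		.page = page,
		.vma = vma,
		.address = address,
	};

	while (page_vma_mapped_walk(&pvmw)) {
		/*
		 * Each iteration finds one mapping; pvmw.pte (or pvmw.pmd
		 * for a PMD-mapped THP) and pvmw.address describe it, with
		 * the page table lock held.
		 */
		if (vma->vm_flags & VM_LOCKED) {
			/* Bailing out early must release the walk's locks. */
			page_vma_mapped_walk_done(&pvmw);
			return SWAP_MLOCK;
		}
		/* ... inspect or clear *pvmw.pte at pvmw.address ... */
	}

	return SWAP_AGAIN;
}

In the patch itself the same page_vma_mapped_walk_done() plus break sequence replaces the old out_unmap label, and walking all mappings in one pass is what lets freeze_page() unmap every subpage of a THP with a single try_to_unmap() call.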
@@ -2106,24 +2106,16 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 static void freeze_page(struct page *page)
 {
 	enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
-		TTU_RMAP_LOCKED;
-	int i, ret;
+		TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
+	int ret;
 
 	VM_BUG_ON_PAGE(!PageHead(page), page);
 
 	if (PageAnon(page))
 		ttu_flags |= TTU_MIGRATION;
 
-	/* We only need TTU_SPLIT_HUGE_PMD once */
-	ret = try_to_unmap(page, ttu_flags | TTU_SPLIT_HUGE_PMD);
-	for (i = 1; !ret && i < HPAGE_PMD_NR; i++) {
-		/* Cut short if the page is unmapped */
-		if (page_count(page) == 1)
-			return;
-
-		ret = try_to_unmap(page + i, ttu_flags);
-	}
-	VM_BUG_ON_PAGE(ret, page + i - 1);
+	ret = try_to_unmap(page, ttu_flags);
+	VM_BUG_ON_PAGE(ret, page);
 }
 
 static void unfreeze_page(struct page *page)
@@ -607,8 +607,7 @@ void try_to_unmap_flush_dirty(void)
 		try_to_unmap_flush();
 }
 
-static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
-		struct page *page, bool writable)
+static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
 {
 	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
 
@@ -643,8 +642,7 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
 	return should_defer;
 }
 #else
-static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
-		struct page *page, bool writable)
+static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
 {
 }
 
@@ -1459,155 +1457,163 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		     unsigned long address, void *arg)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	pte_t *pte;
+	struct page_vma_mapped_walk pvmw = {
+		.page = page,
+		.vma = vma,
+		.address = address,
+	};
 	pte_t pteval;
-	spinlock_t *ptl;
+	struct page *subpage;
 	int ret = SWAP_AGAIN;
 	struct rmap_private *rp = arg;
 	enum ttu_flags flags = rp->flags;
 
 	/* munlock has nothing to gain from examining un-locked vmas */
 	if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
-		goto out;
+		return SWAP_AGAIN;
 
 	if (flags & TTU_SPLIT_HUGE_PMD) {
 		split_huge_pmd_address(vma, address,
 				flags & TTU_MIGRATION, page);
-		/* check if we have anything to do after split */
-		if (page_mapcount(page) == 0)
-			goto out;
 	}
 
-	pte = page_check_address(page, mm, address, &ptl,
-				 PageTransCompound(page));
-	if (!pte)
-		goto out;
+	while (page_vma_mapped_walk(&pvmw)) {
+		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+		address = pvmw.address;
 
-	/*
-	 * If the page is mlock()d, we cannot swap it out.
-	 * If it's recently referenced (perhaps page_referenced
-	 * skipped over this mm) then we should reactivate it.
-	 */
-	if (!(flags & TTU_IGNORE_MLOCK)) {
-		if (vma->vm_flags & VM_LOCKED) {
-			/* PTE-mapped THP are never mlocked */
-			if (!PageTransCompound(page)) {
-				/*
-				 * Holding pte lock, we do *not* need
-				 * mmap_sem here
-				 */
-				mlock_vma_page(page);
+		/* Unexpected PMD-mapped THP? */
+		VM_BUG_ON_PAGE(!pvmw.pte, page);
+
+		/*
+		 * If the page is mlock()d, we cannot swap it out.
+		 * If it's recently referenced (perhaps page_referenced
+		 * skipped over this mm) then we should reactivate it.
+		 */
+		if (!(flags & TTU_IGNORE_MLOCK)) {
+			if (vma->vm_flags & VM_LOCKED) {
+				/* PTE-mapped THP are never mlocked */
+				if (!PageTransCompound(page)) {
+					/*
+					 * Holding pte lock, we do *not* need
+					 * mmap_sem here
+					 */
+					mlock_vma_page(page);
+				}
+				ret = SWAP_MLOCK;
+				page_vma_mapped_walk_done(&pvmw);
+				break;
 			}
-			ret = SWAP_MLOCK;
-			goto out_unmap;
+			if (flags & TTU_MUNLOCK)
+				continue;
 		}
-		if (flags & TTU_MUNLOCK)
-			goto out_unmap;
-	}
-	if (!(flags & TTU_IGNORE_ACCESS)) {
-		if (ptep_clear_flush_young_notify(vma, address, pte)) {
-			ret = SWAP_FAIL;
-			goto out_unmap;
+
+		if (!(flags & TTU_IGNORE_ACCESS)) {
+			if (ptep_clear_flush_young_notify(vma, address,
+						pvmw.pte)) {
+				ret = SWAP_FAIL;
+				page_vma_mapped_walk_done(&pvmw);
+				break;
+			}
 		}
-	}
 
-	/* Nuke the page table entry. */
-	flush_cache_page(vma, address, page_to_pfn(page));
-	if (should_defer_flush(mm, flags)) {
-		/*
-		 * We clear the PTE but do not flush so potentially a remote
-		 * CPU could still be writing to the page. If the entry was
-		 * previously clean then the architecture must guarantee that
-		 * a clear->dirty transition on a cached TLB entry is written
-		 * through and traps if the PTE is unmapped.
-		 */
-		pteval = ptep_get_and_clear(mm, address, pte);
+		/* Nuke the page table entry. */
+		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+		if (should_defer_flush(mm, flags)) {
+			/*
+			 * We clear the PTE but do not flush so potentially
+			 * a remote CPU could still be writing to the page.
+			 * If the entry was previously clean then the
+			 * architecture must guarantee that a clear->dirty
+			 * transition on a cached TLB entry is written through
+			 * and traps if the PTE is unmapped.
+			 */
+			pteval = ptep_get_and_clear(mm, address, pvmw.pte);
+
+			set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
+		} else {
+			pteval = ptep_clear_flush(vma, address, pvmw.pte);
+		}
 
-		set_tlb_ubc_flush_pending(mm, page, pte_dirty(pteval));
-	} else {
-		pteval = ptep_clear_flush(vma, address, pte);
-	}
+		/* Move the dirty bit to the page. Now the pte is gone. */
+		if (pte_dirty(pteval))
+			set_page_dirty(page);
 
-	/* Move the dirty bit to the physical page now the pte is gone. */
-	if (pte_dirty(pteval))
-		set_page_dirty(page);
+		/* Update high watermark before we lower rss */
+		update_hiwater_rss(mm);
 
-	/* Update high watermark before we lower rss */
-	update_hiwater_rss(mm);
+		if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
+			if (PageHuge(page)) {
+				int nr = 1 << compound_order(page);
+				hugetlb_count_sub(nr, mm);
+			} else {
+				dec_mm_counter(mm, mm_counter(page));
+			}
 
-	if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
-		if (PageHuge(page)) {
-			hugetlb_count_sub(1 << compound_order(page), mm);
-		} else {
+			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
+			set_pte_at(mm, address, pvmw.pte, pteval);
+		} else if (pte_unused(pteval)) {
+			/*
+			 * The guest indicated that the page content is of no
+			 * interest anymore. Simply discard the pte, vmscan
+			 * will take care of the rest.
+			 */
 			dec_mm_counter(mm, mm_counter(page));
-		}
-		set_pte_at(mm, address, pte,
-			   swp_entry_to_pte(make_hwpoison_entry(page)));
-	} else if (pte_unused(pteval)) {
-		/*
-		 * The guest indicated that the page content is of no
-		 * interest anymore. Simply discard the pte, vmscan
-		 * will take care of the rest.
-		 */
-		dec_mm_counter(mm, mm_counter(page));
-	} else if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION)) {
-		swp_entry_t entry;
-		pte_t swp_pte;
-		/*
-		 * Store the pfn of the page in a special migration
-		 * pte. do_swap_page() will wait until the migration
-		 * pte is removed and then restart fault handling.
-		 */
-		entry = make_migration_entry(page, pte_write(pteval));
-		swp_pte = swp_entry_to_pte(entry);
-		if (pte_soft_dirty(pteval))
-			swp_pte = pte_swp_mksoft_dirty(swp_pte);
-		set_pte_at(mm, address, pte, swp_pte);
-	} else if (PageAnon(page)) {
-		swp_entry_t entry = { .val = page_private(page) };
-		pte_t swp_pte;
-		/*
-		 * Store the swap location in the pte.
-		 * See handle_pte_fault() ...
-		 */
-		VM_BUG_ON_PAGE(!PageSwapCache(page), page);
+		} else if (IS_ENABLED(CONFIG_MIGRATION) &&
+				(flags & TTU_MIGRATION)) {
+			swp_entry_t entry;
+			pte_t swp_pte;
+			/*
+			 * Store the pfn of the page in a special migration
+			 * pte. do_swap_page() will wait until the migration
+			 * pte is removed and then restart fault handling.
+			 */
+			entry = make_migration_entry(subpage,
+					pte_write(pteval));
+			swp_pte = swp_entry_to_pte(entry);
+			if (pte_soft_dirty(pteval))
+				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			set_pte_at(mm, address, pvmw.pte, swp_pte);
+		} else if (PageAnon(page)) {
+			swp_entry_t entry = { .val = page_private(subpage) };
+			pte_t swp_pte;
+			/*
+			 * Store the swap location in the pte.
+			 * See handle_pte_fault() ...
+			 */
+			VM_BUG_ON_PAGE(!PageSwapCache(page), page);
+
+			if (!PageDirty(page) && (flags & TTU_LZFREE)) {
+				/* It's a freeable page by MADV_FREE */
+				dec_mm_counter(mm, MM_ANONPAGES);
+				rp->lazyfreed++;
+				goto discard;
+			}
 
-		if (!PageDirty(page) && (flags & TTU_LZFREE)) {
-			/* It's a freeable page by MADV_FREE */
+			if (swap_duplicate(entry) < 0) {
+				set_pte_at(mm, address, pvmw.pte, pteval);
+				ret = SWAP_FAIL;
+				page_vma_mapped_walk_done(&pvmw);
+				break;
+			}
+			if (list_empty(&mm->mmlist)) {
+				spin_lock(&mmlist_lock);
+				if (list_empty(&mm->mmlist))
+					list_add(&mm->mmlist, &init_mm.mmlist);
+				spin_unlock(&mmlist_lock);
+			}
 			dec_mm_counter(mm, MM_ANONPAGES);
-			rp->lazyfreed++;
-			goto discard;
-		}
-
-		if (swap_duplicate(entry) < 0) {
-			set_pte_at(mm, address, pte, pteval);
-			ret = SWAP_FAIL;
-			goto out_unmap;
-		}
-		if (list_empty(&mm->mmlist)) {
-			spin_lock(&mmlist_lock);
-			if (list_empty(&mm->mmlist))
-				list_add(&mm->mmlist, &init_mm.mmlist);
-			spin_unlock(&mmlist_lock);
-		}
-		dec_mm_counter(mm, MM_ANONPAGES);
-		inc_mm_counter(mm, MM_SWAPENTS);
-		swp_pte = swp_entry_to_pte(entry);
-		if (pte_soft_dirty(pteval))
-			swp_pte = pte_swp_mksoft_dirty(swp_pte);
-		set_pte_at(mm, address, pte, swp_pte);
-	} else
-		dec_mm_counter(mm, mm_counter_file(page));
+			inc_mm_counter(mm, MM_SWAPENTS);
+			swp_pte = swp_entry_to_pte(entry);
+			if (pte_soft_dirty(pteval))
+				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			set_pte_at(mm, address, pvmw.pte, swp_pte);
+		} else
+			dec_mm_counter(mm, mm_counter_file(page));
 discard:
-	page_remove_rmap(page, PageHuge(page));
-	put_page(page);
+		page_remove_rmap(subpage, PageHuge(page));
+		put_page(page);
+		mmu_notifier_invalidate_page(mm, address);
+	}
 
-out_unmap:
-	pte_unmap_unlock(pte, ptl);
-	if (ret != SWAP_FAIL && ret != SWAP_MLOCK && !(flags & TTU_MUNLOCK))
-		mmu_notifier_invalidate_page(mm, address);
-out:
 	return ret;
 }
@@ -1632,7 +1638,7 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
 
 static int page_mapcount_is_zero(struct page *page)
 {
-	return !page_mapcount(page);
+	return !total_mapcount(page);
 }
 
 /**