Commit bd4c82c2 authored by Huang Ying, committed by Linus Torvalds

mm, THP, swap: delay splitting THP after swapped out

In this patch, splitting a transparent huge page (THP) during swap-out is
delayed from just after the THP is added to the swap cache until after the
swap-out finishes.  This lets more of the operations needed to reclaim an
anonymous THP, such as writing the THP to the swap device and removing it
from the swap cache, be batched, which improves the performance of swapping
out anonymous THPs.
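
Schematically, the batching this enables can be modeled as below (plain C,
not kernel code; the operation counts and the HPAGE_NR constant are
illustrative assumptions, not the kernel's accounting):

/* Stand-in model of the reclaim ordering before and after the patch. */
#include <stdio.h>

#define HPAGE_NR 512    /* assumption: base pages per 2MiB THP on x86-64 */

/* Before this series: the THP is split right after entering the swap
 * cache, so every subpage pays for its own write + swap-cache removal. */
static int reclaim_split_early(void)
{
    int ops = 0;
    for (int i = 0; i < HPAGE_NR; i++) {
        ops++;  /* write out one 4KiB subpage */
        ops++;  /* delete one subpage from the swap cache */
    }
    return ops;
}

/* After this patch: the split is delayed past the swap-out itself, so the
 * write and the swap-cache removal each become one huge-page-sized step. */
static int reclaim_split_late(void)
{
    int ops = 0;
    ops++;      /* one write of the whole THP */
    ops++;      /* one removal of the THP from the swap cache */
    return ops;
}

int main(void)
{
    printf("per-THP operations, split early: %d\n", reclaim_split_early());
    printf("per-THP operations, split late:  %d\n", reclaim_split_late());
    return 0;
}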

This is the second step of THP swap support.  The plan is to delay
splitting the THP step by step and eventually avoid splitting it
altogether.

With the patchset, swap-out throughput improves by 42% (from about
5.81GB/s to about 8.25GB/s) in the vm-scalability swap-w-seq test case
with 16 processes, while the number of IPIs (reflecting TLB flushes) drops
by about 78.9%.  The test was done on a Xeon E5 v3 system, with a
RAM-simulated PMEM (persistent memory) device as the swap device.  To test
sequential swap-out, the test case creates 8 processes, which sequentially
allocate and write to anonymous pages until the RAM and part of the swap
device are used up.

Link: http://lkml.kernel.org/r/20170724051840.2309-12-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Ross Zwisler <ross.zwisler@intel.com> [for brd.c, zram_drv.c, pmem.c]
Cc: Vishal L Verma <vishal.l.verma@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent d6810d73
mm/vmscan.c
@@ -536,7 +536,9 @@ static inline int is_page_cache_freeable(struct page *page)
 	 * that isolated the page, the page cache radix tree and
 	 * optional buffer heads at page->private.
 	 */
-	return page_count(page) - page_has_private(page) == 2;
+	int radix_pins = PageTransHuge(page) && PageSwapCache(page) ?
+		HPAGE_PMD_NR : 1;
+	return page_count(page) - page_has_private(page) == 1 + radix_pins;
 }
 
 static int may_write_to_inode(struct inode *inode, struct scan_control *sc)
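
The hunk above changes the expected pin count: the isolating caller holds
one reference, and the swap cache's radix tree holds one reference per base
page, i.e. HPAGE_PMD_NR of them for a THP still in the swap cache.  A
minimal userspace model of that arithmetic (the struct and helper names are
stand-ins, and HPAGE_PMD_NR = 512 is assumed for x86-64 2MiB THPs):

#include <stdbool.h>
#include <stdio.h>

#define HPAGE_PMD_NR 512        /* assumption: 2MiB THP / 4KiB base pages */

struct fake_page {              /* stand-in for struct page */
    int count;                  /* total references */
    bool has_private;           /* buffer heads at page->private */
    bool thp_in_swapcache;      /* PageTransHuge() && PageSwapCache() */
};

/* Mirrors the patched check: one pin for the isolating caller, plus one
 * radix-tree pin per base page the swap cache references. */
static bool freeable(const struct fake_page *p)
{
    int radix_pins = p->thp_in_swapcache ? HPAGE_PMD_NR : 1;
    return p->count - (p->has_private ? 1 : 0) == 1 + radix_pins;
}

int main(void)
{
    struct fake_page base = { .count = 2 };
    struct fake_page thp = { .count = 1 + HPAGE_PMD_NR,
                             .thp_in_swapcache = true };
    printf("base page freeable: %d\n", freeable(&base));
    printf("THP freeable:       %d\n", freeable(&thp));
    return 0;
}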
@@ -666,6 +668,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 			    bool reclaimed)
 {
 	unsigned long flags;
+	int refcount;
 
 	BUG_ON(!PageLocked(page));
 	BUG_ON(mapping != page_mapping(page));
@@ -696,11 +699,15 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 	 * Note that if SetPageDirty is always performed via set_page_dirty,
 	 * and thus under tree_lock, then this ordering is not required.
 	 */
-	if (!page_ref_freeze(page, 2))
+	if (unlikely(PageTransHuge(page)) && PageSwapCache(page))
+		refcount = 1 + HPAGE_PMD_NR;
+	else
+		refcount = 2;
+	if (!page_ref_freeze(page, refcount))
 		goto cannot_free;
 	/* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */
 	if (unlikely(PageDirty(page))) {
-		page_ref_unfreeze(page, 2);
+		page_ref_unfreeze(page, refcount);
 		goto cannot_free;
 	}
 
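
page_ref_freeze() succeeds only if it can atomically exchange the refcount
from exactly the expected value to zero, so the hunk merely widens that
expected value when the page is a swap-cache THP.  A minimal C11 sketch of
the freeze/unfreeze semantics (the kernel version operates on
page->_refcount via atomic_cmpxchg; the names here are stand-ins):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Model of page_ref_freeze(): cmpxchg the refcount from the expected
 * value to 0; any concurrent extra reference makes it fail. */
static bool ref_freeze(atomic_int *refs, int expected)
{
    return atomic_compare_exchange_strong(refs, &expected, 0);
}

/* Model of page_ref_unfreeze(): restore the refcount. */
static void ref_unfreeze(atomic_int *refs, int count)
{
    atomic_store(refs, count);
}

int main(void)
{
    atomic_int refs = 3;                        /* one reference too many */
    printf("freeze at 2: %d\n", ref_freeze(&refs, 2));  /* fails */
    atomic_fetch_sub(&refs, 1);
    printf("freeze at 2: %d\n", ref_freeze(&refs, 2));  /* succeeds */
    ref_unfreeze(&refs, 2);     /* e.g. the page turned out to be dirty */
    return 0;
}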
@@ -1122,58 +1129,56 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 * Try to allocate it some swap space here.
 		 * Lazyfree page could be freed directly
 		 */
-		if (PageAnon(page) && PageSwapBacked(page) &&
-		    !PageSwapCache(page)) {
-			if (!(sc->gfp_mask & __GFP_IO))
-				goto keep_locked;
-			if (PageTransHuge(page)) {
-				/* cannot split THP, skip it */
-				if (!can_split_huge_page(page, NULL))
-					goto activate_locked;
-				/*
-				 * Split pages without a PMD map right
-				 * away. Chances are some or all of the
-				 * tail pages can be freed without IO.
-				 */
-				if (!compound_mapcount(page) &&
-				    split_huge_page_to_list(page, page_list))
-					goto activate_locked;
-			}
-			if (!add_to_swap(page)) {
-				if (!PageTransHuge(page))
-					goto activate_locked;
-				/* Split THP and swap individual base pages */
-				if (split_huge_page_to_list(page, page_list))
-					goto activate_locked;
-				if (!add_to_swap(page))
-					goto activate_locked;
-			}
-
-			/* XXX: We don't support THP writes */
-			if (PageTransHuge(page) &&
-				  split_huge_page_to_list(page, page_list)) {
-				delete_from_swap_cache(page);
-				goto activate_locked;
-			}
-
-			may_enter_fs = 1;
-
-			/* Adding to swap updated mapping */
-			mapping = page_mapping(page);
+		if (PageAnon(page) && PageSwapBacked(page)) {
+			if (!PageSwapCache(page)) {
+				if (!(sc->gfp_mask & __GFP_IO))
+					goto keep_locked;
+				if (PageTransHuge(page)) {
+					/* cannot split THP, skip it */
+					if (!can_split_huge_page(page, NULL))
+						goto activate_locked;
+					/*
+					 * Split pages without a PMD map right
+					 * away. Chances are some or all of the
+					 * tail pages can be freed without IO.
+					 */
+					if (!compound_mapcount(page) &&
+					    split_huge_page_to_list(page,
+								    page_list))
+						goto activate_locked;
+				}
+				if (!add_to_swap(page)) {
+					if (!PageTransHuge(page))
+						goto activate_locked;
+					/* Fallback to swap normal pages */
+					if (split_huge_page_to_list(page,
+								    page_list))
+						goto activate_locked;
+					if (!add_to_swap(page))
+						goto activate_locked;
+				}
+
+				may_enter_fs = 1;
+
+				/* Adding to swap updated mapping */
+				mapping = page_mapping(page);
+			}
 		} else if (unlikely(PageTransHuge(page))) {
 			/* Split file THP */
 			if (split_huge_page_to_list(page, page_list))
 				goto keep_locked;
 		}
 
-		VM_BUG_ON_PAGE(PageTransHuge(page), page);
-
 		/*
 		 * The page is mapped into the page tables of one or more
 		 * processes. Try to unmap it here.
 		 */
 		if (page_mapped(page)) {
-			if (!try_to_unmap(page, ttu_flags | TTU_BATCH_FLUSH)) {
+			enum ttu_flags flags = ttu_flags | TTU_BATCH_FLUSH;
+
+			if (unlikely(PageTransHuge(page)))
+				flags |= TTU_SPLIT_HUGE_PMD;
+			if (!try_to_unmap(page, flags)) {
 				nr_unmap_fail++;
 				goto activate_locked;
 			}
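
Condensed, the rewritten block is a small decision procedure: try to swap
the THP out whole, split only as a fallback, and leave the PMD split to
try_to_unmap() via TTU_SPLIT_HUGE_PMD.  A stand-in sketch of that control
flow with the kernel predicates reduced to booleans (simplified: the
non-THP add_to_swap() failure path is omitted):

#include <stdbool.h>
#include <stdio.h>

enum action { SWAP_WHOLE_THP, SWAP_SPLIT_PAGES, ACTIVATE, KEEP };

/* Stand-in inputs for the kernel predicates used in the new block. */
struct anon_page {
    bool gfp_allows_io;         /* sc->gfp_mask & __GFP_IO */
    bool is_thp;                /* PageTransHuge() */
    bool can_split;             /* can_split_huge_page() */
    bool pmd_mapped;            /* compound_mapcount() != 0 */
    bool thp_swap_ok;           /* add_to_swap() succeeds for the THP */
};

static enum action pick_action(const struct anon_page *p)
{
    if (!p->gfp_allows_io)
        return KEEP;                    /* keep_locked */
    if (p->is_thp) {
        if (!p->can_split)
            return ACTIVATE;            /* cannot split THP, skip it */
        if (!p->pmd_mapped)
            return SWAP_SPLIT_PAGES;    /* tail pages may free without IO */
        if (p->thp_swap_ok)
            return SWAP_WHOLE_THP;      /* the new, batched path */
        return SWAP_SPLIT_PAGES;        /* fallback: swap normal pages */
    }
    return SWAP_WHOLE_THP;              /* ordinary 4KiB page */
}

int main(void)
{
    struct anon_page thp = { true, true, true, true, true };
    printf("PMD-mapped THP -> %d (SWAP_WHOLE_THP)\n", pick_action(&thp));
    return 0;
}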
@@ -1313,7 +1318,11 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 * Is there need to periodically free_page_list? It would
 		 * appear not as the counts should be low
 		 */
-		list_add(&page->lru, &free_pages);
+		if (unlikely(PageTransHuge(page))) {
+			mem_cgroup_uncharge(page);
+			(*get_compound_page_dtor(page))(page);
+		} else
+			list_add(&page->lru, &free_pages);
 
 		continue;
 
 activate_locked:
...
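
Finally, a page that is still compound when it reaches the free path cannot
ride the batched free_pages list; it is uncharged and released through its
compound destructor.  That lookup is a table dispatch on a per-page index,
roughly as in this stand-in (the kernel's table is compound_page_dtors[];
the names below are illustrative):

#include <stdio.h>

struct fake_page;
typedef void (*compound_dtor_t)(struct fake_page *);

struct fake_page {
    unsigned char compound_dtor;    /* index into the destructor table */
};

static void free_compound_page(struct fake_page *p)
{
    (void)p;
    printf("freeing all base pages of the compound page at once\n");
}

/* Stand-in for the kernel's compound_page_dtors[] table. */
static compound_dtor_t dtor_table[] = { free_compound_page };

static compound_dtor_t get_dtor(struct fake_page *p)
{
    return dtor_table[p->compound_dtor];
}

int main(void)
{
    struct fake_page thp = { .compound_dtor = 0 };
    (*get_dtor(&thp))(&thp);    /* mirrors (*get_compound_page_dtor(page))(page) */
    return 0;
}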