Commit e9b61f19 authored by Kirill A. Shutemov, committed by Linus Torvalds

thp: reintroduce split_huge_page()

This patch adds an implementation of split_huge_page() for the new
refcounting.

Unlike the previous implementation, the new split_huge_page() can fail if
somebody holds a GUP pin on the page.  It also means that a pin on the page
would prevent it from being split under you.  It makes the situation in
many places much cleaner.

The basic scheme of split_huge_page():

  - Check that sum of mapcounts of all subpages is equal to page_count()
    plus one (caller pin). Fall off with -EBUSY otherwise. This way we can
    avoid useless PMD-splits.

  - Freeze the page counters by splitting all PMD and setup migration
    PTEs.

  - Re-check sum of mapcounts against page_count(). Page's counts are
    stable now. -EBUSY if page is pinned.

  - Split compound page.

  - Unfreeze the page by removing migration entries.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Jerome Marchand <jmarchan@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Steve Capper <steve.capper@linaro.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 4e41a30c
...@@ -90,8 +90,11 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma); ...@@ -90,8 +90,11 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
extern unsigned long transparent_hugepage_flags; extern unsigned long transparent_hugepage_flags;
#define split_huge_page_to_list(page, list) BUILD_BUG() int split_huge_page_to_list(struct page *page, struct list_head *list);
#define split_huge_page(page) BUILD_BUG() static inline int split_huge_page(struct page *page)
{
return split_huge_page_to_list(page, NULL);
}
void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long address); unsigned long address);
......
...@@ -394,10 +394,21 @@ static inline struct page *read_mapping_page(struct address_space *mapping, ...@@ -394,10 +394,21 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
*/ */
static inline pgoff_t page_to_pgoff(struct page *page) static inline pgoff_t page_to_pgoff(struct page *page)
{ {
pgoff_t pgoff;
if (unlikely(PageHeadHuge(page))) if (unlikely(PageHeadHuge(page)))
return page->index << compound_order(page); return page->index << compound_order(page);
else
if (likely(!PageTransTail(page)))
return page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); return page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
/*
* We don't initialize ->index for tail pages: calculate based on
* head page
*/
pgoff = compound_head(page)->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
pgoff += page - compound_head(page);
return pgoff;
} }
/* /*
......
This diff is collapsed.
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/pagemap.h>
/* /*
* The set of flags that only affect watermark checking and reclaim * The set of flags that only affect watermark checking and reclaim
...@@ -265,10 +266,27 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page) ...@@ -265,10 +266,27 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE /*
extern unsigned long vma_address(struct page *page, * At what user virtual address is page expected in @vma?
struct vm_area_struct *vma); */
#endif static inline unsigned long
__vma_address(struct page *page, struct vm_area_struct *vma)
{
pgoff_t pgoff = page_to_pgoff(page);
return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
}
static inline unsigned long
vma_address(struct page *page, struct vm_area_struct *vma)
{
unsigned long address = __vma_address(page, vma);
/* page should be within @vma mapping range */
VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
return address;
}
#else /* !CONFIG_MMU */ #else /* !CONFIG_MMU */
static inline void clear_page_mlock(struct page *page) { } static inline void clear_page_mlock(struct page *page) { }
static inline void mlock_vma_page(struct page *page) { } static inline void mlock_vma_page(struct page *page) { }
......
...@@ -567,27 +567,6 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma) ...@@ -567,27 +567,6 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
anon_vma_unlock_read(anon_vma); anon_vma_unlock_read(anon_vma);
} }
/*
* At what user virtual address is page expected in @vma?
*/
static inline unsigned long
__vma_address(struct page *page, struct vm_area_struct *vma)
{
pgoff_t pgoff = page_to_pgoff(page);
return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
}
inline unsigned long
vma_address(struct page *page, struct vm_area_struct *vma)
{
unsigned long address = __vma_address(page, vma);
/* page should be within @vma mapping range */
VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
return address;
}
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
static void percpu_flush_tlb_batch_pages(void *data) static void percpu_flush_tlb_batch_pages(void *data)
{ {
...@@ -1155,20 +1134,12 @@ void do_page_add_anon_rmap(struct page *page, ...@@ -1155,20 +1134,12 @@ void do_page_add_anon_rmap(struct page *page,
bool compound = flags & RMAP_COMPOUND; bool compound = flags & RMAP_COMPOUND;
bool first; bool first;
if (PageTransCompound(page)) {
VM_BUG_ON_PAGE(!PageLocked(page), page);
if (compound) { if (compound) {
atomic_t *mapcount; atomic_t *mapcount;
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(!PageTransHuge(page), page); VM_BUG_ON_PAGE(!PageTransHuge(page), page);
mapcount = compound_mapcount_ptr(page); mapcount = compound_mapcount_ptr(page);
first = atomic_inc_and_test(mapcount); first = atomic_inc_and_test(mapcount);
} else {
/* Anon THP always mapped first with PMD */
first = 0;
VM_BUG_ON_PAGE(!page_mapcount(page), page);
atomic_inc(&page->_mapcount);
}
} else { } else {
first = atomic_inc_and_test(&page->_mapcount); first = atomic_inc_and_test(&page->_mapcount);
} }
...@@ -1182,7 +1153,6 @@ void do_page_add_anon_rmap(struct page *page, ...@@ -1182,7 +1153,6 @@ void do_page_add_anon_rmap(struct page *page,
* disabled. * disabled.
*/ */
if (compound) { if (compound) {
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
__inc_zone_page_state(page, __inc_zone_page_state(page,
NR_ANON_TRANSPARENT_HUGEPAGES); NR_ANON_TRANSPARENT_HUGEPAGES);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment