Commit 4b471e88 authored by Kirill A. Shutemov, committed by Linus Torvalds

mm, thp: remove infrastructure for handling splitting PMDs

With the new refcounting we no longer need to mark PMDs as splitting.  Let's
drop the code that handles this.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Jerome Marchand <jmarchan@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Steve Capper <steve.capper@linaro.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 1f19617d
#
# Feature name: pmdp_splitting_flush
# Kconfig: __HAVE_ARCH_PMDP_SPLITTING_FLUSH
# description: arch supports the pmdp_splitting_flush() VM API
#
-----------------------
| arch |status|
-----------------------
| alpha: | TODO |
| arc: | TODO |
| arm: | ok |
| arm64: | ok |
| avr32: | TODO |
| blackfin: | TODO |
| c6x: | TODO |
| cris: | TODO |
| frv: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
| m32r: | TODO |
| m68k: | TODO |
| metag: | TODO |
| microblaze: | TODO |
| mips: | ok |
| mn10300: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
| s390: | ok |
| score: | TODO |
| sh: | TODO |
| sparc: | TODO |
| tile: | TODO |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------
...@@ -602,7 +602,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, ...@@ -602,7 +602,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pte_t *pte; pte_t *pte;
spinlock_t *ptl; spinlock_t *ptl;
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
smaps_pmd_entry(pmd, addr, walk); smaps_pmd_entry(pmd, addr, walk);
spin_unlock(ptl); spin_unlock(ptl);
return 0; return 0;
...@@ -913,7 +913,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, ...@@ -913,7 +913,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
spinlock_t *ptl; spinlock_t *ptl;
struct page *page; struct page *page;
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
if (cp->type == CLEAR_REFS_SOFT_DIRTY) { if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
clear_soft_dirty_pmd(vma, addr, pmd); clear_soft_dirty_pmd(vma, addr, pmd);
goto out; goto out;
...@@ -1187,7 +1187,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, ...@@ -1187,7 +1187,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
int err = 0; int err = 0;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (pmd_trans_huge_lock(pmdp, vma, &ptl) == 1) { if (pmd_trans_huge_lock(pmdp, vma, &ptl)) {
u64 flags = 0, frame = 0; u64 flags = 0, frame = 0;
pmd_t pmd = *pmdp; pmd_t pmd = *pmdp;
...@@ -1519,7 +1519,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, ...@@ -1519,7 +1519,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
pte_t *orig_pte; pte_t *orig_pte;
pte_t *pte; pte_t *pte;
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
pte_t huge_pte = *(pte_t *)pmd; pte_t huge_pte = *(pte_t *)pmd;
struct page *page; struct page *page;
......
...@@ -207,11 +207,6 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, ...@@ -207,11 +207,6 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif #endif
#ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH
extern void pmdp_splitting_flush(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
#endif
#ifndef pmdp_collapse_flush #ifndef pmdp_collapse_flush
#ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
...@@ -627,10 +622,6 @@ static inline int pmd_trans_huge(pmd_t pmd) ...@@ -627,10 +622,6 @@ static inline int pmd_trans_huge(pmd_t pmd)
{ {
return 0; return 0;
} }
static inline int pmd_trans_splitting(pmd_t pmd)
{
return 0;
}
#ifndef __HAVE_ARCH_PMD_WRITE #ifndef __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd) static inline int pmd_write(pmd_t pmd)
{ {
......
...@@ -25,7 +25,7 @@ extern int zap_huge_pmd(struct mmu_gather *tlb, ...@@ -25,7 +25,7 @@ extern int zap_huge_pmd(struct mmu_gather *tlb,
extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end, unsigned long addr, unsigned long end,
unsigned char *vec); unsigned char *vec);
extern int move_huge_pmd(struct vm_area_struct *vma, extern bool move_huge_pmd(struct vm_area_struct *vma,
struct vm_area_struct *new_vma, struct vm_area_struct *new_vma,
unsigned long old_addr, unsigned long old_addr,
unsigned long new_addr, unsigned long old_end, unsigned long new_addr, unsigned long old_end,
...@@ -48,15 +48,9 @@ enum transparent_hugepage_flag { ...@@ -48,15 +48,9 @@ enum transparent_hugepage_flag {
#endif #endif
}; };
enum page_check_address_pmd_flag {
PAGE_CHECK_ADDRESS_PMD_FLAG,
PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG,
PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG,
};
extern pmd_t *page_check_address_pmd(struct page *page, extern pmd_t *page_check_address_pmd(struct page *page,
struct mm_struct *mm, struct mm_struct *mm,
unsigned long address, unsigned long address,
enum page_check_address_pmd_flag flag,
spinlock_t **ptl); spinlock_t **ptl);
#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
...@@ -100,7 +94,6 @@ extern unsigned long transparent_hugepage_flags; ...@@ -100,7 +94,6 @@ extern unsigned long transparent_hugepage_flags;
#define split_huge_page(page) BUILD_BUG() #define split_huge_page(page) BUILD_BUG()
#define split_huge_pmd(__vma, __pmd, __address) BUILD_BUG() #define split_huge_pmd(__vma, __pmd, __address) BUILD_BUG()
#define wait_split_huge_page(__anon_vma, __pmd) BUILD_BUG()
#if HPAGE_PMD_ORDER >= MAX_ORDER #if HPAGE_PMD_ORDER >= MAX_ORDER
#error "hugepages can't be allocated by the buddy allocator" #error "hugepages can't be allocated by the buddy allocator"
#endif #endif
...@@ -110,17 +103,17 @@ extern void vma_adjust_trans_huge(struct vm_area_struct *vma, ...@@ -110,17 +103,17 @@ extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start, unsigned long start,
unsigned long end, unsigned long end,
long adjust_next); long adjust_next);
extern int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, extern bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
spinlock_t **ptl); spinlock_t **ptl);
/* mmap_sem must be held on entry */ /* mmap_sem must be held on entry */
static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, static inline bool pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
spinlock_t **ptl) spinlock_t **ptl)
{ {
VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma); VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
if (pmd_trans_huge(*pmd)) if (pmd_trans_huge(*pmd))
return __pmd_trans_huge_lock(pmd, vma, ptl); return __pmd_trans_huge_lock(pmd, vma, ptl);
else else
return 0; return false;
} }
static inline int hpage_nr_pages(struct page *page) static inline int hpage_nr_pages(struct page *page)
{ {
...@@ -165,8 +158,6 @@ static inline int split_huge_page(struct page *page) ...@@ -165,8 +158,6 @@ static inline int split_huge_page(struct page *page)
{ {
return 0; return 0;
} }
#define wait_split_huge_page(__anon_vma, __pmd) \
do { } while (0)
#define split_huge_pmd(__vma, __pmd, __address) \ #define split_huge_pmd(__vma, __pmd, __address) \
do { } while (0) do { } while (0)
static inline int hugepage_madvise(struct vm_area_struct *vma, static inline int hugepage_madvise(struct vm_area_struct *vma,
...@@ -181,10 +172,10 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, ...@@ -181,10 +172,10 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
long adjust_next) long adjust_next)
{ {
} }
static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, static inline bool pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
spinlock_t **ptl) spinlock_t **ptl)
{ {
return 0; return false;
} }
static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
......
...@@ -241,13 +241,6 @@ struct page *follow_page_mask(struct vm_area_struct *vma, ...@@ -241,13 +241,6 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
spin_unlock(ptl); spin_unlock(ptl);
return follow_page_pte(vma, address, pmd, flags); return follow_page_pte(vma, address, pmd, flags);
} }
if (unlikely(pmd_trans_splitting(*pmd))) {
spin_unlock(ptl);
wait_split_huge_page(vma->anon_vma, pmd);
return follow_page_pte(vma, address, pmd, flags);
}
if (flags & FOLL_SPLIT) { if (flags & FOLL_SPLIT) {
int ret; int ret;
page = pmd_page(*pmd); page = pmd_page(*pmd);
...@@ -1068,9 +1061,6 @@ struct page *get_dump_page(unsigned long addr) ...@@ -1068,9 +1061,6 @@ struct page *get_dump_page(unsigned long addr)
* *) HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table is used to free * *) HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table is used to free
* pages containing page tables. * pages containing page tables.
* *
* *) THP splits will broadcast an IPI, this can be achieved by overriding
* pmdp_splitting_flush.
*
* *) ptes can be read atomically by the architecture. * *) ptes can be read atomically by the architecture.
* *
* *) access_ok is sufficient to validate userspace address ranges. * *) access_ok is sufficient to validate userspace address ranges.
...@@ -1267,7 +1257,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, ...@@ -1267,7 +1257,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
pmd_t pmd = READ_ONCE(*pmdp); pmd_t pmd = READ_ONCE(*pmdp);
next = pmd_addr_end(addr, end); next = pmd_addr_end(addr, end);
if (pmd_none(pmd) || pmd_trans_splitting(pmd)) if (pmd_none(pmd))
return 0; return 0;
if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) { if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) {
......
...@@ -986,15 +986,6 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, ...@@ -986,15 +986,6 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
goto out_unlock; goto out_unlock;
} }
if (unlikely(pmd_trans_splitting(pmd))) {
/* split huge page running from under us */
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
pte_free(dst_mm, pgtable);
wait_split_huge_page(vma->anon_vma, src_pmd); /* src_vma */
goto out;
}
src_page = pmd_page(pmd); src_page = pmd_page(pmd);
VM_BUG_ON_PAGE(!PageHead(src_page), src_page); VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
get_page(src_page); get_page(src_page);
...@@ -1470,7 +1461,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, ...@@ -1470,7 +1461,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pmd_t orig_pmd; pmd_t orig_pmd;
spinlock_t *ptl; spinlock_t *ptl;
if (__pmd_trans_huge_lock(pmd, vma, &ptl) != 1) if (!__pmd_trans_huge_lock(pmd, vma, &ptl))
return 0; return 0;
/* /*
* For architectures like ppc64 we look at deposited pgtable * For architectures like ppc64 we look at deposited pgtable
...@@ -1504,13 +1495,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, ...@@ -1504,13 +1495,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
return 1; return 1;
} }
int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, bool move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
unsigned long old_addr, unsigned long old_addr,
unsigned long new_addr, unsigned long old_end, unsigned long new_addr, unsigned long old_end,
pmd_t *old_pmd, pmd_t *new_pmd) pmd_t *old_pmd, pmd_t *new_pmd)
{ {
spinlock_t *old_ptl, *new_ptl; spinlock_t *old_ptl, *new_ptl;
int ret = 0;
pmd_t pmd; pmd_t pmd;
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
...@@ -1519,7 +1509,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, ...@@ -1519,7 +1509,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
(new_addr & ~HPAGE_PMD_MASK) || (new_addr & ~HPAGE_PMD_MASK) ||
old_end - old_addr < HPAGE_PMD_SIZE || old_end - old_addr < HPAGE_PMD_SIZE ||
(new_vma->vm_flags & VM_NOHUGEPAGE)) (new_vma->vm_flags & VM_NOHUGEPAGE))
goto out; return false;
/* /*
* The destination pmd shouldn't be established, free_pgtables() * The destination pmd shouldn't be established, free_pgtables()
...@@ -1527,15 +1517,14 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, ...@@ -1527,15 +1517,14 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
*/ */
if (WARN_ON(!pmd_none(*new_pmd))) { if (WARN_ON(!pmd_none(*new_pmd))) {
VM_BUG_ON(pmd_trans_huge(*new_pmd)); VM_BUG_ON(pmd_trans_huge(*new_pmd));
goto out; return false;
} }
/* /*
* We don't have to worry about the ordering of src and dst * We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_sem prevents deadlock. * ptlocks because exclusive mmap_sem prevents deadlock.
*/ */
ret = __pmd_trans_huge_lock(old_pmd, vma, &old_ptl); if (__pmd_trans_huge_lock(old_pmd, vma, &old_ptl)) {
if (ret == 1) {
new_ptl = pmd_lockptr(mm, new_pmd); new_ptl = pmd_lockptr(mm, new_pmd);
if (new_ptl != old_ptl) if (new_ptl != old_ptl)
spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
...@@ -1551,9 +1540,9 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, ...@@ -1551,9 +1540,9 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
if (new_ptl != old_ptl) if (new_ptl != old_ptl)
spin_unlock(new_ptl); spin_unlock(new_ptl);
spin_unlock(old_ptl); spin_unlock(old_ptl);
return true;
} }
out: return false;
return ret;
} }
/* /*
...@@ -1569,7 +1558,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -1569,7 +1558,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
spinlock_t *ptl; spinlock_t *ptl;
int ret = 0; int ret = 0;
if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { if (__pmd_trans_huge_lock(pmd, vma, &ptl)) {
pmd_t entry; pmd_t entry;
bool preserve_write = prot_numa && pmd_write(*pmd); bool preserve_write = prot_numa && pmd_write(*pmd);
ret = 1; ret = 1;
...@@ -1600,29 +1589,19 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -1600,29 +1589,19 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
} }
/* /*
* Returns 1 if a given pmd maps a stable (not under splitting) thp. * Returns true if a given pmd maps a thp, false otherwise.
* Returns -1 if it maps a thp under splitting. Returns 0 otherwise.
* *
* Note that if it returns 1, this routine returns without unlocking page * Note that if it returns true, this routine returns without unlocking page
* table locks. So callers must unlock them. * table lock. So callers must unlock it.
*/ */
int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
spinlock_t **ptl) spinlock_t **ptl)
{ {
*ptl = pmd_lock(vma->vm_mm, pmd); *ptl = pmd_lock(vma->vm_mm, pmd);
if (likely(pmd_trans_huge(*pmd))) { if (likely(pmd_trans_huge(*pmd)))
if (unlikely(pmd_trans_splitting(*pmd))) { return true;
spin_unlock(*ptl);
wait_split_huge_page(vma->anon_vma, pmd);
return -1;
} else {
/* Thp mapped by 'pmd' is stable, so we can
* handle it as it is. */
return 1;
}
}
spin_unlock(*ptl); spin_unlock(*ptl);
return 0; return false;
} }
/* /*
...@@ -1636,7 +1615,6 @@ int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, ...@@ -1636,7 +1615,6 @@ int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
pmd_t *page_check_address_pmd(struct page *page, pmd_t *page_check_address_pmd(struct page *page,
struct mm_struct *mm, struct mm_struct *mm,
unsigned long address, unsigned long address,
enum page_check_address_pmd_flag flag,
spinlock_t **ptl) spinlock_t **ptl)
{ {
pgd_t *pgd; pgd_t *pgd;
...@@ -1659,21 +1637,8 @@ pmd_t *page_check_address_pmd(struct page *page, ...@@ -1659,21 +1637,8 @@ pmd_t *page_check_address_pmd(struct page *page,
goto unlock; goto unlock;
if (pmd_page(*pmd) != page) if (pmd_page(*pmd) != page)
goto unlock; goto unlock;
/* if (pmd_trans_huge(*pmd))
* split_vma() may create temporary aliased mappings. There is
* no risk as long as all huge pmd are found and have their
* splitting bit set before __split_huge_page_refcount
* runs. Finding the same huge pmd more than once during the
* same rmap walk is not a problem.
*/
if (flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG &&
pmd_trans_splitting(*pmd))
goto unlock;
if (pmd_trans_huge(*pmd)) {
VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG &&
!pmd_trans_splitting(*pmd));
return pmd; return pmd;
}
unlock: unlock:
spin_unlock(*ptl); spin_unlock(*ptl);
return NULL; return NULL;
......
...@@ -4675,7 +4675,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd, ...@@ -4675,7 +4675,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
pte_t *pte; pte_t *pte;
spinlock_t *ptl; spinlock_t *ptl;
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE) if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
mc.precharge += HPAGE_PMD_NR; mc.precharge += HPAGE_PMD_NR;
spin_unlock(ptl); spin_unlock(ptl);
...@@ -4863,16 +4863,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, ...@@ -4863,16 +4863,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
union mc_target target; union mc_target target;
struct page *page; struct page *page;
/* if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
* No race with splitting thp happens because:
* - if pmd_trans_huge_lock() returns 1, the relevant thp is not
* under splitting, which means there's no concurrent thp split,
* - if another thread runs into split_huge_page() just after we
* entered this if-block, the thread must wait for page table lock
* to be unlocked in __split_huge_page_splitting(), where the main
* part of thp split is not executed yet.
*/
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
if (mc.precharge < HPAGE_PMD_NR) { if (mc.precharge < HPAGE_PMD_NR) {
spin_unlock(ptl); spin_unlock(ptl);
return 0; return 0;
......
...@@ -566,7 +566,6 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -566,7 +566,6 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
{ {
spinlock_t *ptl; spinlock_t *ptl;
pgtable_t new = pte_alloc_one(mm, address); pgtable_t new = pte_alloc_one(mm, address);
int wait_split_huge_page;
if (!new) if (!new)
return -ENOMEM; return -ENOMEM;
...@@ -586,18 +585,14 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -586,18 +585,14 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
ptl = pmd_lock(mm, pmd); ptl = pmd_lock(mm, pmd);
wait_split_huge_page = 0;
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
atomic_long_inc(&mm->nr_ptes); atomic_long_inc(&mm->nr_ptes);
pmd_populate(mm, pmd, new); pmd_populate(mm, pmd, new);
new = NULL; new = NULL;
} else if (unlikely(pmd_trans_splitting(*pmd))) }
wait_split_huge_page = 1;
spin_unlock(ptl); spin_unlock(ptl);
if (new) if (new)
pte_free(mm, new); pte_free(mm, new);
if (wait_split_huge_page)
wait_split_huge_page(vma->anon_vma, pmd);
return 0; return 0;
} }
...@@ -613,8 +608,7 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address) ...@@ -613,8 +608,7 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
pmd_populate_kernel(&init_mm, pmd, new); pmd_populate_kernel(&init_mm, pmd, new);
new = NULL; new = NULL;
} else }
VM_BUG_ON(pmd_trans_splitting(*pmd));
spin_unlock(&init_mm.page_table_lock); spin_unlock(&init_mm.page_table_lock);
if (new) if (new)
pte_free_kernel(&init_mm, new); pte_free_kernel(&init_mm, new);
...@@ -3374,14 +3368,6 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -3374,14 +3368,6 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (pmd_trans_huge(orig_pmd)) { if (pmd_trans_huge(orig_pmd)) {
unsigned int dirty = flags & FAULT_FLAG_WRITE; unsigned int dirty = flags & FAULT_FLAG_WRITE;
/*
* If the pmd is splitting, return and retry the
* the fault. Alternative: wait until the split
* is done, and goto retry.
*/
if (pmd_trans_splitting(orig_pmd))
return 0;
if (pmd_protnone(orig_pmd)) if (pmd_protnone(orig_pmd))
return do_huge_pmd_numa_page(mm, vma, address, return do_huge_pmd_numa_page(mm, vma, address,
orig_pmd, pmd); orig_pmd, pmd);
......
...@@ -117,7 +117,7 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, ...@@ -117,7 +117,7 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
unsigned char *vec = walk->private; unsigned char *vec = walk->private;
int nr = (end - addr) >> PAGE_SHIFT; int nr = (end - addr) >> PAGE_SHIFT;
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
memset(vec, 1, nr); memset(vec, 1, nr);
spin_unlock(ptl); spin_unlock(ptl);
goto out; goto out;
......
...@@ -192,25 +192,24 @@ unsigned long move_page_tables(struct vm_area_struct *vma, ...@@ -192,25 +192,24 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
if (!new_pmd) if (!new_pmd)
break; break;
if (pmd_trans_huge(*old_pmd)) { if (pmd_trans_huge(*old_pmd)) {
int err = 0;
if (extent == HPAGE_PMD_SIZE) { if (extent == HPAGE_PMD_SIZE) {
bool moved;
VM_BUG_ON_VMA(vma->vm_file || !vma->anon_vma, VM_BUG_ON_VMA(vma->vm_file || !vma->anon_vma,
vma); vma);
/* See comment in move_ptes() */ /* See comment in move_ptes() */
if (need_rmap_locks) if (need_rmap_locks)
anon_vma_lock_write(vma->anon_vma); anon_vma_lock_write(vma->anon_vma);
err = move_huge_pmd(vma, new_vma, old_addr, moved = move_huge_pmd(vma, new_vma, old_addr,
new_addr, old_end, new_addr, old_end,
old_pmd, new_pmd); old_pmd, new_pmd);
if (need_rmap_locks) if (need_rmap_locks)
anon_vma_unlock_write(vma->anon_vma); anon_vma_unlock_write(vma->anon_vma);
if (moved) {
need_flush = true;
continue;
}
} }
if (err > 0) { split_huge_pmd(vma, old_pmd, old_addr);
need_flush = true;
continue;
} else if (!err) {
split_huge_pmd(vma, old_pmd, old_addr);
}
VM_BUG_ON(pmd_trans_huge(*old_pmd)); VM_BUG_ON(pmd_trans_huge(*old_pmd));
} }
if (pmd_none(*new_pmd) && __pte_alloc(new_vma->vm_mm, new_vma, if (pmd_none(*new_pmd) && __pte_alloc(new_vma->vm_mm, new_vma,
......
...@@ -61,8 +61,7 @@ static int page_idle_clear_pte_refs_one(struct page *page, ...@@ -61,8 +61,7 @@ static int page_idle_clear_pte_refs_one(struct page *page,
bool referenced = false; bool referenced = false;
if (unlikely(PageTransHuge(page))) { if (unlikely(PageTransHuge(page))) {
pmd = page_check_address_pmd(page, mm, addr, pmd = page_check_address_pmd(page, mm, addr, &ptl);
PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
if (pmd) { if (pmd) {
referenced = pmdp_clear_young_notify(vma, addr, pmd); referenced = pmdp_clear_young_notify(vma, addr, pmd);
spin_unlock(ptl); spin_unlock(ptl);
......
...@@ -139,18 +139,6 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, ...@@ -139,18 +139,6 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
} }
#endif #endif
#ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH
void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
pmd_t pmd = pmd_mksplitting(*pmdp);
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
set_pmd_at(vma->vm_mm, address, pmdp, pmd);
/* tlb flush only to serialize against gup-fast */
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
}
#endif
#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pgtable) pgtable_t pgtable)
......
...@@ -843,8 +843,7 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma, ...@@ -843,8 +843,7 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
* rmap might return false positives; we must filter * rmap might return false positives; we must filter
* these out using page_check_address_pmd(). * these out using page_check_address_pmd().
*/ */
pmd = page_check_address_pmd(page, mm, address, pmd = page_check_address_pmd(page, mm, address, &ptl);
PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
if (!pmd) if (!pmd)
return SWAP_AGAIN; return SWAP_AGAIN;
...@@ -854,7 +853,6 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma, ...@@ -854,7 +853,6 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
return SWAP_FAIL; /* To break the loop */ return SWAP_FAIL; /* To break the loop */
} }
/* go ahead even if the pmd is pmd_trans_splitting() */
if (pmdp_clear_flush_young_notify(vma, address, pmd)) if (pmdp_clear_flush_young_notify(vma, address, pmd))
referenced++; referenced++;
spin_unlock(ptl); spin_unlock(ptl);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment