Commit cb900f41 authored by Kirill A. Shutemov, committed by Linus Torvalds

mm, hugetlb: convert hugetlbfs to use split pmd lock

Hugetlb supports multiple page sizes. We use split lock only for PMD
level, but not for PUD.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent c389a250
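For orientation before the diff: the conversion follows one rule. PMD-sized hugepages take the split lock of the page table page holding the PMD (via pmd_lockptr()), while larger hugepages (PUD level, e.g. 1GB pages on x86) keep using mm->page_table_lock. Callers stop touching mm->page_table_lock directly and go through two helpers added to include/linux/hugetlb.h. The sketch below is condensed from the hunks that follow; the comments are added here for orientation and are not part of the patch.

/* Pick the lock that guards a hugetlb pte (CONFIG_HUGETLB_PAGE=y case). */
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
                                struct mm_struct *mm, pte_t *pte)
{
        if (huge_page_size(h) == PMD_SIZE)
                return pmd_lockptr(mm, (pmd_t *) pte);  /* split PMD lock */
        VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
        return &mm->page_table_lock;                    /* PUD-sized pages fall back */
}

/* Convenience wrapper: look up the lock and take it. */
static inline spinlock_t *huge_pte_lock(struct hstate *h,
                                struct mm_struct *mm, pte_t *pte)
{
        spinlock_t *ptl = huge_pte_lockptr(h, mm, pte);

        spin_lock(ptl);
        return ptl;
}

The caller-side pattern, repeated throughout the mm/ hunks below, is: ptl = huge_pte_lock(h, mm, ptep); ... spin_unlock(ptl); on every exit path, instead of a single spin_lock(&mm->page_table_lock) around the whole loop.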
 #include <linux/fs.h>
-#include <linux/hugetlb.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/hugetlb.h>
 #include <linux/mman.h>
 #include <linux/mmzone.h>
 #include <linux/proc_fs.h>
...
@@ -392,6 +392,15 @@ static inline int hugepage_migration_support(struct hstate *h)
         return pmd_huge_support() && (huge_page_shift(h) == PMD_SHIFT);
 }
 
+static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
+                                struct mm_struct *mm, pte_t *pte)
+{
+        if (huge_page_size(h) == PMD_SIZE)
+                return pmd_lockptr(mm, (pmd_t *) pte);
+        VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
+        return &mm->page_table_lock;
+}
+
 #else /* CONFIG_HUGETLB_PAGE */
 struct hstate {};
 #define alloc_huge_page_node(h, nid) NULL
@@ -401,6 +410,7 @@ struct hstate {};
 #define hstate_sizelog(s) NULL
 #define hstate_vma(v) NULL
 #define hstate_inode(i) NULL
+#define page_hstate(page) NULL
 #define huge_page_size(h) PAGE_SIZE
 #define huge_page_mask(h) PAGE_MASK
 #define vma_kernel_pagesize(v) PAGE_SIZE
@@ -421,6 +431,22 @@ static inline pgoff_t basepage_index(struct page *page)
 #define dissolve_free_huge_pages(s, e) do {} while (0)
 #define pmd_huge_support() 0
 #define hugepage_migration_support(h) 0
+
+static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
+                                struct mm_struct *mm, pte_t *pte)
+{
+        return &mm->page_table_lock;
+}
 #endif /* CONFIG_HUGETLB_PAGE */
 
+static inline spinlock_t *huge_pte_lock(struct hstate *h,
+                                struct mm_struct *mm, pte_t *pte)
+{
+        spinlock_t *ptl;
+
+        ptl = huge_pte_lockptr(h, mm, pte);
+        spin_lock(ptl);
+        return ptl;
+}
+
 #endif /* _LINUX_HUGETLB_H */
@@ -139,7 +139,8 @@ static inline void make_migration_entry_read(swp_entry_t *entry)
 extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
                                         unsigned long address);
-extern void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte);
+extern void migration_entry_wait_huge(struct vm_area_struct *vma,
+                struct mm_struct *mm, pte_t *pte);
 #else
 #define make_migration_entry(page, write) swp_entry(0, 0)
@@ -151,8 +152,8 @@ static inline int is_migration_entry(swp_entry_t swp)
 static inline void make_migration_entry_read(swp_entry_t *entryp) { }
 static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
                                         unsigned long address) { }
-static inline void migration_entry_wait_huge(struct mm_struct *mm,
-                                        pte_t *pte) { }
+static inline void migration_entry_wait_huge(struct vm_area_struct *vma,
+                struct mm_struct *mm, pte_t *pte) { }
 static inline int is_write_migration_entry(swp_entry_t entry)
 {
         return 0;
...
@@ -2376,6 +2376,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
         cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
         for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
+                spinlock_t *src_ptl, *dst_ptl;
                 src_pte = huge_pte_offset(src, addr);
                 if (!src_pte)
                         continue;
@@ -2387,8 +2388,9 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                 if (dst_pte == src_pte)
                         continue;
-                spin_lock(&dst->page_table_lock);
-                spin_lock_nested(&src->page_table_lock, SINGLE_DEPTH_NESTING);
+                dst_ptl = huge_pte_lock(h, dst, dst_pte);
+                src_ptl = huge_pte_lockptr(h, src, src_pte);
+                spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
                 if (!huge_pte_none(huge_ptep_get(src_pte))) {
                         if (cow)
                                 huge_ptep_set_wrprotect(src, addr, src_pte);
@@ -2398,8 +2400,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                         page_dup_rmap(ptepage);
                         set_huge_pte_at(dst, addr, dst_pte, entry);
                 }
-                spin_unlock(&src->page_table_lock);
-                spin_unlock(&dst->page_table_lock);
+                spin_unlock(src_ptl);
+                spin_unlock(dst_ptl);
         }
         return 0;
@@ -2442,6 +2444,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
         unsigned long address;
         pte_t *ptep;
         pte_t pte;
+        spinlock_t *ptl;
         struct page *page;
         struct hstate *h = hstate_vma(vma);
         unsigned long sz = huge_page_size(h);
@@ -2455,25 +2458,25 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
         tlb_start_vma(tlb, vma);
         mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 again:
-        spin_lock(&mm->page_table_lock);
         for (address = start; address < end; address += sz) {
                 ptep = huge_pte_offset(mm, address);
                 if (!ptep)
                         continue;
+                ptl = huge_pte_lock(h, mm, ptep);
                 if (huge_pmd_unshare(mm, &address, ptep))
-                        continue;
+                        goto unlock;
                 pte = huge_ptep_get(ptep);
                 if (huge_pte_none(pte))
-                        continue;
+                        goto unlock;
                 /*
                  * HWPoisoned hugepage is already unmapped and dropped reference
                  */
                 if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) {
                         huge_pte_clear(mm, address, ptep);
-                        continue;
+                        goto unlock;
                 }
                 page = pte_page(pte);
@@ -2484,7 +2487,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
                  */
                 if (ref_page) {
                         if (page != ref_page)
-                                continue;
+                                goto unlock;
                         /*
                          * Mark the VMA as having unmapped its page so that
@@ -2501,13 +2504,18 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
                 page_remove_rmap(page);
                 force_flush = !__tlb_remove_page(tlb, page);
-                if (force_flush)
+                if (force_flush) {
+                        spin_unlock(ptl);
                         break;
+                }
                 /* Bail out after unmapping reference page if supplied */
-                if (ref_page)
+                if (ref_page) {
+                        spin_unlock(ptl);
                         break;
+                }
+unlock:
+                spin_unlock(ptl);
         }
-        spin_unlock(&mm->page_table_lock);
         /*
          * mmu_gather ran out of room to batch pages, we break out of
          * the PTE lock to avoid doing the potential expensive TLB invalidate
@@ -2613,7 +2621,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
  */
 static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
                         unsigned long address, pte_t *ptep, pte_t pte,
-                        struct page *pagecache_page)
+                        struct page *pagecache_page, spinlock_t *ptl)
 {
         struct hstate *h = hstate_vma(vma);
         struct page *old_page, *new_page;
@@ -2647,8 +2655,8 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
         page_cache_get(old_page);
-        /* Drop page_table_lock as buddy allocator may be called */
-        spin_unlock(&mm->page_table_lock);
+        /* Drop page table lock as buddy allocator may be called */
+        spin_unlock(ptl);
         new_page = alloc_huge_page(vma, address, outside_reserve);
         if (IS_ERR(new_page)) {
@@ -2666,13 +2674,13 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
                         BUG_ON(huge_pte_none(pte));
                         if (unmap_ref_private(mm, vma, old_page, address)) {
                                 BUG_ON(huge_pte_none(pte));
-                                spin_lock(&mm->page_table_lock);
+                                spin_lock(ptl);
                                 ptep = huge_pte_offset(mm, address & huge_page_mask(h));
                                 if (likely(pte_same(huge_ptep_get(ptep), pte)))
                                         goto retry_avoidcopy;
                                 /*
-                                 * race occurs while re-acquiring page_table_lock, and
-                                 * our job is done.
+                                 * race occurs while re-acquiring page table
+                                 * lock, and our job is done.
                                  */
                                 return 0;
                         }
@@ -2680,7 +2688,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
                 }
                 /* Caller expects lock to be held */
-                spin_lock(&mm->page_table_lock);
+                spin_lock(ptl);
                 if (err == -ENOMEM)
                         return VM_FAULT_OOM;
                 else
@@ -2695,7 +2703,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
                 page_cache_release(new_page);
                 page_cache_release(old_page);
                 /* Caller expects lock to be held */
-                spin_lock(&mm->page_table_lock);
+                spin_lock(ptl);
                 return VM_FAULT_OOM;
         }
@@ -2707,10 +2715,10 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
         mmun_end = mmun_start + huge_page_size(h);
         mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
         /*
-         * Retake the page_table_lock to check for racing updates
+         * Retake the page table lock to check for racing updates
          * before the page tables are altered
          */
-        spin_lock(&mm->page_table_lock);
+        spin_lock(ptl);
         ptep = huge_pte_offset(mm, address & huge_page_mask(h));
         if (likely(pte_same(huge_ptep_get(ptep), pte))) {
                 ClearPagePrivate(new_page);
@@ -2724,13 +2732,13 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
                 /* Make the old page be freed below */
                 new_page = old_page;
         }
-        spin_unlock(&mm->page_table_lock);
+        spin_unlock(ptl);
         mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
         page_cache_release(new_page);
         page_cache_release(old_page);
         /* Caller expects lock to be held */
-        spin_lock(&mm->page_table_lock);
+        spin_lock(ptl);
         return 0;
 }
@@ -2778,6 +2786,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
         struct page *page;
         struct address_space *mapping;
         pte_t new_pte;
+        spinlock_t *ptl;
         /*
          * Currently, we are forced to kill the process in the event the
@@ -2864,7 +2873,8 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
                         goto backout_unlocked;
         }
-        spin_lock(&mm->page_table_lock);
+        ptl = huge_pte_lockptr(h, mm, ptep);
+        spin_lock(ptl);
         size = i_size_read(mapping->host) >> huge_page_shift(h);
         if (idx >= size)
                 goto backout;
@@ -2885,16 +2895,16 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
         if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
                 /* Optimization, do the COW without a second fault */
-                ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page);
+                ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page, ptl);
         }
-        spin_unlock(&mm->page_table_lock);
+        spin_unlock(ptl);
         unlock_page(page);
 out:
         return ret;
 backout:
-        spin_unlock(&mm->page_table_lock);
+        spin_unlock(ptl);
 backout_unlocked:
         unlock_page(page);
         put_page(page);
@@ -2906,6 +2916,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 {
         pte_t *ptep;
         pte_t entry;
+        spinlock_t *ptl;
         int ret;
         struct page *page = NULL;
         struct page *pagecache_page = NULL;
@@ -2918,7 +2929,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         if (ptep) {
                 entry = huge_ptep_get(ptep);
                 if (unlikely(is_hugetlb_entry_migration(entry))) {
-                        migration_entry_wait_huge(mm, ptep);
+                        migration_entry_wait_huge(vma, mm, ptep);
                         return 0;
                 } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
                         return VM_FAULT_HWPOISON_LARGE |
@@ -2974,17 +2985,18 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         if (page != pagecache_page)
                 lock_page(page);
-        spin_lock(&mm->page_table_lock);
+        ptl = huge_pte_lockptr(h, mm, ptep);
+        spin_lock(ptl);
         /* Check for a racing update before calling hugetlb_cow */
         if (unlikely(!pte_same(entry, huge_ptep_get(ptep))))
-                goto out_page_table_lock;
+                goto out_ptl;
         if (flags & FAULT_FLAG_WRITE) {
                 if (!huge_pte_write(entry)) {
                         ret = hugetlb_cow(mm, vma, address, ptep, entry,
-                                        pagecache_page);
-                        goto out_page_table_lock;
+                                        pagecache_page, ptl);
+                        goto out_ptl;
                 }
                 entry = huge_pte_mkdirty(entry);
         }
@@ -2993,8 +3005,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                                                 flags & FAULT_FLAG_WRITE))
                 update_mmu_cache(vma, address, ptep);
-out_page_table_lock:
-        spin_unlock(&mm->page_table_lock);
+out_ptl:
+        spin_unlock(ptl);
         if (pagecache_page) {
                 unlock_page(pagecache_page);
@@ -3020,9 +3032,9 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
         unsigned long remainder = *nr_pages;
         struct hstate *h = hstate_vma(vma);
-        spin_lock(&mm->page_table_lock);
         while (vaddr < vma->vm_end && remainder) {
                 pte_t *pte;
+                spinlock_t *ptl = NULL;
                 int absent;
                 struct page *page;
@@ -3030,8 +3042,12 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                  * Some archs (sparc64, sh*) have multiple pte_ts to
                  * each hugepage. We have to make sure we get the
                  * first, for the page indexing below to work.
+                 *
+                 * Note that page table lock is not held when pte is null.
                  */
                 pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
+                if (pte)
+                        ptl = huge_pte_lock(h, mm, pte);
                 absent = !pte || huge_pte_none(huge_ptep_get(pte));
                 /*
@@ -3043,6 +3059,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                  */
                 if (absent && (flags & FOLL_DUMP) &&
                     !hugetlbfs_pagecache_present(h, vma, vaddr)) {
+                        if (pte)
+                                spin_unlock(ptl);
                         remainder = 0;
                         break;
                 }
@@ -3062,10 +3080,10 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                      !huge_pte_write(huge_ptep_get(pte)))) {
                         int ret;
-                        spin_unlock(&mm->page_table_lock);
+                        if (pte)
+                                spin_unlock(ptl);
                         ret = hugetlb_fault(mm, vma, vaddr,
                                 (flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
-                        spin_lock(&mm->page_table_lock);
                         if (!(ret & VM_FAULT_ERROR))
                                 continue;
@@ -3096,8 +3114,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                          */
                         goto same_page;
                 }
+                spin_unlock(ptl);
         }
-        spin_unlock(&mm->page_table_lock);
         *nr_pages = remainder;
         *position = vaddr;
@@ -3118,13 +3136,15 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
         flush_cache_range(vma, address, end);
         mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
-        spin_lock(&mm->page_table_lock);
         for (; address < end; address += huge_page_size(h)) {
+                spinlock_t *ptl;
                 ptep = huge_pte_offset(mm, address);
                 if (!ptep)
                         continue;
+                ptl = huge_pte_lock(h, mm, ptep);
                 if (huge_pmd_unshare(mm, &address, ptep)) {
                         pages++;
+                        spin_unlock(ptl);
                         continue;
                 }
                 if (!huge_pte_none(huge_ptep_get(ptep))) {
@@ -3134,8 +3154,8 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
                         set_huge_pte_at(mm, address, ptep, pte);
                         pages++;
                 }
+                spin_unlock(ptl);
         }
-        spin_unlock(&mm->page_table_lock);
         /*
          * Must flush TLB before releasing i_mmap_mutex: x86's huge_pmd_unshare
          * may have cleared our pud entry and done put_page on the page table:
@@ -3298,6 +3318,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
         unsigned long saddr;
         pte_t *spte = NULL;
         pte_t *pte;
+        spinlock_t *ptl;
         if (!vma_shareable(vma, addr))
                 return (pte_t *)pmd_alloc(mm, pud, addr);
@@ -3320,13 +3341,14 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
         if (!spte)
                 goto out;
-        spin_lock(&mm->page_table_lock);
+        ptl = huge_pte_lockptr(hstate_vma(vma), mm, spte);
+        spin_lock(ptl);
         if (pud_none(*pud))
                 pud_populate(mm, pud,
                                 (pmd_t *)((unsigned long)spte & PAGE_MASK));
         else
                 put_page(virt_to_page(spte));
-        spin_unlock(&mm->page_table_lock);
+        spin_unlock(ptl);
 out:
         pte = (pte_t *)pmd_alloc(mm, pud, addr);
         mutex_unlock(&mapping->i_mmap_mutex);
@@ -3340,7 +3362,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
  * indicated by page_count > 1, unmap is achieved by clearing pud and
  * decrementing the ref count. If count == 1, the pte page is not shared.
  *
- * called with vma->vm_mm->page_table_lock held.
+ * called with page table lock held.
  *
  * returns: 1 successfully unmapped a shared pte page
  *          0 the underlying pte page is not shared, or it is the last user
...
@@ -525,8 +525,9 @@ static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,
 #ifdef CONFIG_HUGETLB_PAGE
         int nid;
         struct page *page;
+        spinlock_t *ptl;
 
-        spin_lock(&vma->vm_mm->page_table_lock);
+        ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, (pte_t *)pmd);
         page = pte_page(huge_ptep_get((pte_t *)pmd));
         nid = page_to_nid(page);
         if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
@@ -536,7 +537,7 @@ static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,
             (flags & MPOL_MF_MOVE && page_mapcount(page) == 1))
                 isolate_huge_page(page, private);
 unlock:
-        spin_unlock(&vma->vm_mm->page_table_lock);
+        spin_unlock(ptl);
 #else
         BUG();
 #endif
...
@@ -130,7 +130,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
                 ptep = huge_pte_offset(mm, addr);
                 if (!ptep)
                         goto out;
-                ptl = &mm->page_table_lock;
+                ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep);
         } else {
                 pmd = mm_find_pmd(mm, addr);
                 if (!pmd)
@@ -249,9 +249,10 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
         __migration_entry_wait(mm, ptep, ptl);
 }
 
-void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte)
+void migration_entry_wait_huge(struct vm_area_struct *vma,
+                struct mm_struct *mm, pte_t *pte)
 {
-        spinlock_t *ptl = &(mm)->page_table_lock;
+        spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
         __migration_entry_wait(mm, pte, ptl);
 }
...
@@ -601,7 +601,7 @@ pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
         if (unlikely(PageHuge(page))) {
                 pte = huge_pte_offset(mm, address);
-                ptl = &mm->page_table_lock;
+                ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
                 goto check;
         }