Commit 37e867c1 authored by David Rientjes, committed by Ben Hutchings

mm, thp: fix collapsing of hugepages on madvise

commit 6d50e60c upstream.

If an anonymous mapping is not allowed to fault thp memory and then
madvise(MADV_HUGEPAGE) is used after fault, khugepaged will never
collapse this memory into thp memory.
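
For illustration, a minimal userspace sequence that runs into this might look like the sketch below (a hypothetical reproducer, not taken from the report): the mapping is first marked MADV_NOHUGEPAGE, faulted in with small pages, and only then marked MADV_HUGEPAGE, so any collapse to thp has to come from khugepaged rather than from the fault path.

/* Hypothetical reproducer sketch: fault a region while thp is disallowed,
 * then ask for hugepages afterwards.  Before this fix, khugepaged would
 * never collapse the region. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>

#define LEN	(4UL << 20)	/* a few PMD-sized ranges (2MB each on x86_64) */

int main(void)
{
	char *p = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Disallow thp for the mapping, then fault it in with small pages. */
	madvise(p, LEN, MADV_NOHUGEPAGE);
	memset(p, 1, LEN);

	/* Ask for hugepages only after the fault; collapsing this memory is
	 * now khugepaged's job, which is the path this fix repairs. */
	madvise(p, LEN, MADV_HUGEPAGE);

	/* Wait so khugepaged has a chance to scan; AnonHugePages in
	 * /proc/self/smaps shows whether the region was collapsed. */
	pause();
	return 0;
}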

This occurs because the madvise(2) handler for thp, hugepage_madvise(),
clears VM_NOHUGEPAGE on the stack and it isn't stored in vma->vm_flags
until the final action of madvise_behavior().  This causes the
khugepaged_enter_vma_merge() to be a no-op in hugepage_madvise() when
the vma had previously had VM_NOHUGEPAGE set.
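
To make the ordering concrete, a small userspace model of the pre-fix behaviour is sketched below (hypothetical names and simplified flag values, not kernel code): the madvise handler clears VM_NOHUGEPAGE only in the caller's local copy of the flags, while the registration check still reads the stale flags stored in the vma, so it declines to register the mm with khugepaged.

/* Simplified model of the pre-fix ordering problem (hypothetical names,
 * not the kernel implementation). */
#include <stdio.h>

#define VM_HUGEPAGE	0x1UL
#define VM_NOHUGEPAGE	0x2UL

struct fake_vma {
	unsigned long vm_flags;
};

/* Pre-fix check: reads vma->vm_flags, which has not been updated yet. */
static int would_register_with_khugepaged(struct fake_vma *vma)
{
	return (vma->vm_flags & VM_HUGEPAGE) &&
	       !(vma->vm_flags & VM_NOHUGEPAGE);
}

/* Models hugepage_madvise(MADV_HUGEPAGE): it edits only the caller's copy
 * of the flags; vma->vm_flags is written back later by madvise_behavior(). */
static void hugepage_madvise_model(struct fake_vma *vma, unsigned long *vm_flags)
{
	*vm_flags &= ~VM_NOHUGEPAGE;
	*vm_flags |= VM_HUGEPAGE;

	printf("register with khugepaged? %s\n",
	       would_register_with_khugepaged(vma) ? "yes" : "no (the bug)");
}

int main(void)
{
	struct fake_vma vma = { .vm_flags = VM_NOHUGEPAGE };
	unsigned long flags = vma.vm_flags;	/* madvise works on a copy */

	hugepage_madvise_model(&vma, &flags);
	vma.vm_flags = flags;	/* madvise_behavior() stores the flags last */
	return 0;
}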

Fix this by passing the correct vma flags to the khugepaged mm slot
handler.  There's no chance khugepaged can run on this vma until after
madvise_behavior() returns since we hold mm->mmap_sem.

It would be possible to clear VM_NOHUGEPAGE directly from vma->vm_flags
in hugepage_madvise(), but I didn't want to introduce special case
behavior into madvise_behavior().  I think it's best to just let it
always set vma->vm_flags itself.

Signed-off-by: David Rientjes <rientjes@google.com>
Reported-by: Suleiman Souhlal <suleiman@google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[bwh: Backported to 3.2: adjust context, indentation]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
parent e0fb1fad
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -6,7 +6,8 @@
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern int __khugepaged_enter(struct mm_struct *mm);
 extern void __khugepaged_exit(struct mm_struct *mm);
-extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma);
+extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+				      unsigned long vm_flags);
 
 #define khugepaged_enabled()					       \
 	(transparent_hugepage_flags &				       \
@@ -35,13 +36,13 @@ static inline void khugepaged_exit(struct mm_struct *mm)
 		__khugepaged_exit(mm);
 }
 
-static inline int khugepaged_enter(struct vm_area_struct *vma)
+static inline int khugepaged_enter(struct vm_area_struct *vma,
+				   unsigned long vm_flags)
 {
 	if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
 		if ((khugepaged_always() ||
-		     (khugepaged_req_madv() &&
-		      vma->vm_flags & VM_HUGEPAGE)) &&
-		    !(vma->vm_flags & VM_NOHUGEPAGE))
+		     (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
+		    !(vm_flags & VM_NOHUGEPAGE))
 			if (__khugepaged_enter(vma->vm_mm))
 				return -ENOMEM;
 	return 0;
@@ -54,11 +55,13 @@ static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 static inline void khugepaged_exit(struct mm_struct *mm)
 {
 }
-static inline int khugepaged_enter(struct vm_area_struct *vma)
+static inline int khugepaged_enter(struct vm_area_struct *vma,
+				   unsigned long vm_flags)
 {
 	return 0;
 }
-static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
+static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+					     unsigned long vm_flags)
 {
 	return 0;
 }
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -682,7 +682,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (haddr >= vma->vm_start && haddr + HPAGE_PMD_SIZE <= vma->vm_end) {
 		if (unlikely(anon_vma_prepare(vma)))
 			return VM_FAULT_OOM;
-		if (unlikely(khugepaged_enter(vma)))
+		if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
 			return VM_FAULT_OOM;
 		page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
 					  vma, haddr, numa_node_id(), 0);
@@ -1493,7 +1493,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
 		 * register it here without waiting a page fault that
 		 * may not happen any time soon.
 		 */
-		if (unlikely(khugepaged_enter_vma_merge(vma)))
+		if (unlikely(khugepaged_enter_vma_merge(vma, *vm_flags)))
 			return -ENOMEM;
 		break;
 	case MADV_NOHUGEPAGE:
@@ -1625,7 +1625,8 @@ int __khugepaged_enter(struct mm_struct *mm)
 	return 0;
 }
 
-int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
+int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+			       unsigned long vm_flags)
 {
 	unsigned long hstart, hend;
 	if (!vma->anon_vma)
@@ -1641,11 +1642,11 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
 	 * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
 	 * true too, verify it here.
 	 */
-	VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
+	VM_BUG_ON(is_linear_pfn_mapping(vma) || vm_flags & VM_NO_THP);
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 	hend = vma->vm_end & HPAGE_PMD_MASK;
 	if (hstart < hend)
-		return khugepaged_enter(vma);
+		return khugepaged_enter(vma, vm_flags);
 	return 0;
 }
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -796,7 +796,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 				end, prev->vm_pgoff, NULL);
 		if (err)
 			return NULL;
-		khugepaged_enter_vma_merge(prev);
+		khugepaged_enter_vma_merge(prev, vm_flags);
 		return prev;
 	}
@@ -815,7 +815,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 					next->vm_pgoff - pglen, NULL);
 		if (err)
 			return NULL;
-		khugepaged_enter_vma_merge(area);
+		khugepaged_enter_vma_merge(area, vm_flags);
 		return area;
 	}
@@ -1741,7 +1741,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 		}
 	}
 	vma_unlock_anon_vma(vma);
-	khugepaged_enter_vma_merge(vma);
+	khugepaged_enter_vma_merge(vma, vma->vm_flags);
 	return error;
 }
 #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
@@ -1792,7 +1792,7 @@ int expand_downwards(struct vm_area_struct *vma,
 		}
 	}
 	vma_unlock_anon_vma(vma);
-	khugepaged_enter_vma_merge(vma);
+	khugepaged_enter_vma_merge(vma, vma->vm_flags);
 	return error;
 }