Commit 356ff8a9 authored by David Rientjes, committed by Linus Torvalds

Revert "mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask"

This reverts commit 89c83fb5.

This should have been done as part of 2f0799a0 ("mm, thp: restore
node-local hugepage allocations").  The movement of the thp allocation
policy from alloc_pages_vma() to alloc_hugepage_direct_gfpmask() was
intended to only set __GFP_THISNODE for mempolicies that are not
MPOL_BIND whereas the revert could set this regardless of mempolicy.
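
For illustration only, a minimal sketch of that intended selection (this
is not the code of either commit; get_vma_policy() is assumed to be the
mempolicy lookup helper and the locking/refcounting details are elided):

	/*
	 * Hedged sketch: add __GFP_THISNODE only when the effective
	 * mempolicy is not MPOL_BIND, so an explicit bind nodemask is
	 * never silently narrowed to a single node.
	 */
	struct mempolicy *pol = get_vma_policy(vma, addr);	/* assumed helper */
	gfp_t gfp = GFP_TRANSHUGE_LIGHT;

	if (pol->mode != MPOL_BIND)
		gfp |= __GFP_THISNODE;
	mpol_cond_put(pol);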

While the check for MPOL_BIND between alloc_hugepage_direct_gfpmask()
and alloc_pages_vma() was racy, that check has since been removed by the
revert.  What is left is the possibility of using __GFP_THISNODE in
policy_node() when it is unexpected, because the special handling for
hugepages in alloc_pages_vma() was removed as part of the consolidation.

Secondly, prior to 89c83fb5, alloc_pages_vma() implemented a somewhat
different policy for hugepage allocations, which were allocated through
alloc_hugepage_vma().  For hugepage allocations, if the allocating
process's node is in the set of allowed nodes, allocate with
__GFP_THISNODE for that node (for MPOL_PREFERRED, use that node with
__GFP_THISNODE instead).  This was changed for shmem_alloc_hugepage() in
89c83fb5 to allow fallback to other nodes, as it was for new_page() in
mm/mempolicy.c, which is functionally different behavior and removes the
requirement to only allocate hugepages locally.
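
Condensed, the node-local policy being restored (it reappears in the
mm/mempolicy.c hunk below, whose variable names this fragment follows)
amounts to roughly:

	/*
	 * Pick the local node, or the explicitly preferred node for
	 * MPOL_PREFERRED, and allocate only there (__GFP_THISNODE) when
	 * the policy's nodemask allows it; otherwise fall through to the
	 * normal allocation path.
	 */
	int hpage_node = numa_node_id();

	if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
		hpage_node = pol->v.preferred_node;

	nmask = policy_nodemask(gfp, pol);
	if (!nmask || node_isset(hpage_node, *nmask))
		page = __alloc_pages_node(hpage_node, gfp | __GFP_THISNODE,
					  order);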

So this commit does a full revert of 89c83fb5 instead of the partial
revert that was done in 2f0799a0.  The result is the same thp
allocation policy for 4.20 that was in 4.19.

Fixes: 89c83fb5 ("mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask")
Fixes: 2f0799a0 ("mm, thp: restore node-local hugepage allocations")
Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 5f179793
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -510,18 +510,22 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr,
-			int node);
+			int node, bool hugepage);
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
+	alloc_pages(gfp_mask, order)
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
 	alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr)			\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
+	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
 #define alloc_page_vma_node(gfp_mask, vma, addr, node)		\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)

 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -629,30 +629,30 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
  *	    available
  * never: never stall for any thp allocation
  */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
 {
 	const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
-	const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;

 	/* Always do synchronous compaction */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE | __GFP_THISNODE |
-		       (vma_madvised ? 0 : __GFP_NORETRY);
+		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);

 	/* Kick kcompactd and fail quickly */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | __GFP_KSWAPD_RECLAIM;
+		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;

 	/* Synchronous compaction if madvised, otherwise kick kcompactd */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-						  __GFP_KSWAPD_RECLAIM);
+		return GFP_TRANSHUGE_LIGHT |
+			(vma_madvised ? __GFP_DIRECT_RECLAIM :
+					__GFP_KSWAPD_RECLAIM);

 	/* Only do synchronous compaction if madvised */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
+		return GFP_TRANSHUGE_LIGHT |
+		       (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);

-	return gfp_mask;
+	return GFP_TRANSHUGE_LIGHT;
 }

 /* Caller must hold page table lock. */
@@ -724,8 +724,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		pte_free(vma->vm_mm, pgtable);
 		return ret;
 	}
-	gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-	page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
+	gfp = alloc_hugepage_direct_gfpmask(vma);
+	page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
 	if (unlikely(!page)) {
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1295,9 +1295,8 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 alloc:
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow()) {
-		huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-		new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
-				haddr, numa_node_id());
+		huge_gfp = alloc_hugepage_direct_gfpmask(vma);
+		new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
 	} else
 		new_page = NULL;
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1116,8 +1116,8 @@ static struct page *new_page(struct page *page, unsigned long start)
 	} else if (PageTransHuge(page)) {
 		struct page *thp;

-		thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
-				address, numa_node_id());
+		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
+					 HPAGE_PMD_ORDER);
 		if (!thp)
 			return NULL;
 		prep_transhuge_page(thp);
@@ -2011,6 +2011,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  *	@vma:  Pointer to VMA or NULL if not available.
  *	@addr: Virtual Address of the allocation. Must be inside the VMA.
  *	@node: Which node to prefer for allocation (modulo policy).
+ *	@hugepage: for hugepages try only the preferred node if possible
  *
  *	This function allocates a page from the kernel page pool and applies
  *	a NUMA policy associated with the VMA or the current process.
@@ -2021,7 +2022,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node)
+		unsigned long addr, int node, bool hugepage)
 {
 	struct mempolicy *pol;
 	struct page *page;
@@ -2039,6 +2040,31 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		goto out;
 	}

+	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
+		int hpage_node = node;
+
+		/*
+		 * For hugepage allocation and non-interleave policy which
+		 * allows the current node (or other explicitly preferred
+		 * node) we only try to allocate from the current/preferred
+		 * node and don't fall back to other nodes, as the cost of
+		 * remote accesses would likely offset THP benefits.
+		 *
+		 * If the policy is interleave, or does not allow the current
+		 * node in its nodemask, we allocate the standard way.
+		 */
+		if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
+			hpage_node = pol->v.preferred_node;
+
+		nmask = policy_nodemask(gfp, pol);
+		if (!nmask || node_isset(hpage_node, *nmask)) {
+			mpol_cond_put(pol);
+			page = __alloc_pages_node(hpage_node,
+						gfp | __GFP_THISNODE, order);
+			goto out;
+		}
+	}
+
 	nmask = policy_nodemask(gfp, pol);
 	preferred_nid = policy_node(gfp, pol, node);
 	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1439,7 +1439,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,

 	shmem_pseudo_vma_init(&pvma, info, hindex);
 	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
+			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
 	shmem_pseudo_vma_destroy(&pvma);
 	if (page)
 		prep_transhuge_page(page);