Commit 5ce9bfef authored by Vlastimil Babka, committed by Linus Torvalds

mm, page_alloc: move cpuset seqcount checking to slowpath

This is a preparation for the following patch to make review simpler.
While the primary motivation is a bug fix, this also simplifies the fast
path, although the moved code is only enabled when cpusets are in use.

Link: http://lkml.kernel.org/r/20170120103843.24587-4-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Ganapatrao Kulkarni <gpkulkarni@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 16096c25
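
Before the diff, a minimal sketch of the retry structure this patch creates, for orientation. This is not the kernel source; try_to_allocate() is a hypothetical stand-in for the slowpath's real reclaim and compaction attempts. The point is that the cpuset cookie is now taken and rechecked entirely inside __alloc_pages_slowpath(), with the per-attempt retry counters reset on every pass:

/*
 * Sketch only, assuming try_to_allocate() stands in for the real
 * reclaim/compaction logic. Mirrors the flow this patch introduces.
 */
static struct page *slowpath_sketch(gfp_t gfp_mask, unsigned int order)
{
	struct page *page;
	unsigned int cpuset_mems_cookie;
	int compaction_retries, no_progress_loops;

retry_cpuset:
	/* Reset per-attempt state so a cpuset-triggered retry starts clean. */
	compaction_retries = 0;
	no_progress_loops = 0;
	cpuset_mems_cookie = read_mems_allowed_begin();

	page = try_to_allocate(gfp_mask, order);	/* hypothetical placeholder */
	if (page)
		return page;

	/* About to fail: if mems_allowed changed meanwhile, try again. */
	if (read_mems_allowed_retry(cpuset_mems_cookie))
		goto retry_cpuset;

	return NULL;
}

The fast path in __alloc_pages_nodemask() no longer touches the cookie at all; a NULL preferred zone simply falls through to the slowpath, which now owns the retry.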
@@ -3523,12 +3523,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	struct page *page = NULL;
 	unsigned int alloc_flags;
 	unsigned long did_some_progress;
-	enum compact_priority compact_priority = DEF_COMPACT_PRIORITY;
+	enum compact_priority compact_priority;
 	enum compact_result compact_result;
-	int compaction_retries = 0;
-	int no_progress_loops = 0;
+	int compaction_retries;
+	int no_progress_loops;
 	unsigned long alloc_start = jiffies;
 	unsigned int stall_timeout = 10 * HZ;
+	unsigned int cpuset_mems_cookie;
 
 	/*
 	 * In the slowpath, we sanity check order to avoid ever trying to
@@ -3549,6 +3550,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
 		gfp_mask &= ~__GFP_ATOMIC;
 
+retry_cpuset:
+	compaction_retries = 0;
+	no_progress_loops = 0;
+	compact_priority = DEF_COMPACT_PRIORITY;
+	cpuset_mems_cookie = read_mems_allowed_begin();
+
 	/*
 	 * The fast path uses conservative alloc_flags to succeed only until
 	 * kswapd needs to be woken up, and to avoid the cost of setting up
@@ -3720,6 +3727,15 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	}
 
 nopage:
+	/*
+	 * When updating a task's mems_allowed, it is possible to race with
+	 * parallel threads in such a way that an allocation can fail while
+	 * the mask is being updated. If a page allocation is about to fail,
+	 * check if the cpuset changed during allocation and if so, retry.
+	 */
+	if (read_mems_allowed_retry(cpuset_mems_cookie))
+		goto retry_cpuset;
+
 	warn_alloc(gfp_mask,
 			"page allocation failure: order:%u", order);
 got_pg:
@@ -3734,7 +3750,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 			struct zonelist *zonelist, nodemask_t *nodemask)
 {
 	struct page *page;
-	unsigned int cpuset_mems_cookie;
 	unsigned int alloc_flags = ALLOC_WMARK_LOW;
 	gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */
 	struct alloc_context ac = {
@@ -3771,9 +3786,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE)
 		alloc_flags |= ALLOC_CMA;
 
-retry_cpuset:
-	cpuset_mems_cookie = read_mems_allowed_begin();
-
 	/* Dirty zone balancing only done in the fast path */
 	ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE);
 
@@ -3786,6 +3798,11 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 					ac.high_zoneidx, ac.nodemask);
 	if (!ac.preferred_zoneref->zone) {
 		page = NULL;
+		/*
+		 * This might be due to race with cpuset_current_mems_allowed
+		 * update, so make sure we retry with original nodemask in the
+		 * slow path.
+		 */
 		goto no_zone;
 	}
 
@@ -3794,6 +3811,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (likely(page))
 		goto out;
 
+no_zone:
 	/*
 	 * Runtime PM, block IO and its error handling path can deadlock
 	 * because I/O on the device might not complete.
@@ -3811,24 +3829,11 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 		ac.nodemask = nodemask;
 		ac.preferred_zoneref = first_zones_zonelist(ac.zonelist,
 						ac.high_zoneidx, ac.nodemask);
-		if (!ac.preferred_zoneref->zone)
-			goto no_zone;
+		/* If we have NULL preferred zone, slowpath will handle that */
 	}
 
 	page = __alloc_pages_slowpath(alloc_mask, order, &ac);
 
-no_zone:
-	/*
-	 * When updating a task's mems_allowed, it is possible to race with
-	 * parallel threads in such a way that an allocation can fail while
-	 * the mask is being updated. If a page allocation is about to fail,
-	 * check if the cpuset changed during allocation and if so, retry.
-	 */
-	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) {
-		alloc_mask = gfp_mask;
-		goto retry_cpuset;
-	}
-
 out:
 	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
 	    unlikely(memcg_kmem_charge(page, gfp_mask, order) != 0)) {
...
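
The cookie pair used throughout the patch is a seqcount read section (see read_mems_allowed_begin() and read_mems_allowed_retry() in include/linux/cpuset.h). Below is a toy userspace model of the same pattern, with illustrative names only, not the kernel implementation: writers make the counter odd while an update is in flight and even once it completes, so a reader whose cookie no longer matches knows mems_allowed changed under it and must retry.

#include <stdatomic.h>
#include <stdbool.h>

/* Toy model of the seqcount pattern behind read_mems_allowed_begin()
 * and read_mems_allowed_retry(). Names are illustrative. */
static _Atomic unsigned int mems_seq;

static unsigned int cookie_begin(void)
{
	unsigned int seq;

	/* Spin past in-progress updates (odd counter values). */
	while ((seq = atomic_load(&mems_seq)) & 1)
		;
	return seq;
}

static bool cookie_retry(unsigned int cookie)
{
	/* Any change means an update ran during our read section. */
	return atomic_load(&mems_seq) != cookie;
}

static void update_mems_allowed(void)
{
	atomic_fetch_add(&mems_seq, 1);	/* odd: update in progress */
	/* ... rewrite the task's mems_allowed here ... */
	atomic_fetch_add(&mems_seq, 1);	/* even: update complete */
}

Because the check is cheap on the reader side, moving it out of the fast path costs nothing when cpusets are unused, which is exactly the simplification the commit message describes.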