Commit b377fd39 authored by Mel Gorman, committed by Linus Torvalds

Apply memory policies to top two highest zones when highest zone is ZONE_MOVABLE

The NUMA layer only supports NUMA policies for the highest zone.  When
ZONE_MOVABLE is configured with kernelcore=, the highest zone becomes
ZONE_MOVABLE.  The result is that policies are only applied to allocations,
such as anonymous pages and page cache, that are satisfied from ZONE_MOVABLE
when that zone is in use.

This patch applies policies to the two highest zones when the highest zone
is ZONE_MOVABLE.  As ZONE_MOVABLE consists of pages from the highest "real"
zone, applying policies to both zones is always functionally equivalent.
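
To illustrate the effect, here is a simplified userspace sketch (not kernel
code; the zone enum is cut down to four entries and all surrounding machinery
is omitted).  With kernelcore= the top zone is ZONE_MOVABLE, but the patched
check_highest_zone() ignores it, so policy_zone stays at the highest "real"
zone and ZONE_MOVABLE-backed allocations are filtered at allocation time
instead:

/* Simplified sketch: policy_zone stays at ZONE_HIGHMEM even though
 * ZONE_MOVABLE is the top zone, mirroring the patched check_highest_zone(). */
#include <stdio.h>

enum zone_type { ZONE_DMA, ZONE_NORMAL, ZONE_HIGHMEM, ZONE_MOVABLE, MAX_NR_ZONES };

static enum zone_type policy_zone = ZONE_DMA;

static void check_highest_zone(enum zone_type k)
{
	if (k > policy_zone && k != ZONE_MOVABLE)
		policy_zone = k;
}

int main(void)
{
	enum zone_type k;

	/* Zonelist construction would call this for every populated zone. */
	for (k = ZONE_DMA; k < MAX_NR_ZONES; k++)
		check_highest_zone(k);

	printf("policy_zone == ZONE_HIGHMEM? %d\n", policy_zone == ZONE_HIGHMEM); /* 1 */
	return 0;
}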

The patch has been tested on a variety of machines, both NUMA and non-NUMA,
covering x86, x86_64 and ppc64.  No abnormal results were seen in
kernbench, tbench, dbench or hackbench.  It passes the regression tests from
the numactl package, with and without kernelcore=, once the numactl tests
are patched to wait for vmstat counters to update.

akpm: this is the nasty hack to fix NUMA mempolicies in the presence of
ZONE_MOVABLE and kernelcore= in 2.6.23.  Christoph says "For .24 either merge
the mobility or get the other solution that Mel is working on.  That solution
would only use a single zonelist per node and filter on the fly.  That may
help performance and also help to make memory policies work better."
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Tested-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Christoph Lameter <clameter@sgi.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 8e92f21b
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -166,7 +166,7 @@ extern enum zone_type policy_zone;
 
 static inline void check_highest_zone(enum zone_type k)
 {
-	if (k > policy_zone)
+	if (k > policy_zone && k != ZONE_MOVABLE)
 		policy_zone = k;
 }
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -410,6 +410,24 @@ struct zonelist {
 #endif
 };
 
+#ifdef CONFIG_NUMA
+/*
+ * Only custom zonelists like MPOL_BIND need to be filtered as part of
+ * policies. As described in the comment for struct zonelist_cache, these
+ * zonelists will not have a zlcache so zlcache_ptr will not be set. Use
+ * that to determine if the zonelists needs to be filtered or not.
+ */
+static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
+{
+	return !zonelist->zlcache_ptr;
+}
+#else
+static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
+{
+	return 0;
+}
+#endif /* CONFIG_NUMA */
+
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
 struct node_active_region {
 	unsigned long start_pfn;
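
For reference, a cut-down userspace sketch of the convention the helper above
relies on (the struct here is reduced to the two relevant members; in the
kernel, per-node zonelists point zlcache_ptr at their embedded zlcache, while
the custom MPOL_BIND zonelists built by the mempolicy code leave it NULL):

/* Cut-down sketch of the zlcache_ptr convention used by
 * alloc_should_filter_zonelist(); not the kernel's real struct zonelist. */
#include <stdio.h>
#include <stddef.h>

struct zonelist_cache { int dummy; };

struct zonelist {
	struct zonelist_cache *zlcache_ptr;	/* NULL means "policy zonelist" */
	struct zonelist_cache zlcache;
};

static int alloc_should_filter_zonelist(struct zonelist *zonelist)
{
	return !zonelist->zlcache_ptr;
}

int main(void)
{
	struct zonelist node_zl;				/* regular per-node zonelist */
	struct zonelist bind_zl = { .zlcache_ptr = NULL };	/* MPOL_BIND-style zonelist */

	node_zl.zlcache_ptr = &node_zl.zlcache;

	printf("filter per-node zonelist?  %d\n", alloc_should_filter_zonelist(&node_zl)); /* 0 */
	printf("filter MPOL_BIND zonelist? %d\n", alloc_should_filter_zonelist(&bind_zl));  /* 1 */
	return 0;
}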
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -149,7 +149,7 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
 	   lower zones etc. Avoid empty zones because the memory allocator
 	   doesn't like them. If you implement node hot removal you
 	   have to fix that. */
-	k = policy_zone;
+	k = MAX_NR_ZONES - 1;
 	while (1) {
 		for_each_node_mask(nd, *nodes) {
 			struct zone *z = &NODE_DATA(nd)->node_zones[k];
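
With policy_zone now pinned below ZONE_MOVABLE, the MPOL_BIND zonelist must
start its descending scan from the very top zone so that ZONE_MOVABLE remains
reachable; disallowed zones are filtered later in the allocator.  A minimal
sketch of the new scan order (populated[] is a stand-in for populated_zone()
and the per-node loop of the real code):

/* Sketch of the descending zone scan bind_zonelist() performs after this
 * change: start at MAX_NR_ZONES - 1 instead of policy_zone. */
#include <stdio.h>

enum zone_type { ZONE_DMA, ZONE_NORMAL, ZONE_HIGHMEM, ZONE_MOVABLE, MAX_NR_ZONES };

int main(void)
{
	int populated[MAX_NR_ZONES] = { 1, 1, 1, 1 };	/* assume every zone has memory */
	enum zone_type k = MAX_NR_ZONES - 1;		/* was: k = policy_zone */

	while (1) {
		if (populated[k])
			printf("append zone %d to MPOL_BIND zonelist\n", k);
		if (k == 0)
			break;
		k--;
	}
	return 0;
}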
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1157,6 +1157,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 	nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
 	int zlc_active = 0;		/* set if using zonelist_cache */
 	int did_zlc_setup = 0;		/* just call zlc_setup() one time */
+	enum zone_type highest_zoneidx = -1; /* Gets set for policy zonelists */
 
 zonelist_scan:
 	/*
@@ -1166,6 +1167,18 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 	z = zonelist->zones;
 
 	do {
+		/*
+		 * In NUMA, this could be a policy zonelist which contains
+		 * zones that may not be allowed by the current gfp_mask.
+		 * Check the zone is allowed by the current flags
+		 */
+		if (unlikely(alloc_should_filter_zonelist(zonelist))) {
+			if (highest_zoneidx == -1)
+				highest_zoneidx = gfp_zone(gfp_mask);
+			if (zone_idx(*z) > highest_zoneidx)
+				continue;
+		}
+
 		if (NUMA_BUILD && zlc_active &&
 			!zlc_zone_worth_trying(zonelist, z, allowednodes))
 				continue;
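
A hedged userspace sketch of what that filter achieves (the flag bits and the
gfp_zone() below are simplified stand-ins, not the kernel's real definitions):
a movable highmem allocation such as page cache may still take pages from
ZONE_MOVABLE in a policy zonelist, while a plain kernel allocation skips it.

/* Simplified stand-in for the allocation-time filter: zones above
 * gfp_zone(gfp_mask) are skipped in a policy zonelist. */
#include <stdio.h>

enum zone_type { ZONE_DMA, ZONE_NORMAL, ZONE_HIGHMEM, ZONE_MOVABLE, MAX_NR_ZONES };

#define SKETCH_GFP_HIGHMEM	0x01u	/* hypothetical flag bits for this sketch */
#define SKETCH_GFP_MOVABLE	0x02u

/* Simplified gfp_zone(): pick the highest zone the flags allow. */
static enum zone_type gfp_zone(unsigned int gfp_mask)
{
	if ((gfp_mask & SKETCH_GFP_HIGHMEM) && (gfp_mask & SKETCH_GFP_MOVABLE))
		return ZONE_MOVABLE;
	if (gfp_mask & SKETCH_GFP_HIGHMEM)
		return ZONE_HIGHMEM;
	return ZONE_NORMAL;
}

int main(void)
{
	unsigned int user_alloc = SKETCH_GFP_HIGHMEM | SKETCH_GFP_MOVABLE;	/* e.g. page cache */
	unsigned int kernel_alloc = 0;						/* e.g. GFP_KERNEL */
	enum zone_type zone_in_policy_zonelist = ZONE_MOVABLE;

	/* The page cache allocation may take pages from ZONE_MOVABLE... */
	printf("user alloc skips ZONE_MOVABLE?   %d\n",
	       zone_in_policy_zonelist > gfp_zone(user_alloc));	/* 0 */
	/* ...but the kernel allocation must not, so the zone is skipped. */
	printf("kernel alloc skips ZONE_MOVABLE? %d\n",
	       zone_in_policy_zonelist > gfp_zone(kernel_alloc));	/* 1 */
	return 0;
}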