Commit 0ae5e89c authored by Ying Han, committed by Linus Torvalds

memcg: count the soft_limit reclaim in global background reclaim

The global kswapd scans the per-zone LRU and reclaims pages regardless of
which cgroup they belong to. This breaks memory isolation, since one cgroup
can end up reclaiming pages on behalf of another. Instead, we should rely on
memcg-aware targeted reclaim, including per-memcg kswapd and soft_limit
hierarchical reclaim, under memory pressure.

In the global background reclaim, we do soft reclaim before scanning the
per-zone LRU. However, the return value is ignored. This patch is the first
step toward skipping shrink_zone() when soft_limit reclaim has done enough
work.

This is part of an effort to reduce reclaim of memcg pages from the global
LRU. The per-memcg background reclaim patchset further enhances per-cgroup
targeted reclaim; I will post V4 of that series shortly.

To verify, run multiple memory-intensive workloads in separate memcgs and
watch the soft_steal and soft_scan counters in memory.stat:

  $ cat /dev/cgroup/A/memory.stat | grep 'soft'
  soft_steal 240000
  soft_scan 240000
  total_soft_steal 240000
  total_soft_scan 240000

This patch:

In the global background reclaim, we do soft reclaim before scanning the
per-zone LRU.  However, the return value is ignored.

We would like to skip shrink_zone() if soft_limit reclaim does enough
work.  We also need to balance memory pressure across per-memcg zones,
mirroring the logic of the core VM.  This patch is the first step: it
counts the nr_scanned and nr_reclaimed from soft_limit reclaim into the
global scan_control.
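
For orientation, here is a condensed sketch of the new accounting path in
balance_pgdat() (distilled from the mm/vmscan.c hunk below, not verbatim
kernel code):

  unsigned long nr_soft_reclaimed;
  unsigned long nr_soft_scanned = 0;

  /* Soft limit reclaim runs first; its results are no longer dropped. */
  nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, order,
						    sc.gfp_mask,
						    &nr_soft_scanned);
  sc.nr_reclaimed += nr_soft_reclaimed;	/* was previously ignored */
  total_scanned += nr_soft_scanned;	/* feeds the global scan count */
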
Signed-off-by: Ying Han <yinghan@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent f042e707
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -144,7 +144,8 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 }
 
 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
-						gfp_t gfp_mask);
+						gfp_t gfp_mask,
+						unsigned long *total_scanned);
 u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -338,7 +339,8 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 
 static inline
 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
-					    gfp_t gfp_mask)
+					    gfp_t gfp_mask,
+					    unsigned long *total_scanned)
 {
 	return 0;
 }
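
A design note (my observation, not text from the commit): the second hunk
widens the !CONFIG_CGROUP_MEM_RES_CTLR stub as well, so callers may pass the
counter unconditionally. A minimal sketch of the memcg-disabled behavior:

  unsigned long nr_soft_scanned = 0;
  unsigned long nr_soft_reclaimed;

  /* With memcg compiled out, the stub returns 0 and leaves
   * nr_soft_scanned at its caller-initialized value. */
  nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, 0, GFP_KERNEL,
						    &nr_soft_scanned);
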
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -257,7 +257,8 @@ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
 extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 						gfp_t gfp_mask, bool noswap,
 						unsigned int swappiness,
-						struct zone *zone);
+						struct zone *zone,
+						unsigned long *nr_scanned);
 extern int __isolate_lru_page(struct page *page, int mode, int file);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1433,7 +1433,8 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
 static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
 						struct zone *zone,
 						gfp_t gfp_mask,
-						unsigned long reclaim_options)
+						unsigned long reclaim_options,
+						unsigned long *total_scanned)
 {
 	struct mem_cgroup *victim;
 	int ret, total = 0;
@@ -1442,6 +1443,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
 	bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
 	bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
 	unsigned long excess;
+	unsigned long nr_scanned;
 
 	excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
@@ -1484,10 +1486,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
 			continue;
 		}
 		/* we use swappiness of local cgroup */
-		if (check_soft)
+		if (check_soft) {
 			ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
-				noswap, get_swappiness(victim), zone);
-		else
+				noswap, get_swappiness(victim), zone,
+				&nr_scanned);
+			*total_scanned += nr_scanned;
+		} else
 			ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
 						noswap, get_swappiness(victim));
 		css_put(&victim->css);
@@ -1928,7 +1932,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
 		return CHARGE_WOULDBLOCK;
 
 	ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
-					      gfp_mask, flags);
+					      gfp_mask, flags, NULL);
 	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
 		return CHARGE_RETRY;
 	/*
@@ -3211,7 +3215,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 			break;
 
 		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_SHRINK);
+						MEM_CGROUP_RECLAIM_SHRINK,
+						NULL);
 		curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3271,7 +3276,8 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
 						MEM_CGROUP_RECLAIM_NOSWAP |
-						MEM_CGROUP_RECLAIM_SHRINK);
+						MEM_CGROUP_RECLAIM_SHRINK,
+						NULL);
 		curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3285,7 +3291,8 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 }
 
 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
-					    gfp_t gfp_mask)
+					    gfp_t gfp_mask,
+					    unsigned long *total_scanned)
 {
 	unsigned long nr_reclaimed = 0;
 	struct mem_cgroup_per_zone *mz, *next_mz = NULL;
@@ -3293,6 +3300,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 	int loop = 0;
 	struct mem_cgroup_tree_per_zone *mctz;
 	unsigned long long excess;
+	unsigned long nr_scanned;
 
 	if (order > 0)
 		return 0;
@@ -3311,10 +3319,13 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 		if (!mz)
 			break;
 
+		nr_scanned = 0;
 		reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
 						gfp_mask,
-						MEM_CGROUP_RECLAIM_SOFT);
+						MEM_CGROUP_RECLAIM_SOFT,
+						&nr_scanned);
 		nr_reclaimed += reclaimed;
+		*total_scanned += nr_scanned;
 		spin_lock(&mctz->lock);
 		/*
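
A subtlety worth calling out (my reading of the hunks above): the new
total_scanned argument may be NULL for the non-soft callers, because
mem_cgroup_hierarchical_reclaim() only dereferences it in the check_soft
branch:

  if (check_soft) {
  	ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
  			noswap, get_swappiness(victim), zone,
  			&nr_scanned);
  	/* Only reached when the caller set MEM_CGROUP_RECLAIM_SOFT,
  	 * and every such caller passes a non-NULL total_scanned. */
  	*total_scanned += nr_scanned;
  } else
  	ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
  			noswap, get_swappiness(victim));
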
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2171,9 +2171,11 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 						gfp_t gfp_mask, bool noswap,
 						unsigned int swappiness,
-						struct zone *zone)
+						struct zone *zone,
+						unsigned long *nr_scanned)
 {
 	struct scan_control sc = {
+		.nr_scanned = 0,
 		.nr_to_reclaim = SWAP_CLUSTER_MAX,
 		.may_writepage = !laptop_mode,
 		.may_unmap = 1,
@@ -2182,6 +2184,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 		.order = 0,
 		.mem_cgroup = mem,
 	};
+
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2200,6 +2203,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 
 	trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
 
+	*nr_scanned = sc.nr_scanned;
 	return sc.nr_reclaimed;
 }
 
@@ -2347,6 +2351,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 	int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 	unsigned long total_scanned;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
+	unsigned long nr_soft_reclaimed;
+	unsigned long nr_soft_scanned;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.may_unmap = 1,
@@ -2439,11 +2445,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 			sc.nr_scanned = 0;
+			nr_soft_scanned = 0;
 			/*
 			 * Call soft limit reclaim before calling shrink_zone.
-			 * For now we ignore the return value
 			 */
-			mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask);
+			nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
+							order, sc.gfp_mask,
+							&nr_soft_scanned);
+			sc.nr_reclaimed += nr_soft_reclaimed;
+			total_scanned += nr_soft_scanned;
 
 			/*
 			 * We put equal pressure on every zone, unless
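
To make the stated goal concrete: a follow-up could use the returned counts
to short-circuit the global LRU scan. A hypothetical sketch of where the
series is headed (an assumption about its direction, not code from this
commit):

  nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, order,
						    sc.gfp_mask,
						    &nr_soft_scanned);
  sc.nr_reclaimed += nr_soft_reclaimed;
  total_scanned += nr_soft_scanned;

  /* Hypothetical next step: if soft limit reclaim alone made enough
   * progress on this zone, skip the cgroup-blind shrink_zone() pass. */
  if (nr_soft_reclaimed >= SWAP_CLUSTER_MAX)
  	continue;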