Commit a9dd0a83 authored by Mel Gorman, committed by Linus Torvalds

mm, vmscan: make shrink_node decisions more node-centric

Earlier patches focused on having direct reclaim and kswapd use data
that is node-centric for reclaiming but shrink_node() itself still uses
too much zone information.  This patch removes unnecessary zone-based
information with the most important decision being whether to continue
reclaim or not.  Some memcg APIs are adjusted as a result even though
memcg itself still uses some zone information.

[mgorman@techsingularity.net: optimization]
  Link: http://lkml.kernel.org/r/1468588165-12461-2-git-send-email-mgorman@techsingularity.net
Link: http://lkml.kernel.org/r/1467970510-21195-14-git-send-email-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 86c79f6b
...@@ -324,22 +324,23 @@ mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone) ...@@ -324,22 +324,23 @@ mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone)
} }
/** /**
* mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg * mem_cgroup_lruvec - get the lru list vector for a node or a memcg zone
* @node: node of the wanted lruvec
* @zone: zone of the wanted lruvec * @zone: zone of the wanted lruvec
* @memcg: memcg of the wanted lruvec * @memcg: memcg of the wanted lruvec
* *
* Returns the lru list vector holding pages for the given @zone and * Returns the lru list vector holding pages for a given @node or a given
* @mem. This can be the global zone lruvec, if the memory controller * @memcg and @zone. This can be the node lruvec, if the memory controller
* is disabled. * is disabled.
*/ */
static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
struct mem_cgroup *memcg) struct zone *zone, struct mem_cgroup *memcg)
{ {
struct mem_cgroup_per_zone *mz; struct mem_cgroup_per_zone *mz;
struct lruvec *lruvec; struct lruvec *lruvec;
if (mem_cgroup_disabled()) { if (mem_cgroup_disabled()) {
lruvec = zone_lruvec(zone); lruvec = node_lruvec(pgdat);
goto out; goto out;
} }
...@@ -609,10 +610,10 @@ static inline void mem_cgroup_migrate(struct page *old, struct page *new) ...@@ -609,10 +610,10 @@ static inline void mem_cgroup_migrate(struct page *old, struct page *new)
{ {
} }
static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
struct mem_cgroup *memcg) struct zone *zone, struct mem_cgroup *memcg)
{ {
return zone_lruvec(zone); return node_lruvec(pgdat);
} }
static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
......
...@@ -739,9 +739,9 @@ static inline spinlock_t *zone_lru_lock(struct zone *zone) ...@@ -739,9 +739,9 @@ static inline spinlock_t *zone_lru_lock(struct zone *zone)
return &zone->zone_pgdat->lru_lock; return &zone->zone_pgdat->lru_lock;
} }
static inline struct lruvec *zone_lruvec(struct zone *zone) static inline struct lruvec *node_lruvec(struct pglist_data *pgdat)
{ {
return &zone->zone_pgdat->lruvec; return &pgdat->lruvec;
} }
static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat) static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
......
...@@ -316,7 +316,7 @@ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, ...@@ -316,7 +316,7 @@ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages, unsigned long nr_pages,
gfp_t gfp_mask, gfp_t gfp_mask,
bool may_swap); bool may_swap);
extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap, gfp_t gfp_mask, bool noswap,
struct zone *zone, struct zone *zone,
unsigned long *nr_scanned); unsigned long *nr_scanned);
......
...@@ -1432,8 +1432,8 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg, ...@@ -1432,8 +1432,8 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
} }
continue; continue;
} }
total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false, total += mem_cgroup_shrink_node(victim, gfp_mask, false,
zone, &nr_scanned); zone, &nr_scanned);
*total_scanned += nr_scanned; *total_scanned += nr_scanned;
if (!soft_limit_excess(root_memcg)) if (!soft_limit_excess(root_memcg))
break; break;
......
...@@ -5911,6 +5911,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) ...@@ -5911,6 +5911,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
#endif #endif
pgdat_page_ext_init(pgdat); pgdat_page_ext_init(pgdat);
spin_lock_init(&pgdat->lru_lock); spin_lock_init(&pgdat->lru_lock);
lruvec_init(node_lruvec(pgdat));
for (j = 0; j < MAX_NR_ZONES; j++) { for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j; struct zone *zone = pgdat->node_zones + j;
...@@ -5973,7 +5974,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) ...@@ -5973,7 +5974,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
/* For bootup, initialized properly in watermark setup */ /* For bootup, initialized properly in watermark setup */
mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages); mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages);
lruvec_init(zone_lruvec(zone));
if (!size) if (!size)
continue; continue;
......
...@@ -2224,12 +2224,13 @@ static inline void init_tlb_ubc(void) ...@@ -2224,12 +2224,13 @@ static inline void init_tlb_ubc(void)
#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
/* /*
* This is a basic per-zone page freer. Used by both kswapd and direct reclaim. * This is a basic per-node page freer. Used by both kswapd and direct reclaim.
*/ */
static void shrink_zone_memcg(struct zone *zone, struct mem_cgroup *memcg, static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memcg,
struct scan_control *sc, unsigned long *lru_pages) struct scan_control *sc, unsigned long *lru_pages)
{ {
struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); struct zone *zone = &pgdat->node_zones[sc->reclaim_idx];
struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, zone, memcg);
unsigned long nr[NR_LRU_LISTS]; unsigned long nr[NR_LRU_LISTS];
unsigned long targets[NR_LRU_LISTS]; unsigned long targets[NR_LRU_LISTS];
unsigned long nr_to_scan; unsigned long nr_to_scan;
...@@ -2362,13 +2363,14 @@ static bool in_reclaim_compaction(struct scan_control *sc) ...@@ -2362,13 +2363,14 @@ static bool in_reclaim_compaction(struct scan_control *sc)
* calls try_to_compact_zone() that it will have enough free pages to succeed. * calls try_to_compact_zone() that it will have enough free pages to succeed.
* It will give up earlier than that if there is difficulty reclaiming pages. * It will give up earlier than that if there is difficulty reclaiming pages.
*/ */
static inline bool should_continue_reclaim(struct zone *zone, static inline bool should_continue_reclaim(struct pglist_data *pgdat,
unsigned long nr_reclaimed, unsigned long nr_reclaimed,
unsigned long nr_scanned, unsigned long nr_scanned,
struct scan_control *sc) struct scan_control *sc)
{ {
unsigned long pages_for_compaction; unsigned long pages_for_compaction;
unsigned long inactive_lru_pages; unsigned long inactive_lru_pages;
int z;
/* If not in reclaim/compaction mode, stop */ /* If not in reclaim/compaction mode, stop */
if (!in_reclaim_compaction(sc)) if (!in_reclaim_compaction(sc))
...@@ -2402,21 +2404,29 @@ static inline bool should_continue_reclaim(struct zone *zone, ...@@ -2402,21 +2404,29 @@ static inline bool should_continue_reclaim(struct zone *zone,
* inactive lists are large enough, continue reclaiming * inactive lists are large enough, continue reclaiming
*/ */
pages_for_compaction = (2UL << sc->order); pages_for_compaction = (2UL << sc->order);
inactive_lru_pages = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE); inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
if (get_nr_swap_pages() > 0) if (get_nr_swap_pages() > 0)
inactive_lru_pages += node_page_state(zone->zone_pgdat, NR_INACTIVE_ANON); inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
if (sc->nr_reclaimed < pages_for_compaction && if (sc->nr_reclaimed < pages_for_compaction &&
inactive_lru_pages > pages_for_compaction) inactive_lru_pages > pages_for_compaction)
return true; return true;
/* If compaction would go ahead or the allocation would succeed, stop */ /* If compaction would go ahead or the allocation would succeed, stop */
switch (compaction_suitable(zone, sc->order, 0, 0)) { for (z = 0; z <= sc->reclaim_idx; z++) {
case COMPACT_PARTIAL: struct zone *zone = &pgdat->node_zones[z];
case COMPACT_CONTINUE: if (!populated_zone(zone))
return false; continue;
default:
return true; switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) {
case COMPACT_PARTIAL:
case COMPACT_CONTINUE:
return false;
default:
/* check next zone */
;
}
} }
return true;
} }
static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
...@@ -2425,15 +2435,14 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, ...@@ -2425,15 +2435,14 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
struct reclaim_state *reclaim_state = current->reclaim_state; struct reclaim_state *reclaim_state = current->reclaim_state;
unsigned long nr_reclaimed, nr_scanned; unsigned long nr_reclaimed, nr_scanned;
bool reclaimable = false; bool reclaimable = false;
struct zone *zone = &pgdat->node_zones[classzone_idx];
do { do {
struct mem_cgroup *root = sc->target_mem_cgroup; struct mem_cgroup *root = sc->target_mem_cgroup;
struct mem_cgroup_reclaim_cookie reclaim = { struct mem_cgroup_reclaim_cookie reclaim = {
.zone = zone, .zone = &pgdat->node_zones[classzone_idx],
.priority = sc->priority, .priority = sc->priority,
}; };
unsigned long zone_lru_pages = 0; unsigned long node_lru_pages = 0;
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
nr_reclaimed = sc->nr_reclaimed; nr_reclaimed = sc->nr_reclaimed;
...@@ -2454,11 +2463,11 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, ...@@ -2454,11 +2463,11 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
reclaimed = sc->nr_reclaimed; reclaimed = sc->nr_reclaimed;
scanned = sc->nr_scanned; scanned = sc->nr_scanned;
shrink_zone_memcg(zone, memcg, sc, &lru_pages); shrink_node_memcg(pgdat, memcg, sc, &lru_pages);
zone_lru_pages += lru_pages; node_lru_pages += lru_pages;
if (!global_reclaim(sc)) if (!global_reclaim(sc))
shrink_slab(sc->gfp_mask, zone_to_nid(zone), shrink_slab(sc->gfp_mask, pgdat->node_id,
memcg, sc->nr_scanned - scanned, memcg, sc->nr_scanned - scanned,
lru_pages); lru_pages);
...@@ -2470,7 +2479,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, ...@@ -2470,7 +2479,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
/* /*
* Direct reclaim and kswapd have to scan all memory * Direct reclaim and kswapd have to scan all memory
* cgroups to fulfill the overall scan target for the * cgroups to fulfill the overall scan target for the
* zone. * node.
* *
* Limit reclaim, on the other hand, only cares about * Limit reclaim, on the other hand, only cares about
* nr_to_reclaim pages to be reclaimed and it will * nr_to_reclaim pages to be reclaimed and it will
...@@ -2489,9 +2498,9 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, ...@@ -2489,9 +2498,9 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
* the eligible LRU pages were scanned. * the eligible LRU pages were scanned.
*/ */
if (global_reclaim(sc)) if (global_reclaim(sc))
shrink_slab(sc->gfp_mask, zone_to_nid(zone), NULL, shrink_slab(sc->gfp_mask, pgdat->node_id, NULL,
sc->nr_scanned - nr_scanned, sc->nr_scanned - nr_scanned,
zone_lru_pages); node_lru_pages);
if (reclaim_state) { if (reclaim_state) {
sc->nr_reclaimed += reclaim_state->reclaimed_slab; sc->nr_reclaimed += reclaim_state->reclaimed_slab;
...@@ -2506,7 +2515,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, ...@@ -2506,7 +2515,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
if (sc->nr_reclaimed - nr_reclaimed) if (sc->nr_reclaimed - nr_reclaimed)
reclaimable = true; reclaimable = true;
} while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed, } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
sc->nr_scanned - nr_scanned, sc)); sc->nr_scanned - nr_scanned, sc));
return reclaimable; return reclaimable;
...@@ -2906,7 +2915,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, ...@@ -2906,7 +2915,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
#ifdef CONFIG_MEMCG #ifdef CONFIG_MEMCG
unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
gfp_t gfp_mask, bool noswap, gfp_t gfp_mask, bool noswap,
struct zone *zone, struct zone *zone,
unsigned long *nr_scanned) unsigned long *nr_scanned)
...@@ -2931,11 +2940,11 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, ...@@ -2931,11 +2940,11 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
/* /*
* NOTE: Although we can get the priority field, using it * NOTE: Although we can get the priority field, using it
* here is not a good idea, since it limits the pages we can scan. * here is not a good idea, since it limits the pages we can scan.
* if we don't reclaim here, the shrink_zone from balance_pgdat * if we don't reclaim here, the shrink_node from balance_pgdat
* will pick up pages from other mem cgroup's as well. We hack * will pick up pages from other mem cgroup's as well. We hack
* the priority and make it zero. * the priority and make it zero.
*/ */
shrink_zone_memcg(zone, memcg, &sc, &lru_pages); shrink_node_memcg(zone->zone_pgdat, memcg, &sc, &lru_pages);
trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
...@@ -2994,7 +3003,7 @@ static void age_active_anon(struct pglist_data *pgdat, ...@@ -2994,7 +3003,7 @@ static void age_active_anon(struct pglist_data *pgdat,
memcg = mem_cgroup_iter(NULL, NULL, NULL); memcg = mem_cgroup_iter(NULL, NULL, NULL);
do { do {
struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, zone, memcg);
if (inactive_list_is_low(lruvec, false)) if (inactive_list_is_low(lruvec, false))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec, shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
......
...@@ -218,7 +218,7 @@ void *workingset_eviction(struct address_space *mapping, struct page *page) ...@@ -218,7 +218,7 @@ void *workingset_eviction(struct address_space *mapping, struct page *page)
VM_BUG_ON_PAGE(page_count(page), page); VM_BUG_ON_PAGE(page_count(page), page);
VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page);
lruvec = mem_cgroup_zone_lruvec(zone, memcg); lruvec = mem_cgroup_lruvec(zone->zone_pgdat, zone, memcg);
eviction = atomic_long_inc_return(&lruvec->inactive_age); eviction = atomic_long_inc_return(&lruvec->inactive_age);
return pack_shadow(memcgid, zone, eviction); return pack_shadow(memcgid, zone, eviction);
} }
...@@ -267,7 +267,7 @@ bool workingset_refault(void *shadow) ...@@ -267,7 +267,7 @@ bool workingset_refault(void *shadow)
rcu_read_unlock(); rcu_read_unlock();
return false; return false;
} }
lruvec = mem_cgroup_zone_lruvec(zone, memcg); lruvec = mem_cgroup_lruvec(zone->zone_pgdat, zone, memcg);
refault = atomic_long_read(&lruvec->inactive_age); refault = atomic_long_read(&lruvec->inactive_age);
active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE); active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE);
rcu_read_unlock(); rcu_read_unlock();
...@@ -319,7 +319,7 @@ void workingset_activation(struct page *page) ...@@ -319,7 +319,7 @@ void workingset_activation(struct page *page)
memcg = page_memcg_rcu(page); memcg = page_memcg_rcu(page);
if (!mem_cgroup_disabled() && !memcg) if (!mem_cgroup_disabled() && !memcg)
goto out; goto out;
lruvec = mem_cgroup_zone_lruvec(page_zone(page), memcg); lruvec = mem_cgroup_lruvec(page_pgdat(page), page_zone(page), memcg);
atomic_long_inc(&lruvec->inactive_age); atomic_long_inc(&lruvec->inactive_age);
out: out:
rcu_read_unlock(); rcu_read_unlock();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment