Commit d2af3397 authored by Johannes Weiner, committed by Linus Torvalds

mm: vmscan: replace shrink_node() loop with a retry jump

Most of the function body is inside a loop, which imposes an additional
indentation and scoping level that makes the code a bit hard to follow and
modify.

The looping only happens in the case of reclaim-compaction, which isn't the
common case.  So rather than adding yet another function level to the
reclaim path and having every reclaim invocation go through a level that
only exists for one specific corner case, use a retry goto.
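As an illustration of the shape of the change only (not kernel code:
do_reclaim_pass() and should_retry() below are hypothetical stand-ins for
the reclaim pass and the reclaim-compaction check), the conversion looks
roughly like this:

#include <stdbool.h>

/* Hypothetical stand-ins, for illustration only. */
static bool do_reclaim_pass(int *budget) { return (*budget)-- > 0; }
static bool should_retry(int budget) { return budget > 0; }

/* Before: the whole body sits one indent level deep in a do-while loop. */
static bool shrink_before(int budget)
{
	bool reclaimable = false;

	do {
		if (do_reclaim_pass(&budget))
			reclaimable = true;
	} while (should_retry(budget));

	return reclaimable;
}

/* After: the body stays at function scope; the rare retry jumps back. */
static bool shrink_after(int budget)
{
	bool reclaimable = false;
again:
	if (do_reclaim_pass(&budget))
		reclaimable = true;

	if (should_retry(budget))
		goto again;

	return reclaimable;
}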

Link: http://lkml.kernel.org/r/20191022144803.302233-6-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent b5ead35e
@@ -2729,144 +2729,143 @@ static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg)
 static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 {
 	struct reclaim_state *reclaim_state = current->reclaim_state;
+	struct mem_cgroup *root = sc->target_mem_cgroup;
 	unsigned long nr_reclaimed, nr_scanned;
 	bool reclaimable = false;
+	struct mem_cgroup *memcg;
+again:
+	memset(&sc->nr, 0, sizeof(sc->nr));
 
-	do {
-		struct mem_cgroup *root = sc->target_mem_cgroup;
-		struct mem_cgroup *memcg;
-
-		memset(&sc->nr, 0, sizeof(sc->nr));
-
-		nr_reclaimed = sc->nr_reclaimed;
-		nr_scanned = sc->nr_scanned;
+	nr_reclaimed = sc->nr_reclaimed;
+	nr_scanned = sc->nr_scanned;
 
-		memcg = mem_cgroup_iter(root, NULL, NULL);
-		do {
-			unsigned long reclaimed;
-			unsigned long scanned;
+	memcg = mem_cgroup_iter(root, NULL, NULL);
+	do {
+		unsigned long reclaimed;
+		unsigned long scanned;
 
-			switch (mem_cgroup_protected(root, memcg)) {
-			case MEMCG_PROT_MIN:
-				/*
-				 * Hard protection.
-				 * If there is no reclaimable memory, OOM.
-				 */
+		switch (mem_cgroup_protected(root, memcg)) {
+		case MEMCG_PROT_MIN:
+			/*
+			 * Hard protection.
+			 * If there is no reclaimable memory, OOM.
+			 */
+			continue;
+		case MEMCG_PROT_LOW:
+			/*
+			 * Soft protection.
+			 * Respect the protection only as long as
+			 * there is an unprotected supply
+			 * of reclaimable memory from other cgroups.
+			 */
+			if (!sc->memcg_low_reclaim) {
+				sc->memcg_low_skipped = 1;
 				continue;
-			case MEMCG_PROT_LOW:
-				/*
-				 * Soft protection.
-				 * Respect the protection only as long as
-				 * there is an unprotected supply
-				 * of reclaimable memory from other cgroups.
-				 */
-				if (!sc->memcg_low_reclaim) {
-					sc->memcg_low_skipped = 1;
-					continue;
-				}
-				memcg_memory_event(memcg, MEMCG_LOW);
-				break;
-			case MEMCG_PROT_NONE:
-				/*
-				 * All protection thresholds breached. We may
-				 * still choose to vary the scan pressure
-				 * applied based on by how much the cgroup in
-				 * question has exceeded its protection
-				 * thresholds (see get_scan_count).
-				 */
-				break;
 			}
+			memcg_memory_event(memcg, MEMCG_LOW);
+			break;
+		case MEMCG_PROT_NONE:
+			/*
+			 * All protection thresholds breached. We may
+			 * still choose to vary the scan pressure
+			 * applied based on by how much the cgroup in
+			 * question has exceeded its protection
+			 * thresholds (see get_scan_count).
+			 */
+			break;
+		}
 
-			reclaimed = sc->nr_reclaimed;
-			scanned = sc->nr_scanned;
-			shrink_node_memcg(pgdat, memcg, sc);
+		reclaimed = sc->nr_reclaimed;
+		scanned = sc->nr_scanned;
+		shrink_node_memcg(pgdat, memcg, sc);
 
-			shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
-				    sc->priority);
+		shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
+			    sc->priority);
 
-			/* Record the group's reclaim efficiency */
-			vmpressure(sc->gfp_mask, memcg, false,
-				   sc->nr_scanned - scanned,
-				   sc->nr_reclaimed - reclaimed);
+		/* Record the group's reclaim efficiency */
+		vmpressure(sc->gfp_mask, memcg, false,
+			   sc->nr_scanned - scanned,
+			   sc->nr_reclaimed - reclaimed);
 
-		} while ((memcg = mem_cgroup_iter(root, memcg, NULL)));
+	} while ((memcg = mem_cgroup_iter(root, memcg, NULL)));
 
-		if (reclaim_state) {
-			sc->nr_reclaimed += reclaim_state->reclaimed_slab;
-			reclaim_state->reclaimed_slab = 0;
-		}
+	if (reclaim_state) {
+		sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+		reclaim_state->reclaimed_slab = 0;
+	}
 
-		/* Record the subtree's reclaim efficiency */
-		vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
-			   sc->nr_scanned - nr_scanned,
-			   sc->nr_reclaimed - nr_reclaimed);
+	/* Record the subtree's reclaim efficiency */
+	vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
+		   sc->nr_scanned - nr_scanned,
+		   sc->nr_reclaimed - nr_reclaimed);
 
-		if (sc->nr_reclaimed - nr_reclaimed)
-			reclaimable = true;
+	if (sc->nr_reclaimed - nr_reclaimed)
+		reclaimable = true;
 
-		if (current_is_kswapd()) {
-			/*
-			 * If reclaim is isolating dirty pages under writeback,
-			 * it implies that the long-lived page allocation rate
-			 * is exceeding the page laundering rate. Either the
-			 * global limits are not being effective at throttling
-			 * processes due to the page distribution throughout
-			 * zones or there is heavy usage of a slow backing
-			 * device. The only option is to throttle from reclaim
-			 * context which is not ideal as there is no guarantee
-			 * the dirtying process is throttled in the same way
-			 * balance_dirty_pages() manages.
-			 *
-			 * Once a node is flagged PGDAT_WRITEBACK, kswapd will
-			 * count the number of pages under pages flagged for
-			 * immediate reclaim and stall if any are encountered
-			 * in the nr_immediate check below.
-			 */
-			if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
-				set_bit(PGDAT_WRITEBACK, &pgdat->flags);
+	if (current_is_kswapd()) {
+		/*
+		 * If reclaim is isolating dirty pages under writeback,
+		 * it implies that the long-lived page allocation rate
+		 * is exceeding the page laundering rate. Either the
+		 * global limits are not being effective at throttling
+		 * processes due to the page distribution throughout
+		 * zones or there is heavy usage of a slow backing
+		 * device. The only option is to throttle from reclaim
+		 * context which is not ideal as there is no guarantee
+		 * the dirtying process is throttled in the same way
+		 * balance_dirty_pages() manages.
+		 *
+		 * Once a node is flagged PGDAT_WRITEBACK, kswapd will
+		 * count the number of pages under pages flagged for
+		 * immediate reclaim and stall if any are encountered
+		 * in the nr_immediate check below.
+		 */
+		if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
+			set_bit(PGDAT_WRITEBACK, &pgdat->flags);
 
-			/*
-			 * Tag a node as congested if all the dirty pages
-			 * scanned were backed by a congested BDI and
-			 * wait_iff_congested will stall.
-			 */
-			if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
-				set_bit(PGDAT_CONGESTED, &pgdat->flags);
+		/*
+		 * Tag a node as congested if all the dirty pages
+		 * scanned were backed by a congested BDI and
+		 * wait_iff_congested will stall.
+		 */
+		if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
+			set_bit(PGDAT_CONGESTED, &pgdat->flags);
 
-			/* Allow kswapd to start writing pages during reclaim.*/
-			if (sc->nr.unqueued_dirty == sc->nr.file_taken)
-				set_bit(PGDAT_DIRTY, &pgdat->flags);
+		/* Allow kswapd to start writing pages during reclaim.*/
+		if (sc->nr.unqueued_dirty == sc->nr.file_taken)
+			set_bit(PGDAT_DIRTY, &pgdat->flags);
 
-			/*
-			 * If kswapd scans pages marked marked for immediate
-			 * reclaim and under writeback (nr_immediate), it
-			 * implies that pages are cycling through the LRU
-			 * faster than they are written so also forcibly stall.
-			 */
-			if (sc->nr.immediate)
-				congestion_wait(BLK_RW_ASYNC, HZ/10);
-		}
+		/*
+		 * If kswapd scans pages marked marked for immediate
+		 * reclaim and under writeback (nr_immediate), it
+		 * implies that pages are cycling through the LRU
+		 * faster than they are written so also forcibly stall.
+		 */
+		if (sc->nr.immediate)
+			congestion_wait(BLK_RW_ASYNC, HZ/10);
+	}
 
-		/*
-		 * Legacy memcg will stall in page writeback so avoid forcibly
-		 * stalling in wait_iff_congested().
-		 */
-		if (cgroup_reclaim(sc) && writeback_throttling_sane(sc) &&
-		    sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
-			set_memcg_congestion(pgdat, root, true);
+	/*
+	 * Legacy memcg will stall in page writeback so avoid forcibly
+	 * stalling in wait_iff_congested().
+	 */
+	if (cgroup_reclaim(sc) && writeback_throttling_sane(sc) &&
+	    sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
+		set_memcg_congestion(pgdat, root, true);
 
-		/*
-		 * Stall direct reclaim for IO completions if underlying BDIs
-		 * and node is congested. Allow kswapd to continue until it
-		 * starts encountering unqueued dirty pages or cycling through
-		 * the LRU too quickly.
-		 */
-		if (!sc->hibernation_mode && !current_is_kswapd() &&
-		   current_may_throttle() && pgdat_memcg_congested(pgdat, root))
-			wait_iff_congested(BLK_RW_ASYNC, HZ/10);
+	/*
+	 * Stall direct reclaim for IO completions if underlying BDIs
+	 * and node is congested. Allow kswapd to continue until it
+	 * starts encountering unqueued dirty pages or cycling through
+	 * the LRU too quickly.
+	 */
+	if (!sc->hibernation_mode && !current_is_kswapd() &&
+	   current_may_throttle() && pgdat_memcg_congested(pgdat, root))
+		wait_iff_congested(BLK_RW_ASYNC, HZ/10);
 
-	} while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
-					 sc));
+	if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
+				    sc))
+		goto again;
 
 	/*
 	 * Kswapd gives up on balancing particular nodes after too
...