Commit bb3ab596, authored by KOSAKI Motohiro, committed by Linus Torvalds

vmscan: stop kswapd waiting on congestion when the min watermark is not being met

If reclaim fails to make sufficient progress, the priority is raised.
Once the priority is higher, kswapd starts waiting on congestion.
However, if the zone is below the min watermark then kswapd needs to
continue working without delay as there is a danger of an increased rate
of GFP_ATOMIC allocation failure.

This patch changes the conditions under which kswapd waits on congestion
by only going to sleep if the min watermarks are being met.

[mel@csn.ul.ie: add stats to track how relevant the logic is]
[mel@csn.ul.ie: make kswapd only check its own zones and rename the relevant counters]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent f50de2d3
...@@ -40,7 +40,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, ...@@ -40,7 +40,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
PGSCAN_ZONE_RECLAIM_FAILED, PGSCAN_ZONE_RECLAIM_FAILED,
#endif #endif
PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL, PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL,
KSWAPD_PREMATURE_FAST, KSWAPD_PREMATURE_SLOW, KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
KSWAPD_SKIP_CONGESTION_WAIT,
PAGEOUTRUN, ALLOCSTALL, PGROTATED, PAGEOUTRUN, ALLOCSTALL, PGROTATED,
#ifdef CONFIG_HUGETLB_PAGE #ifdef CONFIG_HUGETLB_PAGE
HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
......
...@@ -1905,19 +1905,25 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, ...@@ -1905,19 +1905,25 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
#endif #endif
/* is kswapd sleeping prematurely? */ /* is kswapd sleeping prematurely? */
static int sleeping_prematurely(int order, long remaining) static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
{ {
struct zone *zone; int i;
/* If a direct reclaimer woke kswapd within HZ/10, it's premature */ /* If a direct reclaimer woke kswapd within HZ/10, it's premature */
if (remaining) if (remaining)
return 1; return 1;
/* If after HZ/10, a zone is below the high mark, it's premature */ /* If after HZ/10, a zone is below the high mark, it's premature */
for_each_populated_zone(zone) for (i = 0; i < pgdat->nr_zones; i++) {
struct zone *zone = pgdat->node_zones + i;
if (!populated_zone(zone))
continue;
if (!zone_watermark_ok(zone, order, high_wmark_pages(zone), if (!zone_watermark_ok(zone, order, high_wmark_pages(zone),
0, 0)) 0, 0))
return 1; return 1;
}
return 0; return 0;
} }
...@@ -1979,6 +1985,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order) ...@@ -1979,6 +1985,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
for (priority = DEF_PRIORITY; priority >= 0; priority--) { for (priority = DEF_PRIORITY; priority >= 0; priority--) {
int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
unsigned long lru_pages = 0; unsigned long lru_pages = 0;
int has_under_min_watermark_zone = 0;
/* The swap token gets in the way of swapout... */ /* The swap token gets in the way of swapout... */
if (!priority) if (!priority)
...@@ -2085,6 +2092,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order) ...@@ -2085,6 +2092,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
if (total_scanned > SWAP_CLUSTER_MAX * 2 && if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2) total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
sc.may_writepage = 1; sc.may_writepage = 1;
/*
* We are still under min water mark. it mean we have
* GFP_ATOMIC allocation failure risk. Hurry up!
*/
if (!zone_watermark_ok(zone, order, min_wmark_pages(zone),
end_zone, 0))
has_under_min_watermark_zone = 1;
} }
if (all_zones_ok) if (all_zones_ok)
break; /* kswapd: all done */ break; /* kswapd: all done */
...@@ -2092,8 +2108,12 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order) ...@@ -2092,8 +2108,12 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
* OK, kswapd is getting into trouble. Take a nap, then take * OK, kswapd is getting into trouble. Take a nap, then take
* another pass across the zones. * another pass across the zones.
*/ */
if (total_scanned && priority < DEF_PRIORITY - 2) if (total_scanned && (priority < DEF_PRIORITY - 2)) {
congestion_wait(BLK_RW_ASYNC, HZ/10); if (has_under_min_watermark_zone)
count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
else
congestion_wait(BLK_RW_ASYNC, HZ/10);
}
/* /*
* We do this so kswapd doesn't build up large priorities for * We do this so kswapd doesn't build up large priorities for
...@@ -2207,7 +2227,7 @@ static int kswapd(void *p) ...@@ -2207,7 +2227,7 @@ static int kswapd(void *p)
long remaining = 0; long remaining = 0;
/* Try to sleep for a short interval */ /* Try to sleep for a short interval */
if (!sleeping_prematurely(order, remaining)) { if (!sleeping_prematurely(pgdat, order, remaining)) {
remaining = schedule_timeout(HZ/10); remaining = schedule_timeout(HZ/10);
finish_wait(&pgdat->kswapd_wait, &wait); finish_wait(&pgdat->kswapd_wait, &wait);
prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
...@@ -2218,13 +2238,13 @@ static int kswapd(void *p) ...@@ -2218,13 +2238,13 @@ static int kswapd(void *p)
* premature sleep. If not, then go fully * premature sleep. If not, then go fully
* to sleep until explicitly woken up * to sleep until explicitly woken up
*/ */
if (!sleeping_prematurely(order, remaining)) if (!sleeping_prematurely(pgdat, order, remaining))
schedule(); schedule();
else { else {
if (remaining) if (remaining)
count_vm_event(KSWAPD_PREMATURE_FAST); count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
else else
count_vm_event(KSWAPD_PREMATURE_SLOW); count_vm_event(KSWAPD_HIGH_WMARK_HIT_QUICKLY);
} }
} }
......
...@@ -683,8 +683,9 @@ static const char * const vmstat_text[] = { ...@@ -683,8 +683,9 @@ static const char * const vmstat_text[] = {
"slabs_scanned", "slabs_scanned",
"kswapd_steal", "kswapd_steal",
"kswapd_inodesteal", "kswapd_inodesteal",
"kswapd_slept_prematurely_fast", "kswapd_low_wmark_hit_quickly",
"kswapd_slept_prematurely_slow", "kswapd_high_wmark_hit_quickly",
"kswapd_skip_congestion_wait",
"pageoutrun", "pageoutrun",
"allocstall", "allocstall",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment