Commit f56ce412, authored by Johannes Weiner and committed by Linus Torvalds

mm: memcontrol: fix occasional OOMs due to proportional memory.low reclaim

We've noticed occasional OOM killing when memory.low settings are in
effect for cgroups.  This is unexpected and undesirable as memory.low is
supposed to express non-OOMing memory priorities between cgroups.

The reason for this is proportional memory.low reclaim.  When cgroups
are below their memory.low threshold, reclaim passes them over in the
first round, and then retries if it couldn't find pages anywhere else.
But when cgroups are slightly above their memory.low setting, page scan
force is scaled down and diminished in proportion to the overage, to the
point where it can cause reclaim to fail as well - only in that case we
currently don't retry, and instead trigger OOM.

To fix this, hook proportional reclaim into the same retry logic we have
in place for when cgroups are skipped entirely.  This way if reclaim
fails and some cgroups were scanned with diminished pressure, we'll try
another full-force cycle before giving up and OOMing.

[akpm@linux-foundation.org: coding-style fixes]

Link: https://lkml.kernel.org/r/20210817180506.220056-1-hannes@cmpxchg.org
Fixes: 9783aa99 ("mm, memcg: proportional memory.{low,min} reclaim")
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Leon Yang <lnyng@fb.com>
Reviewed-by: Rik van Riel <riel@surriel.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Roman Gushchin <guro@fb.com>
Acked-by: Chris Down <chris@chrisdown.name>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: <stable@vger.kernel.org>		[5.4+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 91ed3ed0
...@@ -612,12 +612,15 @@ static inline bool mem_cgroup_disabled(void) ...@@ -612,12 +612,15 @@ static inline bool mem_cgroup_disabled(void)
return !cgroup_subsys_enabled(memory_cgrp_subsys); return !cgroup_subsys_enabled(memory_cgrp_subsys);
} }
static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root, static inline void mem_cgroup_protection(struct mem_cgroup *root,
struct mem_cgroup *memcg, struct mem_cgroup *memcg,
bool in_low_reclaim) unsigned long *min,
unsigned long *low)
{ {
*min = *low = 0;
if (mem_cgroup_disabled()) if (mem_cgroup_disabled())
return 0; return;
/* /*
* There is no reclaim protection applied to a targeted reclaim. * There is no reclaim protection applied to a targeted reclaim.
...@@ -653,13 +656,10 @@ static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root, ...@@ -653,13 +656,10 @@ static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root,
* *
*/ */
if (root == memcg) if (root == memcg)
return 0; return;
if (in_low_reclaim)
return READ_ONCE(memcg->memory.emin);
return max(READ_ONCE(memcg->memory.emin), *min = READ_ONCE(memcg->memory.emin);
READ_ONCE(memcg->memory.elow)); *low = READ_ONCE(memcg->memory.elow);
} }
void mem_cgroup_calculate_protection(struct mem_cgroup *root, void mem_cgroup_calculate_protection(struct mem_cgroup *root,
...@@ -1147,11 +1147,12 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, ...@@ -1147,11 +1147,12 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
{ {
} }
static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root, static inline void mem_cgroup_protection(struct mem_cgroup *root,
struct mem_cgroup *memcg, struct mem_cgroup *memcg,
bool in_low_reclaim) unsigned long *min,
unsigned long *low)
{ {
return 0; *min = *low = 0;
} }
static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root, static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root,
......
...@@ -100,9 +100,12 @@ struct scan_control { ...@@ -100,9 +100,12 @@ struct scan_control {
unsigned int may_swap:1; unsigned int may_swap:1;
/* /*
* Cgroups are not reclaimed below their configured memory.low, * Cgroup memory below memory.low is protected as long as we
* unless we threaten to OOM. If any cgroups are skipped due to * don't threaten to OOM. If any cgroup is reclaimed at
* memory.low and nothing was reclaimed, go back for memory.low. * reduced force or passed over entirely due to its memory.low
* setting (memcg_low_skipped), and nothing is reclaimed as a
* result, then go back for one more cycle that reclaims the protected
* memory (memcg_low_reclaim) to avert OOM.
*/ */
unsigned int memcg_low_reclaim:1; unsigned int memcg_low_reclaim:1;
unsigned int memcg_low_skipped:1; unsigned int memcg_low_skipped:1;
...@@ -2537,15 +2540,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, ...@@ -2537,15 +2540,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
for_each_evictable_lru(lru) { for_each_evictable_lru(lru) {
int file = is_file_lru(lru); int file = is_file_lru(lru);
unsigned long lruvec_size; unsigned long lruvec_size;
unsigned long low, min;
unsigned long scan; unsigned long scan;
unsigned long protection;
lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
protection = mem_cgroup_protection(sc->target_mem_cgroup, mem_cgroup_protection(sc->target_mem_cgroup, memcg,
memcg, &min, &low);
sc->memcg_low_reclaim);
if (protection) { if (min || low) {
/* /*
* Scale a cgroup's reclaim pressure by proportioning * Scale a cgroup's reclaim pressure by proportioning
* its current usage to its memory.low or memory.min * its current usage to its memory.low or memory.min
...@@ -2576,6 +2578,15 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, ...@@ -2576,6 +2578,15 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* hard protection. * hard protection.
*/ */
unsigned long cgroup_size = mem_cgroup_size(memcg); unsigned long cgroup_size = mem_cgroup_size(memcg);
unsigned long protection;
/* memory.low scaling, make sure we retry before OOM */
if (!sc->memcg_low_reclaim && low > min) {
protection = low;
sc->memcg_low_skipped = 1;
} else {
protection = min;
}
/* Avoid TOCTOU with earlier protection check */ /* Avoid TOCTOU with earlier protection check */
cgroup_size = max(cgroup_size, protection); cgroup_size = max(cgroup_size, protection);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment