Commit e26733e0 authored by Chris Down's avatar Chris Down Committed by Linus Torvalds

mm, memcg: throttle allocators based on ancestral memory.high

Prior to this commit, we only directly check the affected cgroup's
memory.high against its usage.  However, it's possible that we are being
reclaimed as a result of hitting an ancestor memory.high and should be
penalised based on that, instead.

This patch changes memory.high overage throttling to use the largest
overage in its ancestors when considering how many penalty jiffies to
charge.  This makes sure that we penalise poorly behaving cgroups in the
same way regardless of at what level of the hierarchy memory.high was
breached.

Fixes: 0e4b01df ("mm, memcg: throttle allocators when failing reclaim over memory.high")
Reported-by: default avatarJohannes Weiner <hannes@cmpxchg.org>
Signed-off-by: default avatarChris Down <chris@chrisdown.name>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Acked-by: default avatarJohannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Nathan Chancellor <natechancellor@gmail.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: <stable@vger.kernel.org>	[5.4.x+]
Link: http://lkml.kernel.org/r/8cd132f84bd7e16cdb8fde3378cdbf05ba00d387.1584036142.git.chris@chrisdown.nameSigned-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent d397a45f
...@@ -2297,28 +2297,41 @@ static void high_work_func(struct work_struct *work) ...@@ -2297,28 +2297,41 @@ static void high_work_func(struct work_struct *work)
#define MEMCG_DELAY_SCALING_SHIFT 14 #define MEMCG_DELAY_SCALING_SHIFT 14
/* /*
* Scheduled by try_charge() to be executed from the userland return path * Get the number of jiffies that we should penalise a mischievous cgroup which
* and reclaims memory over the high limit. * is exceeding its memory.high by checking both it and its ancestors.
*/ */
void mem_cgroup_handle_over_high(void) static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
unsigned int nr_pages)
{ {
unsigned long usage, high, clamped_high; unsigned long penalty_jiffies;
unsigned long pflags; u64 max_overage = 0;
unsigned long penalty_jiffies, overage;
unsigned int nr_pages = current->memcg_nr_pages_over_high;
struct mem_cgroup *memcg;
if (likely(!nr_pages)) do {
return; unsigned long usage, high;
u64 overage;
memcg = get_mem_cgroup_from_mm(current->mm); usage = page_counter_read(&memcg->memory);
reclaim_high(memcg, nr_pages, GFP_KERNEL); high = READ_ONCE(memcg->high);
current->memcg_nr_pages_over_high = 0;
/*
* Prevent division by 0 in overage calculation by acting as if
* it was a threshold of 1 page
*/
high = max(high, 1UL);
overage = usage - high;
overage <<= MEMCG_DELAY_PRECISION_SHIFT;
overage = div64_u64(overage, high);
if (overage > max_overage)
max_overage = overage;
} while ((memcg = parent_mem_cgroup(memcg)) &&
!mem_cgroup_is_root(memcg));
if (!max_overage)
return 0;
/* /*
* memory.high is breached and reclaim is unable to keep up. Throttle
* allocators proactively to slow down excessive growth.
*
* We use overage compared to memory.high to calculate the number of * We use overage compared to memory.high to calculate the number of
* jiffies to sleep (penalty_jiffies). Ideally this value should be * jiffies to sleep (penalty_jiffies). Ideally this value should be
* fairly lenient on small overages, and increasingly harsh when the * fairly lenient on small overages, and increasingly harsh when the
...@@ -2326,24 +2339,9 @@ void mem_cgroup_handle_over_high(void) ...@@ -2326,24 +2339,9 @@ void mem_cgroup_handle_over_high(void)
* its crazy behaviour, so we exponentially increase the delay based on * its crazy behaviour, so we exponentially increase the delay based on
* overage amount. * overage amount.
*/ */
penalty_jiffies = max_overage * max_overage * HZ;
usage = page_counter_read(&memcg->memory); penalty_jiffies >>= MEMCG_DELAY_PRECISION_SHIFT;
high = READ_ONCE(memcg->high); penalty_jiffies >>= MEMCG_DELAY_SCALING_SHIFT;
if (usage <= high)
goto out;
/*
* Prevent division by 0 in overage calculation by acting as if it was a
* threshold of 1 page
*/
clamped_high = max(high, 1UL);
overage = div64_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT,
clamped_high);
penalty_jiffies = ((u64)overage * overage * HZ)
>> (MEMCG_DELAY_PRECISION_SHIFT + MEMCG_DELAY_SCALING_SHIFT);
/* /*
* Factor in the task's own contribution to the overage, such that four * Factor in the task's own contribution to the overage, such that four
...@@ -2360,7 +2358,32 @@ void mem_cgroup_handle_over_high(void) ...@@ -2360,7 +2358,32 @@ void mem_cgroup_handle_over_high(void)
* application moving forwards and also permit diagnostics, albeit * application moving forwards and also permit diagnostics, albeit
* extremely slowly. * extremely slowly.
*/ */
penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES); return min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
}
/*
* Scheduled by try_charge() to be executed from the userland return path
* and reclaims memory over the high limit.
*/
void mem_cgroup_handle_over_high(void)
{
unsigned long penalty_jiffies;
unsigned long pflags;
unsigned int nr_pages = current->memcg_nr_pages_over_high;
struct mem_cgroup *memcg;
if (likely(!nr_pages))
return;
memcg = get_mem_cgroup_from_mm(current->mm);
reclaim_high(memcg, nr_pages, GFP_KERNEL);
current->memcg_nr_pages_over_high = 0;
/*
* memory.high is breached and reclaim is unable to keep up. Throttle
* allocators proactively to slow down excessive growth.
*/
penalty_jiffies = calculate_high_delay(memcg, nr_pages);
/* /*
* Don't sleep if the amount of jiffies this memcg owes us is so low * Don't sleep if the amount of jiffies this memcg owes us is so low
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment