Commit ec0db74b authored by Kinsey Ho, committed by Andrew Morton

mm: restart if multiple traversals raced

Currently, if multiple reclaimers raced on the same position, the
reclaimers which detect the race will still reclaim from the same memcg. 
Instead, the reclaimers which detect the race should move on to the next
memcg in the hierarchy.

So, in the case where multiple traversals race, jump back to the start of
the mem_cgroup_iter() function to find the next memcg in the hierarchy to
reclaim from.

Link: https://lkml.kernel.org/r/20240905003058.1859929-5-kinseyho@google.com
Reported-by: syzbot+e099d407346c45275ce9@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/000000000000817cf10620e20d33@google.com/
Signed-off-by: Kinsey Ho <kinseyho@google.com>
Reviewed-by: T.J. Mercier <tjmercier@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Tejun Heo <tj@kernel.org>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: Zefan Li <lizefan.x@bytedance.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 3d150e31
...@@ -57,7 +57,7 @@ enum memcg_memory_event { ...@@ -57,7 +57,7 @@ enum memcg_memory_event {
struct mem_cgroup_reclaim_cookie { struct mem_cgroup_reclaim_cookie {
pg_data_t *pgdat; pg_data_t *pgdat;
unsigned int generation; int generation;
}; };
#ifdef CONFIG_MEMCG #ifdef CONFIG_MEMCG
...@@ -78,7 +78,7 @@ struct lruvec_stats; ...@@ -78,7 +78,7 @@ struct lruvec_stats;
struct mem_cgroup_reclaim_iter { struct mem_cgroup_reclaim_iter {
struct mem_cgroup *position; struct mem_cgroup *position;
/* scan generation, increased every round-trip */ /* scan generation, increased every round-trip */
unsigned int generation; atomic_t generation;
}; };
/* /*
......
...@@ -986,8 +986,8 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, ...@@ -986,8 +986,8 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
struct mem_cgroup_reclaim_cookie *reclaim) struct mem_cgroup_reclaim_cookie *reclaim)
{ {
struct mem_cgroup_reclaim_iter *iter; struct mem_cgroup_reclaim_iter *iter;
struct cgroup_subsys_state *css = NULL; struct cgroup_subsys_state *css;
struct mem_cgroup *memcg = NULL; struct mem_cgroup *memcg;
struct mem_cgroup *pos = NULL; struct mem_cgroup *pos = NULL;
if (mem_cgroup_disabled()) if (mem_cgroup_disabled())
...@@ -998,19 +998,23 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, ...@@ -998,19 +998,23 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
rcu_read_lock(); rcu_read_lock();
restart: restart:
memcg = NULL;
if (reclaim) { if (reclaim) {
int gen;
struct mem_cgroup_per_node *mz; struct mem_cgroup_per_node *mz;
mz = root->nodeinfo[reclaim->pgdat->node_id]; mz = root->nodeinfo[reclaim->pgdat->node_id];
iter = &mz->iter; iter = &mz->iter;
gen = atomic_read(&iter->generation);
/* /*
* On start, join the current reclaim iteration cycle. * On start, join the current reclaim iteration cycle.
* Exit when a concurrent walker completes it. * Exit when a concurrent walker completes it.
*/ */
if (!prev) if (!prev)
reclaim->generation = iter->generation; reclaim->generation = gen;
else if (reclaim->generation != iter->generation) else if (reclaim->generation != gen)
goto out_unlock; goto out_unlock;
pos = READ_ONCE(iter->position); pos = READ_ONCE(iter->position);
...@@ -1018,8 +1022,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, ...@@ -1018,8 +1022,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
pos = prev; pos = prev;
} }
if (pos) css = pos ? &pos->css : NULL;
css = &pos->css;
for (;;) { for (;;) {
css = css_next_descendant_pre(css, &root->css); css = css_next_descendant_pre(css, &root->css);
...@@ -1033,21 +1036,26 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, ...@@ -1033,21 +1036,26 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
* and kicking, and don't take an extra reference. * and kicking, and don't take an extra reference.
*/ */
if (css == &root->css || css_tryget(css)) { if (css == &root->css || css_tryget(css)) {
memcg = mem_cgroup_from_css(css);
break; break;
} }
} }
memcg = mem_cgroup_from_css(css);
if (reclaim) { if (reclaim) {
/* /*
* The position could have already been updated by a competing * The position could have already been updated by a competing
* thread, so check that the value hasn't changed since we read * thread, so check that the value hasn't changed since we read
* it to avoid reclaiming from the same cgroup twice. * it to avoid reclaiming from the same cgroup twice.
*/ */
(void)cmpxchg(&iter->position, pos, memcg); if (cmpxchg(&iter->position, pos, memcg) != pos) {
if (css && css != &root->css)
css_put(css);
goto restart;
}
if (!memcg) { if (!memcg) {
iter->generation++; atomic_inc(&iter->generation);
/* /*
* Reclaimers share the hierarchy walk, and a * Reclaimers share the hierarchy walk, and a
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment