Commit 4009b2f1 authored by Yosry Ahmed, committed by Andrew Morton

workingset: memcg: sleep when flushing stats in workingset_refault()

In workingset_refault(), we call
mem_cgroup_flush_stats_atomic_ratelimited() to read accurate stats within
an RCU read section and with sleeping disallowed.  Move the call above the
RCU read section to make it non-atomic.

Flushing is an expensive operation that scales with the number of cpus and
the number of cgroups in the system, so avoid doing it atomically where
possible.

Since workingset_refault() is the only caller of
mem_cgroup_flush_stats_atomic_ratelimited(), just make it non-atomic, and
rename it to mem_cgroup_flush_stats_ratelimited().

Link: https://lkml.kernel.org/r/20230330191801.1967435-7-yosryahmed@google.com
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vasily Averin <vasily.averin@linux.dev>
Cc: Zefan Li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 9fad9aee
...@@ -1039,7 +1039,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, ...@@ -1039,7 +1039,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
void mem_cgroup_flush_stats(void); void mem_cgroup_flush_stats(void);
void mem_cgroup_flush_stats_atomic(void); void mem_cgroup_flush_stats_atomic(void);
void mem_cgroup_flush_stats_atomic_ratelimited(void); void mem_cgroup_flush_stats_ratelimited(void);
void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
int val); int val);
...@@ -1541,7 +1541,7 @@ static inline void mem_cgroup_flush_stats_atomic(void) ...@@ -1541,7 +1541,7 @@ static inline void mem_cgroup_flush_stats_atomic(void)
{ {
} }
static inline void mem_cgroup_flush_stats_atomic_ratelimited(void) static inline void mem_cgroup_flush_stats_ratelimited(void)
{ {
} }
......
...@@ -674,10 +674,10 @@ void mem_cgroup_flush_stats_atomic(void) ...@@ -674,10 +674,10 @@ void mem_cgroup_flush_stats_atomic(void)
do_flush_stats(true); do_flush_stats(true);
} }
void mem_cgroup_flush_stats_atomic_ratelimited(void) void mem_cgroup_flush_stats_ratelimited(void)
{ {
if (time_after64(jiffies_64, READ_ONCE(flush_next_time))) if (time_after64(jiffies_64, READ_ONCE(flush_next_time)))
mem_cgroup_flush_stats_atomic(); mem_cgroup_flush_stats();
} }
static void flush_memcg_stats_dwork(struct work_struct *w) static void flush_memcg_stats_dwork(struct work_struct *w)
......
...@@ -406,6 +406,9 @@ void workingset_refault(struct folio *folio, void *shadow) ...@@ -406,6 +406,9 @@ void workingset_refault(struct folio *folio, void *shadow)
unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset); unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset);
eviction <<= bucket_order; eviction <<= bucket_order;
/* Flush stats (and potentially sleep) before holding RCU read lock */
mem_cgroup_flush_stats_ratelimited();
rcu_read_lock(); rcu_read_lock();
/* /*
* Look up the memcg associated with the stored ID. It might * Look up the memcg associated with the stored ID. It might
...@@ -461,8 +464,6 @@ void workingset_refault(struct folio *folio, void *shadow) ...@@ -461,8 +464,6 @@ void workingset_refault(struct folio *folio, void *shadow)
lruvec = mem_cgroup_lruvec(memcg, pgdat); lruvec = mem_cgroup_lruvec(memcg, pgdat);
mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr); mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);
mem_cgroup_flush_stats_atomic_ratelimited();
/* /*
* Compare the distance to the existing workingset size. We * Compare the distance to the existing workingset size. We
* don't activate pages that couldn't stay resident even if * don't activate pages that couldn't stay resident even if
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment