Commit c68ed794 authored by Ingo Molnar's avatar Ingo Molnar Committed by Linus Torvalds

mm/vmstat: protect per cpu variables with preempt disable on RT

Disable preemption on -RT for the vmstat code.  On vanilla the code runs in
IRQ-off regions while on -RT it may not when stats are updated under a
local_lock.  "preempt_disable" ensures that the same resource is not
updated in parallel due to preemption.

This patch differs from the preempt-rt version where __count_vm_event and
__count_vm_events are also protected.  The counters are explicitly
"allowed to be racy" so there is no need to protect them from
preemption.  Only the accurate page stats that are updated by a
read-modify-write need protection.  This patch also differs in that a
preempt_[en|dis]able_rt helper is not used.  As vmstat is the only user of
the helper, it was suggested that it be open-coded in vmstat.c instead of
risking the helper being used in unnecessary contexts.

Link: https://lkml.kernel.org/r/20210805160019.1137-2-mgorman@techsingularity.net
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Signed-off-by: default avatarMel Gorman <mgorman@techsingularity.net>
Acked-by: default avatarVlastimil Babka <vbabka@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 2d338201
...@@ -319,6 +319,16 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, ...@@ -319,6 +319,16 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
long x; long x;
long t; long t;
/*
* Accurate vmstat updates require a RMW. On !PREEMPT_RT kernels,
* atomicity is provided by IRQs being disabled -- either explicitly
* or via local_lock_irq. On PREEMPT_RT, local_lock_irq only disables
* CPU migrations and preemption potentially corrupts a counter so
* disable preemption.
*/
if (IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_disable();
x = delta + __this_cpu_read(*p); x = delta + __this_cpu_read(*p);
t = __this_cpu_read(pcp->stat_threshold); t = __this_cpu_read(pcp->stat_threshold);
...@@ -328,6 +338,9 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, ...@@ -328,6 +338,9 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
x = 0; x = 0;
} }
__this_cpu_write(*p, x); __this_cpu_write(*p, x);
if (IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_enable();
} }
EXPORT_SYMBOL(__mod_zone_page_state); EXPORT_SYMBOL(__mod_zone_page_state);
...@@ -350,6 +363,10 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, ...@@ -350,6 +363,10 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
delta >>= PAGE_SHIFT; delta >>= PAGE_SHIFT;
} }
/* See __mod_zone_page_state */
if (IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_disable();
x = delta + __this_cpu_read(*p); x = delta + __this_cpu_read(*p);
t = __this_cpu_read(pcp->stat_threshold); t = __this_cpu_read(pcp->stat_threshold);
...@@ -359,6 +376,9 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, ...@@ -359,6 +376,9 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
x = 0; x = 0;
} }
__this_cpu_write(*p, x); __this_cpu_write(*p, x);
if (IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_enable();
} }
EXPORT_SYMBOL(__mod_node_page_state); EXPORT_SYMBOL(__mod_node_page_state);
...@@ -391,6 +411,10 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) ...@@ -391,6 +411,10 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
s8 __percpu *p = pcp->vm_stat_diff + item; s8 __percpu *p = pcp->vm_stat_diff + item;
s8 v, t; s8 v, t;
/* See __mod_node_page_state */
if (IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_disable();
v = __this_cpu_inc_return(*p); v = __this_cpu_inc_return(*p);
t = __this_cpu_read(pcp->stat_threshold); t = __this_cpu_read(pcp->stat_threshold);
if (unlikely(v > t)) { if (unlikely(v > t)) {
...@@ -399,6 +423,9 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) ...@@ -399,6 +423,9 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
zone_page_state_add(v + overstep, zone, item); zone_page_state_add(v + overstep, zone, item);
__this_cpu_write(*p, -overstep); __this_cpu_write(*p, -overstep);
} }
if (IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_enable();
} }
void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
...@@ -409,6 +436,10 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) ...@@ -409,6 +436,10 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
/* See __mod_node_page_state */
if (IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_disable();
v = __this_cpu_inc_return(*p); v = __this_cpu_inc_return(*p);
t = __this_cpu_read(pcp->stat_threshold); t = __this_cpu_read(pcp->stat_threshold);
if (unlikely(v > t)) { if (unlikely(v > t)) {
...@@ -417,6 +448,9 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) ...@@ -417,6 +448,9 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
node_page_state_add(v + overstep, pgdat, item); node_page_state_add(v + overstep, pgdat, item);
__this_cpu_write(*p, -overstep); __this_cpu_write(*p, -overstep);
} }
if (IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_enable();
} }
void __inc_zone_page_state(struct page *page, enum zone_stat_item item) void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
...@@ -437,6 +471,10 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) ...@@ -437,6 +471,10 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
s8 __percpu *p = pcp->vm_stat_diff + item; s8 __percpu *p = pcp->vm_stat_diff + item;
s8 v, t; s8 v, t;
/* See __mod_node_page_state */
if (IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_disable();
v = __this_cpu_dec_return(*p); v = __this_cpu_dec_return(*p);
t = __this_cpu_read(pcp->stat_threshold); t = __this_cpu_read(pcp->stat_threshold);
if (unlikely(v < - t)) { if (unlikely(v < - t)) {
...@@ -445,6 +483,9 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) ...@@ -445,6 +483,9 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
zone_page_state_add(v - overstep, zone, item); zone_page_state_add(v - overstep, zone, item);
__this_cpu_write(*p, overstep); __this_cpu_write(*p, overstep);
} }
if (IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_enable();
} }
void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
...@@ -455,6 +496,10 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) ...@@ -455,6 +496,10 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
/* See __mod_node_page_state */
if (IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_disable();
v = __this_cpu_dec_return(*p); v = __this_cpu_dec_return(*p);
t = __this_cpu_read(pcp->stat_threshold); t = __this_cpu_read(pcp->stat_threshold);
if (unlikely(v < - t)) { if (unlikely(v < - t)) {
...@@ -463,6 +508,9 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) ...@@ -463,6 +508,9 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
node_page_state_add(v - overstep, pgdat, item); node_page_state_add(v - overstep, pgdat, item);
__this_cpu_write(*p, overstep); __this_cpu_write(*p, overstep);
} }
if (IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_enable();
} }
void __dec_zone_page_state(struct page *page, enum zone_stat_item item) void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment