Commit b4a0215e, authored by Kefeng Wang, committed by Andrew Morton

mm: fix null-ptr-deref in kswapd_is_running()

kswapd_run/stop() will set pgdat->kswapd to NULL, which could race with
kswapd_is_running() in kcompactd(),

kswapd_run/stop()                       kcompactd()
                                          kswapd_is_running()
  pgdat->kswapd // error or normal ptr
                                          verify pgdat->kswapd
                                            // load non-NULL
pgdat->kswapd
  pgdat->kswapd = NULL
                                          task_is_running(pgdat->kswapd)
                                            // Null pointer dereference

KASAN reports the null-ptr-deref shown below,

  vmscan: Failed to start kswapd on node 0
  ...
  BUG: KASAN: null-ptr-deref in kcompactd+0x440/0x504
  Read of size 8 at addr 0000000000000024 by task kcompactd0/37

  CPU: 0 PID: 37 Comm: kcompactd0 Kdump: loaded Tainted: G           OE     5.10.60 #1
  Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
  Call trace:
   dump_backtrace+0x0/0x394
   show_stack+0x34/0x4c
   dump_stack+0x158/0x1e4
   __kasan_report+0x138/0x140
   kasan_report+0x44/0xdc
   __asan_load8+0x94/0xd0
   kcompactd+0x440/0x504
   kthread+0x1a4/0x1f0
   ret_from_fork+0x10/0x18

At present kswapd/kcompactd_run() and kswapd/kcompactd_stop() are protected
by mem_hotplug_begin/done(), but kcompactd() itself is not. There is no need
to involve the memory hotplug lock in kcompactd(), so let's add a new mutex
to protect pgdat->kswapd accesses.

Also, because the kcompactd task will check the state of kswapd task, it's
better to call kcompactd_stop() before kswapd_stop() to reduce lock
conflicts.

[akpm@linux-foundation.org: add comments]
Link: https://lkml.kernel.org/r/20220827111959.186838-1-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 639118d1
...@@ -215,6 +215,22 @@ void put_online_mems(void); ...@@ -215,6 +215,22 @@ void put_online_mems(void);
void mem_hotplug_begin(void); void mem_hotplug_begin(void);
void mem_hotplug_done(void); void mem_hotplug_done(void);
/*
 * See kswapd_is_running().
 *
 * Take pgdat->kswapd_lock, the per-node mutex under which pgdat->kswapd is
 * read and written.
 */
static inline void pgdat_kswapd_lock(pg_data_t *pgdat)
{
mutex_lock(&pgdat->kswapd_lock);
}
/* Release pgdat->kswapd_lock previously taken with pgdat_kswapd_lock(). */
static inline void pgdat_kswapd_unlock(pg_data_t *pgdat)
{
mutex_unlock(&pgdat->kswapd_lock);
}
/*
 * Initialise pgdat->kswapd_lock. Called from pgdat_init_internals() so the
 * mutex is ready before anything takes it.
 */
static inline void pgdat_kswapd_lock_init(pg_data_t *pgdat)
{
mutex_init(&pgdat->kswapd_lock);
}
#else /* ! CONFIG_MEMORY_HOTPLUG */ #else /* ! CONFIG_MEMORY_HOTPLUG */
#define pfn_to_online_page(pfn) \ #define pfn_to_online_page(pfn) \
({ \ ({ \
...@@ -251,6 +267,10 @@ static inline bool movable_node_is_enabled(void) ...@@ -251,6 +267,10 @@ static inline bool movable_node_is_enabled(void)
{ {
return false; return false;
} }
/*
 * Without CONFIG_MEMORY_HOTPLUG the kswapd_lock member is not present in
 * struct pglist_data, so the lock helpers are empty stubs.
 */
static inline void pgdat_kswapd_lock(pg_data_t *pgdat) {}
static inline void pgdat_kswapd_unlock(pg_data_t *pgdat) {}
static inline void pgdat_kswapd_lock_init(pg_data_t *pgdat) {}
#endif /* ! CONFIG_MEMORY_HOTPLUG */ #endif /* ! CONFIG_MEMORY_HOTPLUG */
/* /*
......
...@@ -956,8 +956,10 @@ typedef struct pglist_data { ...@@ -956,8 +956,10 @@ typedef struct pglist_data {
atomic_t nr_writeback_throttled;/* nr of writeback-throttled tasks */ atomic_t nr_writeback_throttled;/* nr of writeback-throttled tasks */
unsigned long nr_reclaim_start; /* nr pages written while throttled unsigned long nr_reclaim_start; /* nr pages written while throttled
* when throttling started. */ * when throttling started. */
struct task_struct *kswapd; /* Protected by #ifdef CONFIG_MEMORY_HOTPLUG
mem_hotplug_begin/done() */ struct mutex kswapd_lock;
#endif
struct task_struct *kswapd; /* Protected by kswapd_lock */
int kswapd_order; int kswapd_order;
enum zone_type kswapd_highest_zoneidx; enum zone_type kswapd_highest_zoneidx;
......
...@@ -1981,9 +1981,21 @@ static inline bool is_via_compact_memory(int order) ...@@ -1981,9 +1981,21 @@ static inline bool is_via_compact_memory(int order)
return order == -1; return order == -1;
} }
/*
* Determine whether kswapd is (or recently was!) running on this node.
*
* pgdat_kswapd_lock() pins pgdat->kswapd, so a concurrent kswapd_stop() can't
* zero it.
*/
static bool kswapd_is_running(pg_data_t *pgdat) static bool kswapd_is_running(pg_data_t *pgdat)
{ {
return pgdat->kswapd && task_is_running(pgdat->kswapd); bool running;
pgdat_kswapd_lock(pgdat);
running = pgdat->kswapd && task_is_running(pgdat->kswapd);
pgdat_kswapd_unlock(pgdat);
return running;
} }
/* /*
......
...@@ -1940,8 +1940,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, ...@@ -1940,8 +1940,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
node_states_clear_node(node, &arg); node_states_clear_node(node, &arg);
if (arg.status_change_nid >= 0) { if (arg.status_change_nid >= 0) {
kswapd_stop(node);
kcompactd_stop(node); kcompactd_stop(node);
kswapd_stop(node);
} }
writeback_set_ratelimit(); writeback_set_ratelimit();
......
...@@ -7616,6 +7616,7 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat) ...@@ -7616,6 +7616,7 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
int i; int i;
pgdat_resize_init(pgdat); pgdat_resize_init(pgdat);
pgdat_kswapd_lock_init(pgdat);
pgdat_init_split_queue(pgdat); pgdat_init_split_queue(pgdat);
pgdat_init_kcompactd(pgdat); pgdat_init_kcompactd(pgdat);
......
...@@ -4643,16 +4643,17 @@ void kswapd_run(int nid) ...@@ -4643,16 +4643,17 @@ void kswapd_run(int nid)
{ {
pg_data_t *pgdat = NODE_DATA(nid); pg_data_t *pgdat = NODE_DATA(nid);
if (pgdat->kswapd) pgdat_kswapd_lock(pgdat);
return; if (!pgdat->kswapd) {
pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid); if (IS_ERR(pgdat->kswapd)) {
if (IS_ERR(pgdat->kswapd)) { /* failure at boot is fatal */
/* failure at boot is fatal */ BUG_ON(system_state < SYSTEM_RUNNING);
BUG_ON(system_state < SYSTEM_RUNNING); pr_err("Failed to start kswapd on node %d\n", nid);
pr_err("Failed to start kswapd on node %d\n", nid); pgdat->kswapd = NULL;
pgdat->kswapd = NULL; }
} }
pgdat_kswapd_unlock(pgdat);
} }
/* /*
...@@ -4661,12 +4662,16 @@ void kswapd_run(int nid) ...@@ -4661,12 +4662,16 @@ void kswapd_run(int nid)
*/ */
void kswapd_stop(int nid) void kswapd_stop(int nid)
{ {
struct task_struct *kswapd = NODE_DATA(nid)->kswapd; pg_data_t *pgdat = NODE_DATA(nid);
struct task_struct *kswapd;
pgdat_kswapd_lock(pgdat);
kswapd = pgdat->kswapd;
if (kswapd) { if (kswapd) {
kthread_stop(kswapd); kthread_stop(kswapd);
NODE_DATA(nid)->kswapd = NULL; pgdat->kswapd = NULL;
} }
pgdat_kswapd_unlock(pgdat);
} }
static int __init kswapd_init(void) static int __init kswapd_init(void)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment