Commit 6811378e authored by Yasunori Goto, committed by Linus Torvalds

[PATCH] wait_table and zonelist initializing for memory hotadd: update zonelists

In the current code, a zonelist is assumed to be built once and never modified.
But memory hotplug can add a new zone/pgdat, so the zonelists must be updated.

This patch modifies build_all_zonelists() so that it can reconfigure the
pgdats' zonelists.

To update them safely, this patch uses stop_machine_run(), which ensures that
other CPUs do not touch the zonelists while they are being updated.

In the old version (V2 of node hotadd), the kernel updated them after zone
initialization.  But present_pages of the new zone is still 0 at that point,
because online_page() has not been called yet.  build_zonelists() checks
present_pages to find populated zones, so that was too early.  So, I changed
it to happen after online_pages().
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent cca448fe
...@@ -127,6 +127,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) ...@@ -127,6 +127,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
unsigned long flags; unsigned long flags;
unsigned long onlined_pages = 0; unsigned long onlined_pages = 0;
struct zone *zone; struct zone *zone;
int need_zonelists_rebuild = 0;
/* /*
* This doesn't need a lock to do pfn_to_page(). * This doesn't need a lock to do pfn_to_page().
...@@ -139,6 +140,14 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) ...@@ -139,6 +140,14 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
grow_pgdat_span(zone->zone_pgdat, pfn, pfn + nr_pages); grow_pgdat_span(zone->zone_pgdat, pfn, pfn + nr_pages);
pgdat_resize_unlock(zone->zone_pgdat, &flags); pgdat_resize_unlock(zone->zone_pgdat, &flags);
/*
* If this zone is not populated, then it is not in zonelist.
* This means the page allocator ignores this zone.
* So, zonelist must be updated after online.
*/
if (!populated_zone(zone))
need_zonelists_rebuild = 1;
for (i = 0; i < nr_pages; i++) { for (i = 0; i < nr_pages; i++) {
struct page *page = pfn_to_page(pfn + i); struct page *page = pfn_to_page(pfn + i);
online_page(page); online_page(page);
...@@ -149,5 +158,8 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) ...@@ -149,5 +158,8 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
setup_per_zone_pages_min(); setup_per_zone_pages_min();
if (need_zonelists_rebuild)
build_all_zonelists();
return 0; return 0;
} }
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include <linux/nodemask.h> #include <linux/nodemask.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/mempolicy.h> #include <linux/mempolicy.h>
#include <linux/stop_machine.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/div64.h> #include <asm/div64.h>
...@@ -1704,14 +1705,29 @@ static void __meminit build_zonelists(pg_data_t *pgdat) ...@@ -1704,14 +1705,29 @@ static void __meminit build_zonelists(pg_data_t *pgdat)
#endif /* CONFIG_NUMA */ #endif /* CONFIG_NUMA */
void __init build_all_zonelists(void) /* return values int ....just for stop_machine_run() */
static int __meminit __build_all_zonelists(void *dummy)
{ {
int i; int nid;
for_each_online_node(nid)
build_zonelists(NODE_DATA(nid));
return 0;
}
void __meminit build_all_zonelists(void)
{
if (system_state == SYSTEM_BOOTING) {
__build_all_zonelists(0);
cpuset_init_current_mems_allowed();
} else {
/* we have to stop all cpus to guaranntee there is no user
of zonelist */
stop_machine_run(__build_all_zonelists, NULL, NR_CPUS);
/* cpuset refresh routine should be here */
}
for_each_online_node(i)
build_zonelists(NODE_DATA(i));
printk("Built %i zonelists\n", num_online_nodes()); printk("Built %i zonelists\n", num_online_nodes());
cpuset_init_current_mems_allowed();
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment