Commit 482ec7c4, authored by Nathan Lynch, committed by Paul Mackerras

[PATCH] powerpc numa: Support sparse online node map

The powerpc numa code unconditionally onlines all nodes from 0 to the
highest node id found, regardless of whether cpus or memory are
present in the nodes.  This wastes 8K per node and complicates some
cpu and memory hotplug situations, such as adding a resource that
doesn't map to one of the nodes discovered at boot.

Set nodes online as resources are scanned.  Fall back to node 0 only
when we're sure this isn't a NUMA machine.

Instead of defaulting to node 0 for cases of hot-adding a resource
which doesn't belong to any initialized node, assign it to the first
online node.
Signed-off-by: Nathan Lynch <nathanl@austin.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
parent bc16a759
...@@ -191,27 +191,28 @@ static int *of_get_associativity(struct device_node *dev) ...@@ -191,27 +191,28 @@ static int *of_get_associativity(struct device_node *dev)
return (unsigned int *)get_property(dev, "ibm,associativity", NULL); return (unsigned int *)get_property(dev, "ibm,associativity", NULL);
} }
/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
* info is found.
*/
static int of_node_to_nid(struct device_node *device) static int of_node_to_nid(struct device_node *device)
{ {
int nid; int nid = -1;
unsigned int *tmp; unsigned int *tmp;
if (min_common_depth == -1) if (min_common_depth == -1)
return 0; goto out;
tmp = of_get_associativity(device); tmp = of_get_associativity(device);
if (tmp && (tmp[0] >= min_common_depth)) { if (!tmp)
goto out;
if (tmp[0] >= min_common_depth)
nid = tmp[min_common_depth]; nid = tmp[min_common_depth];
} else {
dbg("WARNING: no NUMA information for %s\n",
device->full_name);
nid = 0;
}
/* POWER4 LPAR uses 0xffff as invalid node */ /* POWER4 LPAR uses 0xffff as invalid node */
if (nid == 0xffff) if (nid == 0xffff || nid >= MAX_NUMNODES)
nid = 0; nid = -1;
out:
return nid; return nid;
} }
...@@ -301,15 +302,9 @@ static int __cpuinit numa_setup_cpu(unsigned long lcpu) ...@@ -301,15 +302,9 @@ static int __cpuinit numa_setup_cpu(unsigned long lcpu)
nid = of_node_to_nid(cpu); nid = of_node_to_nid(cpu);
if (nid >= num_online_nodes()) { if (nid < 0 || !node_online(nid))
printk(KERN_ERR "WARNING: cpu %ld " nid = any_online_node(NODE_MASK_ALL);
"maps to invalid NUMA node %d\n",
lcpu, nid);
nid = 0;
}
out: out:
node_set_online(nid);
map_cpu_to_node(lcpu, nid); map_cpu_to_node(lcpu, nid);
of_node_put(cpu); of_node_put(cpu);
...@@ -376,7 +371,7 @@ static int __init parse_numa_properties(void) ...@@ -376,7 +371,7 @@ static int __init parse_numa_properties(void)
{ {
struct device_node *cpu = NULL; struct device_node *cpu = NULL;
struct device_node *memory = NULL; struct device_node *memory = NULL;
int max_domain = 0; int default_nid = 0;
unsigned long i; unsigned long i;
if (numa_enabled == 0) { if (numa_enabled == 0) {
...@@ -392,25 +387,26 @@ static int __init parse_numa_properties(void) ...@@ -392,25 +387,26 @@ static int __init parse_numa_properties(void)
dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
/* /*
* Even though we connect cpus to numa domains later in SMP init, * Even though we connect cpus to numa domains later in SMP
* we need to know the maximum node id now. This is because each * init, we need to know the node ids now. This is because
* node id must have NODE_DATA etc backing it. * each node to be onlined must have NODE_DATA etc backing it.
* As a result of hotplug we could still have cpus appear later on
* with larger node ids. In that case we force the cpu into node 0.
*/ */
for_each_cpu(i) { for_each_present_cpu(i) {
int nid; int nid;
cpu = find_cpu_node(i); cpu = find_cpu_node(i);
BUG_ON(!cpu);
if (cpu) {
nid = of_node_to_nid(cpu); nid = of_node_to_nid(cpu);
of_node_put(cpu); of_node_put(cpu);
if (nid < MAX_NUMNODES && /*
max_domain < nid) * Don't fall back to default_nid yet -- we will plug
max_domain = nid; * cpus into nodes once the memory scan has discovered
} * the topology.
*/
if (nid < 0)
continue;
node_set_online(nid);
} }
get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
...@@ -439,17 +435,15 @@ static int __init parse_numa_properties(void) ...@@ -439,17 +435,15 @@ static int __init parse_numa_properties(void)
start = read_n_cells(n_mem_addr_cells, &memcell_buf); start = read_n_cells(n_mem_addr_cells, &memcell_buf);
size = read_n_cells(n_mem_size_cells, &memcell_buf); size = read_n_cells(n_mem_size_cells, &memcell_buf);
/*
* Assumption: either all memory nodes or none will
* have associativity properties. If none, then
* everything goes to default_nid.
*/
nid = of_node_to_nid(memory); nid = of_node_to_nid(memory);
if (nid < 0)
if (nid >= MAX_NUMNODES) { nid = default_nid;
printk(KERN_ERR "WARNING: memory at %lx maps " node_set_online(nid);
"to invalid NUMA node %d\n", start,
nid);
nid = 0;
}
if (max_domain < nid)
max_domain = nid;
if (!(size = numa_enforce_memory_limit(start, size))) { if (!(size = numa_enforce_memory_limit(start, size))) {
if (--ranges) if (--ranges)
...@@ -465,10 +459,7 @@ static int __init parse_numa_properties(void) ...@@ -465,10 +459,7 @@ static int __init parse_numa_properties(void)
goto new_range; goto new_range;
} }
for (i = 0; i <= max_domain; i++) numa_setup_cpu(boot_cpuid);
node_set_online(i);
max_domain = numa_setup_cpu(boot_cpuid);
return 0; return 0;
} }
...@@ -768,10 +759,10 @@ int hot_add_scn_to_nid(unsigned long scn_addr) ...@@ -768,10 +759,10 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
{ {
struct device_node *memory = NULL; struct device_node *memory = NULL;
nodemask_t nodes; nodemask_t nodes;
int nid = 0; int default_nid = any_online_node(NODE_MASK_ALL);
if (!numa_enabled || (min_common_depth < 0)) if (!numa_enabled || (min_common_depth < 0))
return nid; return default_nid;
while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
unsigned long start, size; unsigned long start, size;
...@@ -791,8 +782,8 @@ int hot_add_scn_to_nid(unsigned long scn_addr) ...@@ -791,8 +782,8 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
nid = of_node_to_nid(memory); nid = of_node_to_nid(memory);
/* Domains not present at boot default to 0 */ /* Domains not present at boot default to 0 */
if (!node_online(nid)) if (nid < 0 || !node_online(nid))
nid = any_online_node(NODE_MASK_ALL); nid = default_nid;
if ((scn_addr >= start) && (scn_addr < (start + size))) { if ((scn_addr >= start) && (scn_addr < (start + size))) {
of_node_put(memory); of_node_put(memory);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment