Commit 2b4fa851 authored by Ingo Molnar

Merge branch 'x86/numa' into x86/devel

Conflicts:

	arch/x86/Kconfig
	arch/x86/kernel/e820.c
	arch/x86/kernel/efi_64.c
	arch/x86/kernel/mpparse.c
	arch/x86/kernel/setup.c
	arch/x86/kernel/setup_32.c
	arch/x86/mm/init_64.c
	include/asm-x86/proto.h
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parents 3de352bb 46f68e1c
...@@ -121,7 +121,7 @@ config ARCH_HAS_CACHE_LINE_SIZE ...@@ -121,7 +121,7 @@ config ARCH_HAS_CACHE_LINE_SIZE
def_bool y def_bool y
config HAVE_SETUP_PER_CPU_AREA config HAVE_SETUP_PER_CPU_AREA
def_bool X86_64 || (X86_SMP && !X86_VOYAGER) def_bool X86_64_SMP || (X86_SMP && !X86_VOYAGER)
config HAVE_CPUMASK_OF_CPU_MAP config HAVE_CPUMASK_OF_CPU_MAP
def_bool X86_64_SMP def_bool X86_64_SMP
...@@ -579,7 +579,21 @@ config SWIOTLB ...@@ -579,7 +579,21 @@ config SWIOTLB
config IOMMU_HELPER config IOMMU_HELPER
def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB) def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB)
config MAXSMP
bool "Configure Maximum number of SMP Processors and NUMA Nodes"
depends on X86_64 && SMP
default n
help
Configure maximum number of CPUS and NUMA Nodes for this architecture.
If unsure, say N.
if MAXSMP
config NR_CPUS
int
default "4096"
endif
if !MAXSMP
config NR_CPUS config NR_CPUS
int "Maximum number of CPUs (2-4096)" int "Maximum number of CPUs (2-4096)"
range 2 4096 range 2 4096
...@@ -592,7 +606,8 @@ config NR_CPUS ...@@ -592,7 +606,8 @@ config NR_CPUS
minimum value which makes sense is 2. minimum value which makes sense is 2.
This is purely to save memory - each supported CPU adds This is purely to save memory - each supported CPU adds
approximately one kilobyte to the kernel image. approximately eight kilobytes to the kernel image.
endif
config SCHED_SMT config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support" bool "SMT (Hyperthreading) scheduler support"
...@@ -983,13 +998,25 @@ config NUMA_EMU ...@@ -983,13 +998,25 @@ config NUMA_EMU
into virtual nodes when booted with "numa=fake=N", where N is the into virtual nodes when booted with "numa=fake=N", where N is the
number of nodes. This is only useful for debugging. number of nodes. This is only useful for debugging.
if MAXSMP
config NODES_SHIFT
int
default "9"
endif
if !MAXSMP
config NODES_SHIFT config NODES_SHIFT
int "Max num nodes shift(1-9)" int "Maximum NUMA Nodes (as a power of 2)"
range 1 9 if X86_64 range 1 9 if X86_64
default "6" if X86_64 default "6" if X86_64
default "4" if X86_NUMAQ default "4" if X86_NUMAQ
default "3" default "3"
depends on NEED_MULTIPLE_NODES depends on NEED_MULTIPLE_NODES
help
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accommodate various tables.
endif
config HAVE_ARCH_BOOTMEM_NODE config HAVE_ARCH_BOOTMEM_NODE
def_bool y def_bool y
......
...@@ -68,7 +68,7 @@ config DEBUG_PAGEALLOC ...@@ -68,7 +68,7 @@ config DEBUG_PAGEALLOC
config DEBUG_PER_CPU_MAPS config DEBUG_PER_CPU_MAPS
bool "Debug access to per_cpu maps" bool "Debug access to per_cpu maps"
depends on DEBUG_KERNEL depends on DEBUG_KERNEL
depends on X86_64_SMP depends on X86_SMP
default n default n
help help
Say Y to verify that the per_cpu map being accessed has Say Y to verify that the per_cpu map being accessed has
......
...@@ -52,9 +52,6 @@ ...@@ -52,9 +52,6 @@
unsigned long mp_lapic_addr; unsigned long mp_lapic_addr;
DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
/* /*
* Knob to control our willingness to enable the local APIC. * Knob to control our willingness to enable the local APIC.
* *
...@@ -1546,9 +1543,9 @@ void __cpuinit generic_processor_info(int apicid, int version) ...@@ -1546,9 +1543,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
} }
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
/* are we being called early in kernel startup? */ /* are we being called early in kernel startup? */
if (x86_cpu_to_apicid_early_ptr) { if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
cpu_to_apicid[cpu] = apicid; cpu_to_apicid[cpu] = apicid;
bios_cpu_apicid[cpu] = apicid; bios_cpu_apicid[cpu] = apicid;
......
...@@ -90,9 +90,6 @@ static unsigned long apic_phys; ...@@ -90,9 +90,6 @@ static unsigned long apic_phys;
unsigned long mp_lapic_addr; unsigned long mp_lapic_addr;
DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
unsigned int __cpuinitdata maxcpus = NR_CPUS; unsigned int __cpuinitdata maxcpus = NR_CPUS;
/* /*
* Get the LAPIC version * Get the LAPIC version
...@@ -1075,9 +1072,9 @@ void __cpuinit generic_processor_info(int apicid, int version) ...@@ -1075,9 +1072,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
max_physical_apicid = apicid; max_physical_apicid = apicid;
/* are we being called early in kernel startup? */ /* are we being called early in kernel startup? */
if (x86_cpu_to_apicid_early_ptr) { if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
cpu_to_apicid[cpu] = apicid; cpu_to_apicid[cpu] = apicid;
bios_cpu_apicid[cpu] = apicid; bios_cpu_apicid[cpu] = apicid;
...@@ -1253,7 +1250,7 @@ __cpuinit int apic_is_clustered_box(void) ...@@ -1253,7 +1250,7 @@ __cpuinit int apic_is_clustered_box(void)
if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
return 0; return 0;
bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
bitmap_zero(clustermap, NUM_APIC_CLUSTERS); bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
for (i = 0; i < NR_CPUS; i++) { for (i = 0; i < NR_CPUS; i++) {
......
...@@ -25,6 +25,20 @@ ...@@ -25,6 +25,20 @@
#include <asm/e820.h> #include <asm/e820.h>
#include <asm/bios_ebda.h> #include <asm/bios_ebda.h>
/* boot cpu pda */
static struct x8664_pda _boot_cpu_pda __read_mostly;
#ifdef CONFIG_SMP
/*
* We install an empty cpu_pda pointer table to indicate to early users
* (numa_set_node) that the cpu_pda pointer table for cpus other than
* the boot cpu is not yet setup.
*/
static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
#else
static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
#endif
static void __init zap_identity_mappings(void) static void __init zap_identity_mappings(void)
{ {
pgd_t *pgd = pgd_offset_k(0UL); pgd_t *pgd = pgd_offset_k(0UL);
...@@ -88,10 +102,12 @@ void __init x86_64_start_kernel(char * real_mode_data) ...@@ -88,10 +102,12 @@ void __init x86_64_start_kernel(char * real_mode_data)
early_printk("Kernel alive\n"); early_printk("Kernel alive\n");
for (i = 0; i < NR_CPUS; i++) _cpu_pda = __cpu_pda;
cpu_pda(i) = &boot_cpu_pda[i]; cpu_pda(0) = &_boot_cpu_pda;
pda_init(0); pda_init(0);
early_printk("Kernel really alive\n");
copy_bootdata(__va(real_mode_data)); copy_bootdata(__va(real_mode_data));
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
......
...@@ -90,7 +90,7 @@ int __init check_nmi_watchdog(void) ...@@ -90,7 +90,7 @@ int __init check_nmi_watchdog(void)
if (!atomic_read(&nmi_active)) if (!atomic_read(&nmi_active))
return 0; return 0;
prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
if (!prev_nmi_count) if (!prev_nmi_count)
goto error; goto error;
...@@ -101,7 +101,7 @@ int __init check_nmi_watchdog(void) ...@@ -101,7 +101,7 @@ int __init check_nmi_watchdog(void)
smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
#endif #endif
for (cpu = 0; cpu < NR_CPUS; cpu++) for (cpu = 0; cpu < nr_cpu_ids; cpu++)
prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count; prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count;
local_irq_enable(); local_irq_enable();
mdelay((20*1000)/nmi_hz); // wait 20 ticks mdelay((20*1000)/nmi_hz); // wait 20 ticks
......
...@@ -20,13 +20,34 @@ unsigned int boot_cpu_physical_apicid = -1U; ...@@ -20,13 +20,34 @@ unsigned int boot_cpu_physical_apicid = -1U;
unsigned int max_physical_apicid; unsigned int max_physical_apicid;
EXPORT_SYMBOL(boot_cpu_physical_apicid); EXPORT_SYMBOL(boot_cpu_physical_apicid);
DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
/* Bitmask of physically existing CPUs */ /* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map; physid_mask_t phys_cpu_present_map;
#endif #endif
/* map cpu index to physical APIC ID */
DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
#define X86_64_NUMA 1
/* map cpu index to node index */
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
/* which logical CPUs are on which nodes */
cpumask_t *node_to_cpumask_map;
EXPORT_SYMBOL(node_to_cpumask_map);
/* setup node_to_cpumask_map */
static void __init setup_node_to_cpumask_map(void);
#else
static inline void setup_node_to_cpumask_map(void) { }
#endif
#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
/* /*
* Copy data used in early init routines from the initial arrays to the * Copy data used in early init routines from the initial arrays to the
...@@ -38,20 +59,21 @@ static void __init setup_per_cpu_maps(void) ...@@ -38,20 +59,21 @@ static void __init setup_per_cpu_maps(void)
int cpu; int cpu;
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu]; per_cpu(x86_cpu_to_apicid, cpu) =
early_per_cpu_map(x86_cpu_to_apicid, cpu);
per_cpu(x86_bios_cpu_apicid, cpu) = per_cpu(x86_bios_cpu_apicid, cpu) =
x86_bios_cpu_apicid_init[cpu]; early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#ifdef CONFIG_NUMA #ifdef X86_64_NUMA
per_cpu(x86_cpu_to_node_map, cpu) = per_cpu(x86_cpu_to_node_map, cpu) =
x86_cpu_to_node_map_init[cpu]; early_per_cpu_map(x86_cpu_to_node_map, cpu);
#endif #endif
} }
/* indicate the early static arrays will soon be gone */ /* indicate the early static arrays will soon be gone */
x86_cpu_to_apicid_early_ptr = NULL; early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
x86_bios_cpu_apicid_early_ptr = NULL; early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#ifdef CONFIG_NUMA #ifdef X86_64_NUMA
x86_cpu_to_node_map_early_ptr = NULL; early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif #endif
} }
...@@ -80,6 +102,50 @@ static inline void setup_cpumask_of_cpu(void) { } ...@@ -80,6 +102,50 @@ static inline void setup_cpumask_of_cpu(void) { }
*/ */
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset); EXPORT_SYMBOL(__per_cpu_offset);
static inline void setup_cpu_pda_map(void) { }
#elif !defined(CONFIG_SMP)
static inline void setup_cpu_pda_map(void) { }
#else /* CONFIG_SMP && CONFIG_X86_64 */
/*
 * Build the run-time cpu_pda pointer table.
 *
 * One alloc_bootmem() block holds the pointer table (one slot per cpu id,
 * padded to a multiple of the cache line size) followed by one cache-line
 * rounded pda for every cpu id except one: cpu 0 keeps the pda it already
 * has.  Every other possible cpu is pointed at its own slice of the block
 * and flagged in_bootmem.  _cpu_pda is switched to the new table last.
 */
static void __init setup_cpu_pda_map(void)
{
	unsigned long pda_size, table_size, array_size;
	struct x8664_pda **table;
	char *next_pda;
	int cpu;

	pda_size = roundup(sizeof(struct x8664_pda), cache_line_size());
	table_size = roundup(nr_cpu_ids * sizeof(void *), cache_line_size());
	/* no slice for cpu 0: its existing pda is reused below */
	array_size = pda_size * (nr_cpu_ids - 1);

	/* pointer table first, pda array right behind it */
	table = alloc_bootmem(table_size + array_size);
	next_pda = (char *)table + table_size;

	for_each_possible_cpu(cpu) {
		if (cpu != 0) {
			table[cpu] = (struct x8664_pda *)next_pda;
			table[cpu]->in_bootmem = 1;
			next_pda += pda_size;
		} else {
			/* boot cpu pda stays where it is */
			table[0] = cpu_pda(0);
		}
	}

	/* from here on cpu_pda() resolves through the new table */
	_cpu_pda = table;
}
#endif #endif
/* /*
...@@ -89,50 +155,52 @@ EXPORT_SYMBOL(__per_cpu_offset); ...@@ -89,50 +155,52 @@ EXPORT_SYMBOL(__per_cpu_offset);
*/ */
void __init setup_per_cpu_areas(void) void __init setup_per_cpu_areas(void)
{ {
int i, highest_cpu = 0; ssize_t size = PERCPU_ENOUGH_ROOM;
unsigned long size; char *ptr;
int cpu;
#ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_HOTPLUG_CPU
prefill_possible_map(); prefill_possible_map();
#else
nr_cpu_ids = num_processors;
#endif #endif
/* Setup cpu_pda map */
setup_cpu_pda_map();
/* Copy section for each CPU (we discard the original) */ /* Copy section for each CPU (we discard the original) */
size = PERCPU_ENOUGH_ROOM; size = PERCPU_ENOUGH_ROOM;
printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
size); size);
for_each_possible_cpu(i) { for_each_possible_cpu(cpu) {
char *ptr;
#ifndef CONFIG_NEED_MULTIPLE_NODES #ifndef CONFIG_NEED_MULTIPLE_NODES
ptr = alloc_bootmem_pages(size); ptr = alloc_bootmem_pages(size);
#else #else
int node = early_cpu_to_node(i); int node = early_cpu_to_node(cpu);
if (!node_online(node) || !NODE_DATA(node)) { if (!node_online(node) || !NODE_DATA(node)) {
ptr = alloc_bootmem_pages(size); ptr = alloc_bootmem_pages(size);
printk(KERN_INFO printk(KERN_INFO
"cpu %d has no node or node-local memory\n", i); "cpu %d has no node %d or node-local memory\n",
cpu, node);
} }
else else
ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
#endif #endif
if (!ptr) per_cpu_offset(cpu) = ptr - __per_cpu_start;
panic("Cannot allocate cpu data for CPU %d\n", i);
#ifdef CONFIG_X86_64
cpu_pda(i)->data_offset = ptr - __per_cpu_start;
#else
__per_cpu_offset[i] = ptr - __per_cpu_start;
#endif
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
highest_cpu = i;
} }
nr_cpu_ids = highest_cpu + 1; printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids); NR_CPUS, nr_cpu_ids, nr_node_ids);
/* Setup percpu data maps */ /* Setup percpu data maps */
setup_per_cpu_maps(); setup_per_cpu_maps();
/* Setup node to cpumask map */
setup_node_to_cpumask_map();
/* Setup cpumask_of_cpu map */ /* Setup cpumask_of_cpu map */
setup_cpumask_of_cpu(); setup_cpumask_of_cpu();
} }
...@@ -163,3 +231,176 @@ void __init parse_setup_data(void) ...@@ -163,3 +231,176 @@ void __init parse_setup_data(void)
early_iounmap(data, PAGE_SIZE); early_iounmap(data, PAGE_SIZE);
} }
} }
#ifdef X86_64_NUMA
/*
 * Size and allocate node_to_cpumask_map.
 *
 * node_possible_map must already be valid: when nr_node_ids still equals
 * MAX_NUMNODES it is recomputed here as (last node in node_possible_map) + 1.
 *
 * node_to_cpumask() must not be used before this has run.
 */
static void __init setup_node_to_cpumask_map(void)
{
	unsigned int node, last = 0;

	if (nr_node_ids == MAX_NUMNODES) {
		/* nr_node_ids not narrowed yet: walk the possible nodes */
		for_each_node_mask(node, node_possible_map)
			last = node;
		nr_node_ids = last + 1;
	}

	/* one cpumask per node id; publishing it enables node_to_cpumask() */
	node_to_cpumask_map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));

	Dprintk(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n",
		node_to_cpumask_map, nr_node_ids);
}
/*
 * Record that 'cpu' belongs to 'node'.
 *
 * The pda's nodenumber is updated only when the cpu has a pda and 'node'
 * is not NUMA_NO_NODE.  The cpu->node map is written through the early
 * array while that pointer is still set, otherwise through the per_cpu
 * variable if the cpu has a per_cpu offset; if neither exists the request
 * is only reported via Dprintk.
 */
void __cpuinit numa_set_node(int cpu, int node)
{
	int *early_map = early_per_cpu_ptr(x86_cpu_to_node_map);

	if (cpu_pda(cpu) && node != NUMA_NO_NODE)
		cpu_pda(cpu)->nodenumber = node;

	if (early_map) {
		early_map[cpu] = node;
		return;
	}

	if (per_cpu_offset(cpu)) {
		per_cpu(x86_cpu_to_node_map, cpu) = node;
		return;
	}

	Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
}
/*
 * Reset the node mapping of 'cpu' by handing NUMA_NO_NODE to
 * numa_set_node().
 */
void __cpuinit numa_clear_node(int cpu)
{
numa_set_node(cpu, NUMA_NO_NODE);
}
#ifndef CONFIG_DEBUG_PER_CPU_MAPS
/*
 * Non-debug variant: set 'cpu' in the node_to_cpumask_map entry of the
 * node reported by early_cpu_to_node(cpu).
 */
void __cpuinit numa_add_cpu(int cpu)
{
cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}
/*
 * Non-debug variant: clear 'cpu' from the node_to_cpumask_map entry of the
 * node reported by cpu_to_node(cpu).
 */
void __cpuinit numa_remove_cpu(int cpu)
{
cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
}
#else /* CONFIG_DEBUG_PER_CPU_MAPS */
/*
* --------- debug versions of the numa functions ---------
*/
/*
 * Debug helper shared by numa_add_cpu() and numa_remove_cpu().
 *
 * Sets ('enable' non-zero) or clears ('enable' zero) 'cpu' in the cpumask
 * of the node returned by cpu_to_node(cpu) and logs the resulting mask.
 * If node_to_cpumask_map has not been allocated yet, an error and a stack
 * dump are emitted and nothing is modified.
 */
static void __cpuinit numa_set_cpumask(int cpu, int enable)
{
	int node = cpu_to_node(cpu);
	cpumask_t *node_mask;
	char buf[64];

	if (!node_to_cpumask_map) {
		printk(KERN_ERR "node_to_cpumask_map NULL\n");
		dump_stack();
		return;
	}

	node_mask = node_to_cpumask_map + node;
	if (!enable)
		cpu_clear(cpu, *node_mask);
	else
		cpu_set(cpu, *node_mask);

	/* render the updated mask as a cpu list for the log line */
	cpulist_scnprintf(buf, sizeof(buf), *node_mask);
	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
	       enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
}
/* Debug variant: add 'cpu' to its node's cpumask via numa_set_cpumask(). */
void __cpuinit numa_add_cpu(int cpu)
{
numa_set_cpumask(cpu, 1);
}
/* Debug variant: remove 'cpu' from its node's cpumask via numa_set_cpumask(). */
void __cpuinit numa_remove_cpu(int cpu)
{
numa_set_cpumask(cpu, 0);
}
/*
 * Debug version of cpu_to_node(): return the node 'cpu' is mapped to.
 *
 * While the early cpu->node array pointer is still set, the call is
 * reported as too early (warning plus stack dump) and answered from that
 * array; otherwise the per_cpu copy is returned.
 */
int cpu_to_node(int cpu)
{
	int *early_map = early_per_cpu_ptr(x86_cpu_to_node_map);

	if (!early_map)
		return per_cpu(x86_cpu_to_node_map, cpu);

	printk(KERN_WARNING
		"cpu_to_node(%d): usage too early!\n", cpu);
	dump_stack();
	return early_map[cpu];
}
EXPORT_SYMBOL(cpu_to_node);
/*
 * Variant of cpu_to_node() that may be called before the per_cpu areas
 * exist.
 *
 * Answers from the early array while its pointer is set, then from the
 * per_cpu variable if the cpu has a per_cpu offset.  With neither
 * available it warns, dumps the stack and returns NUMA_NO_NODE.
 */
int early_cpu_to_node(int cpu)
{
	int *early_map = early_per_cpu_ptr(x86_cpu_to_node_map);

	if (early_map)
		return early_map[cpu];

	if (per_cpu_offset(cpu))
		return per_cpu(x86_cpu_to_node_map, cpu);

	printk(KERN_WARNING
		"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
	dump_stack();
	return NUMA_NO_NODE;
}
/*
 * Debug version: return a pointer to the cpumask of node 'node'.
 *
 * If node_to_cpumask_map is not allocated yet, a warning and stack dump
 * are emitted and a pointer to cpu_online_map is returned instead.
 * A 'node' that is >= nr_node_ids triggers BUG_ON.
 */
cpumask_t *_node_to_cpumask_ptr(int node)
{
	if (node_to_cpumask_map != NULL) {
		BUG_ON(node >= nr_node_ids);
		return node_to_cpumask_map + node;
	}

	printk(KERN_WARNING
		"_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
		node);
	dump_stack();
	return &cpu_online_map;
}
EXPORT_SYMBOL(_node_to_cpumask_ptr);
/*
 * Debug version: return (by value) the cpumask of node 'node'.
 *
 * If node_to_cpumask_map is not allocated yet, a warning and stack dump
 * are emitted and cpu_online_map is returned instead.
 * A 'node' that is >= nr_node_ids triggers BUG_ON.
 */
cpumask_t node_to_cpumask(int node)
{
	if (node_to_cpumask_map != NULL) {
		BUG_ON(node >= nr_node_ids);
		return node_to_cpumask_map[node];
	}

	printk(KERN_WARNING
		"node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
	dump_stack();
	return cpu_online_map;
}
EXPORT_SYMBOL(node_to_cpumask);
/*
* --------- end of debug versions of the numa functions ---------
*/
#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
#endif /* X86_64_NUMA */
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/kgdb.h> #include <linux/kgdb.h>
#include <linux/topology.h>
#include <asm/pda.h> #include <asm/pda.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/processor.h> #include <asm/processor.h>
...@@ -34,9 +35,8 @@ struct boot_params boot_params; ...@@ -34,9 +35,8 @@ struct boot_params boot_params;
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; struct x8664_pda **_cpu_pda __read_mostly;
EXPORT_SYMBOL(_cpu_pda); EXPORT_SYMBOL(_cpu_pda);
struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
...@@ -114,8 +114,10 @@ void pda_init(int cpu) ...@@ -114,8 +114,10 @@ void pda_init(int cpu)
__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
if (!pda->irqstackptr) if (!pda->irqstackptr)
panic("cannot allocate irqstack for cpu %d", cpu); panic("cannot allocate irqstack for cpu %d", cpu);
}
if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
pda->nodenumber = cpu_to_node(cpu);
}
pda->irqstackptr += IRQSTACKSIZE-64; pda->irqstackptr += IRQSTACKSIZE-64;
} }
......
...@@ -659,18 +659,6 @@ static void set_mca_bus(int x) ...@@ -659,18 +659,6 @@ static void set_mca_bus(int x)
static void set_mca_bus(int x) { } static void set_mca_bus(int x) { }
#endif #endif
#ifdef CONFIG_NUMA
/*
* In the golden day, when everything among i386 and x86_64 will be
* integrated, this will not live here
*/
void *x86_cpu_to_node_map_early_ptr;
int x86_cpu_to_node_map_init[NR_CPUS] = {
[0 ... NR_CPUS-1] = NUMA_NO_NODE
};
DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
#endif
static void probe_roms(void); static void probe_roms(void);
/* /*
...@@ -866,18 +854,6 @@ void __init setup_arch(char **cmdline_p) ...@@ -866,18 +854,6 @@ void __init setup_arch(char **cmdline_p)
paravirt_post_allocator_init(); paravirt_post_allocator_init();
#ifdef CONFIG_X86_SMP
/*
* setup to use the early static init tables during kernel startup
* X86_SMP will exclude sub-arches that don't deal well with it.
*/
x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
#ifdef CONFIG_NUMA
x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
#endif
#endif
#ifdef CONFIG_X86_GENERICARCH #ifdef CONFIG_X86_GENERICARCH
generic_apic_probe(); generic_apic_probe();
#endif #endif
......
...@@ -376,15 +376,6 @@ void __init setup_arch(char **cmdline_p) ...@@ -376,15 +376,6 @@ void __init setup_arch(char **cmdline_p)
kvmclock_init(); kvmclock_init();
#endif #endif
#ifdef CONFIG_SMP
/* setup to use the early static init tables during kernel startup */
x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
#ifdef CONFIG_NUMA
x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
#endif
#endif
#ifdef CONFIG_ACPI #ifdef CONFIG_ACPI
/* /*
* Initialize the ACPI boot-time table parser (gets the RSDP and SDT). * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
......
...@@ -67,22 +67,6 @@ ...@@ -67,22 +67,6 @@
#include <mach_wakecpu.h> #include <mach_wakecpu.h>
#include <smpboot_hooks.h> #include <smpboot_hooks.h>
/*
* FIXME: For x86_64, those are defined in other files. But moving them here,
* would make the setup areas dependent on smp, which is a loss. When we
* integrate apic between arches, we can probably do a better job, but
* right now, they'll stay here -- glommer
*/
/* which logical CPU number maps to which CPU (physical APIC ID) */
u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
{ [0 ... NR_CPUS-1] = BAD_APICID };
void *x86_cpu_to_apicid_early_ptr;
u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
= { [0 ... NR_CPUS-1] = BAD_APICID };
void *x86_bios_cpu_apicid_early_ptr;
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
u8 apicid_2_node[MAX_APICID]; u8 apicid_2_node[MAX_APICID];
static int low_mappings; static int low_mappings;
...@@ -814,6 +798,45 @@ static void __cpuinit do_fork_idle(struct work_struct *work) ...@@ -814,6 +798,45 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
complete(&c_idle->done); complete(&c_idle->done);
} }
#ifdef CONFIG_X86_64
/*
 * Allocate node local memory for the AP pda.
 *
 * Must be called after the _cpu_pda pointer table is initialized.
 *
 * A pda that exists and is not flagged in_bootmem is left alone.
 * Otherwise a replacement is kmalloc'ed on the node reported by
 * cpu_to_node(cpu), the old contents (if any) are copied over, and the
 * old copy is handed back with free_bootmem() while !after_bootmem.
 * When there is no old pda to copy from, the new one is zeroed so that
 * uninitialized allocator memory is never published through cpu_pda().
 *
 * Returns 0 when the cpu ends up with a usable pda (including the case
 * where allocation failed but the old pda is still there), -1 when
 * allocation failed and the cpu has no pda at all.
 */
static int __cpuinit get_local_pda(int cpu)
{
	struct x8664_pda *oldpda, *newpda;
	unsigned long size = sizeof(struct x8664_pda);
	int node = cpu_to_node(cpu);

	/* a pda that is not in bootmem needs no replacement */
	if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
		return 0;

	oldpda = cpu_pda(cpu);
	newpda = kmalloc_node(size, GFP_ATOMIC, node);
	if (!newpda) {
		printk(KERN_ERR "Could not allocate node local PDA "
			"for CPU %d on node %d\n", cpu, node);

		if (oldpda)
			return 0;	/* have a usable pda */
		else
			return -1;
	}

	if (oldpda) {
		memcpy(newpda, oldpda, size);
		if (!after_bootmem)
			free_bootmem((unsigned long)oldpda, size);
	} else {
		/* kmalloc_node() does not clear memory and nothing was copied */
		memset(newpda, 0, size);
	}

	newpda->in_bootmem = 0;
	cpu_pda(cpu) = newpda;
	return 0;
}
#endif /* CONFIG_X86_64 */
static int __cpuinit do_boot_cpu(int apicid, int cpu) static int __cpuinit do_boot_cpu(int apicid, int cpu)
/* /*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
...@@ -839,19 +862,11 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) ...@@ -839,19 +862,11 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
} }
/* Allocate node local memory for AP pdas */ /* Allocate node local memory for AP pdas */
if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) { if (cpu > 0) {
struct x8664_pda *newpda, *pda; boot_error = get_local_pda(cpu);
int node = cpu_to_node(cpu); if (boot_error)
pda = cpu_pda(cpu); goto restore_state;
newpda = kmalloc_node(sizeof(struct x8664_pda), GFP_ATOMIC, /* if can't get pda memory, can't start cpu */
node);
if (newpda) {
memcpy(newpda, pda, sizeof(struct x8664_pda));
cpu_pda(cpu) = newpda;
} else
printk(KERN_ERR
"Could not allocate node local PDA for CPU %d on node %d\n",
cpu, node);
} }
#endif #endif
...@@ -970,11 +985,13 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) ...@@ -970,11 +985,13 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
} }
} }
restore_state:
if (boot_error) { if (boot_error) {
/* Try to put things back the way they were before ... */ /* Try to put things back the way they were before ... */
unmap_cpu_to_logical_apicid(cpu); unmap_cpu_to_logical_apicid(cpu);
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
clear_node_cpumask(cpu); /* was set by numa_add_cpu */ numa_remove_cpu(cpu); /* was set by numa_add_cpu */
#endif #endif
cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */ cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */
cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
...@@ -1347,6 +1364,8 @@ __init void prefill_possible_map(void) ...@@ -1347,6 +1364,8 @@ __init void prefill_possible_map(void)
for (i = 0; i < possible; i++) for (i = 0; i < possible; i++)
cpu_set(i, cpu_possible_map); cpu_set(i, cpu_possible_map);
nr_cpu_ids = possible;
} }
static void __ref remove_cpu_from_maps(int cpu) static void __ref remove_cpu_from_maps(int cpu)
...@@ -1357,7 +1376,7 @@ static void __ref remove_cpu_from_maps(int cpu) ...@@ -1357,7 +1376,7 @@ static void __ref remove_cpu_from_maps(int cpu)
cpu_clear(cpu, cpu_callin_map); cpu_clear(cpu, cpu_callin_map);
/* was set by cpu_init() */ /* was set by cpu_init() */
clear_bit(cpu, (unsigned long *)&cpu_initialized); clear_bit(cpu, (unsigned long *)&cpu_initialized);
clear_node_cpumask(cpu); numa_remove_cpu(cpu);
#endif #endif
} }
......
...@@ -27,30 +27,17 @@ ...@@ -27,30 +27,17 @@
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data); EXPORT_SYMBOL(node_data);
bootmem_data_t plat_node_bdata[MAX_NUMNODES]; static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
struct memnode memnode; struct memnode memnode;
#ifdef CONFIG_SMP
int x86_cpu_to_node_map_init[NR_CPUS] = {
[0 ... NR_CPUS-1] = NUMA_NO_NODE
};
void *x86_cpu_to_node_map_early_ptr;
EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
#endif
DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);
s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
}; };
cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_to_cpumask_map);
int numa_off __initdata; int numa_off __initdata;
unsigned long __initdata nodemap_addr; static unsigned long __initdata nodemap_addr;
unsigned long __initdata nodemap_size; static unsigned long __initdata nodemap_size;
/* /*
* Given a shift value, try to populate memnodemap[] * Given a shift value, try to populate memnodemap[]
...@@ -192,7 +179,7 @@ static void * __init early_node_mem(int nodeid, unsigned long start, ...@@ -192,7 +179,7 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
void __init setup_node_bootmem(int nodeid, unsigned long start, void __init setup_node_bootmem(int nodeid, unsigned long start,
unsigned long end) unsigned long end)
{ {
unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size; unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
unsigned long bootmap_start, nodedata_phys; unsigned long bootmap_start, nodedata_phys;
void *bootmap; void *bootmap;
const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
...@@ -204,7 +191,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, ...@@ -204,7 +191,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
start, end); start, end);
start_pfn = start >> PAGE_SHIFT; start_pfn = start >> PAGE_SHIFT;
end_pfn = end >> PAGE_SHIFT; last_pfn = end >> PAGE_SHIFT;
node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size, node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size,
SMP_CACHE_BYTES); SMP_CACHE_BYTES);
...@@ -217,7 +204,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, ...@@ -217,7 +204,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
NODE_DATA(nodeid)->node_start_pfn = start_pfn; NODE_DATA(nodeid)->node_start_pfn = start_pfn;
NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
/* /*
* Find a place for the bootmem map * Find a place for the bootmem map
...@@ -226,7 +213,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, ...@@ -226,7 +213,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
* early_node_mem will get that with find_e820_area instead * early_node_mem will get that with find_e820_area instead
* of alloc_bootmem, that could clash with reserved range * of alloc_bootmem, that could clash with reserved range
*/ */
bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn);
nid = phys_to_nid(nodedata_phys); nid = phys_to_nid(nodedata_phys);
if (nid == nodeid) if (nid == nodeid)
bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
...@@ -248,7 +235,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, ...@@ -248,7 +235,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
bootmap_size = init_bootmem_node(NODE_DATA(nodeid), bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
bootmap_start >> PAGE_SHIFT, bootmap_start >> PAGE_SHIFT,
start_pfn, end_pfn); start_pfn, last_pfn);
printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n",
bootmap_start, bootmap_start + bootmap_size - 1, bootmap_start, bootmap_start + bootmap_size - 1,
...@@ -309,7 +296,7 @@ void __init numa_init_array(void) ...@@ -309,7 +296,7 @@ void __init numa_init_array(void)
#ifdef CONFIG_NUMA_EMU #ifdef CONFIG_NUMA_EMU
/* Numa emulation */ /* Numa emulation */
char *cmdline __initdata; static char *cmdline __initdata;
/* /*
* Setups up nid to range from addr to addr + size. If the end * Setups up nid to range from addr to addr + size. If the end
...@@ -413,15 +400,15 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, ...@@ -413,15 +400,15 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
} }
/* /*
* Sets up the system RAM area from start_pfn to end_pfn according to the * Sets up the system RAM area from start_pfn to last_pfn according to the
* numa=fake command-line option. * numa=fake command-line option.
*/ */
static struct bootnode nodes[MAX_NUMNODES] __initdata; static struct bootnode nodes[MAX_NUMNODES] __initdata;
static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn)
{ {
u64 size, addr = start_pfn << PAGE_SHIFT; u64 size, addr = start_pfn << PAGE_SHIFT;
u64 max_addr = end_pfn << PAGE_SHIFT; u64 max_addr = last_pfn << PAGE_SHIFT;
int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i;
memset(&nodes, 0, sizeof(nodes)); memset(&nodes, 0, sizeof(nodes));
...@@ -527,7 +514,7 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) ...@@ -527,7 +514,7 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
} }
#endif /* CONFIG_NUMA_EMU */ #endif /* CONFIG_NUMA_EMU */
void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) void __init numa_initmem_init(unsigned long start_pfn, unsigned long last_pfn)
{ {
int i; int i;
...@@ -535,7 +522,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) ...@@ -535,7 +522,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
nodes_clear(node_online_map); nodes_clear(node_online_map);
#ifdef CONFIG_NUMA_EMU #ifdef CONFIG_NUMA_EMU
if (cmdline && !numa_emulation(start_pfn, end_pfn)) if (cmdline && !numa_emulation(start_pfn, last_pfn))
return; return;
nodes_clear(node_possible_map); nodes_clear(node_possible_map);
nodes_clear(node_online_map); nodes_clear(node_online_map);
...@@ -543,7 +530,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) ...@@ -543,7 +530,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
#ifdef CONFIG_ACPI_NUMA #ifdef CONFIG_ACPI_NUMA
if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
end_pfn << PAGE_SHIFT)) last_pfn << PAGE_SHIFT))
return; return;
nodes_clear(node_possible_map); nodes_clear(node_possible_map);
nodes_clear(node_online_map); nodes_clear(node_online_map);
...@@ -551,7 +538,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) ...@@ -551,7 +538,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
#ifdef CONFIG_K8_NUMA #ifdef CONFIG_K8_NUMA
if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT,
end_pfn<<PAGE_SHIFT)) last_pfn<<PAGE_SHIFT))
return; return;
nodes_clear(node_possible_map); nodes_clear(node_possible_map);
nodes_clear(node_online_map); nodes_clear(node_online_map);
...@@ -561,7 +548,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) ...@@ -561,7 +548,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
printk(KERN_INFO "Faking a node at %016lx-%016lx\n", printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
start_pfn << PAGE_SHIFT, start_pfn << PAGE_SHIFT,
end_pfn << PAGE_SHIFT); last_pfn << PAGE_SHIFT);
/* setup dummy node covering all memory */ /* setup dummy node covering all memory */
memnode_shift = 63; memnode_shift = 63;
memnodemap = memnode.embedded_map; memnodemap = memnode.embedded_map;
...@@ -570,29 +557,8 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) ...@@ -570,29 +557,8 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
node_set(0, node_possible_map); node_set(0, node_possible_map);
for (i = 0; i < NR_CPUS; i++) for (i = 0; i < NR_CPUS; i++)
numa_set_node(i, 0); numa_set_node(i, 0);
/* cpumask_of_cpu() may not be available during early startup */ e820_register_active_regions(0, start_pfn, last_pfn);
memset(&node_to_cpumask_map[0], 0, sizeof(node_to_cpumask_map[0])); setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
cpu_set(0, node_to_cpumask_map[0]);
e820_register_active_regions(0, start_pfn, end_pfn);
setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
}
__cpuinit void numa_add_cpu(int cpu)
{
set_bit(cpu,
(unsigned long *)&node_to_cpumask_map[early_cpu_to_node(cpu)]);
}
void __cpuinit numa_set_node(int cpu, int node)
{
int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
if(cpu_to_node_map)
cpu_to_node_map[cpu] = node;
else if(per_cpu_offset(cpu))
per_cpu(x86_cpu_to_node_map, cpu) = node;
else
Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
} }
unsigned long __init numa_free_all_bootmem(void) unsigned long __init numa_free_all_bootmem(void)
...@@ -641,6 +607,7 @@ static __init int numa_setup(char *opt) ...@@ -641,6 +607,7 @@ static __init int numa_setup(char *opt)
} }
early_param("numa", numa_setup); early_param("numa", numa_setup);
#ifdef CONFIG_NUMA
/* /*
* Setup early cpu_to_node. * Setup early cpu_to_node.
* *
...@@ -652,14 +619,19 @@ early_param("numa", numa_setup); ...@@ -652,14 +619,19 @@ early_param("numa", numa_setup);
* is already initialized in a round robin manner at numa_init_array, * is already initialized in a round robin manner at numa_init_array,
* prior to this call, and this initialization is good enough * prior to this call, and this initialization is good enough
* for the fake NUMA cases. * for the fake NUMA cases.
*
* Called before the per_cpu areas are setup.
*/ */
void __init init_cpu_to_node(void) void __init init_cpu_to_node(void)
{ {
int i; int cpu;
u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
for (i = 0; i < NR_CPUS; i++) { BUG_ON(cpu_to_apicid == NULL);
for_each_possible_cpu(cpu) {
int node; int node;
u16 apicid = x86_cpu_to_apicid_init[i]; u16 apicid = cpu_to_apicid[cpu];
if (apicid == BAD_APICID) if (apicid == BAD_APICID)
continue; continue;
...@@ -668,8 +640,9 @@ void __init init_cpu_to_node(void) ...@@ -668,8 +640,9 @@ void __init init_cpu_to_node(void)
continue; continue;
if (!node_online(node)) if (!node_online(node))
continue; continue;
numa_set_node(i, node); numa_set_node(cpu, node);
} }
} }
#endif
...@@ -376,7 +376,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) ...@@ -376,7 +376,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
if (node == NUMA_NO_NODE) if (node == NUMA_NO_NODE)
continue; continue;
if (!node_isset(node, node_possible_map)) if (!node_isset(node, node_possible_map))
numa_set_node(i, NUMA_NO_NODE); numa_clear_node(i);
} }
numa_init_array(); numa_init_array();
return 0; return 0;
......
...@@ -171,8 +171,11 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do ...@@ -171,8 +171,11 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
if (node != -1) if (node != -1)
set_mp_bus_to_node(busnum, node); set_mp_bus_to_node(busnum, node);
else else
node = get_mp_bus_to_node(busnum);
#endif #endif
node = get_mp_bus_to_node(busnum);
if (node != -1 && !node_online(node))
node = -1;
/* Allocate per-root-bus (not per bus) arch-specific data. /* Allocate per-root-bus (not per bus) arch-specific data.
* TODO: leak; this memory is never freed. * TODO: leak; this memory is never freed.
...@@ -204,14 +207,16 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do ...@@ -204,14 +207,16 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
if (!bus) if (!bus)
kfree(sd); kfree(sd);
if (bus && node != -1) {
#ifdef CONFIG_ACPI_NUMA #ifdef CONFIG_ACPI_NUMA
if (bus) { if (pxm >= 0)
if (pxm >= 0) {
printk(KERN_DEBUG "bus %02x -> pxm %d -> node %d\n", printk(KERN_DEBUG "bus %02x -> pxm %d -> node %d\n",
busnum, pxm, pxm_to_node(pxm)); busnum, pxm, node);
} #else
} printk(KERN_DEBUG "bus %02x -> node %d\n",
busnum, node);
#endif #endif
}
if (bus && (pci_probe & PCI_USE__CRS)) if (bus && (pci_probe & PCI_USE__CRS))
get_current_resources(device, busnum, domain, bus); get_current_resources(device, busnum, domain, bus);
......
...@@ -40,6 +40,7 @@ static ssize_t show_##name(struct sys_device *dev, char *buf) \ ...@@ -40,6 +40,7 @@ static ssize_t show_##name(struct sys_device *dev, char *buf) \
return sprintf(buf, "%d\n", topology_##name(cpu)); \ return sprintf(buf, "%d\n", topology_##name(cpu)); \
} }
#if defined(topology_thread_siblings) || defined(topology_core_siblings)
static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf) static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf)
{ {
ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf; ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
...@@ -54,21 +55,41 @@ static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf) ...@@ -54,21 +55,41 @@ static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf)
} }
return n; return n;
} }
#endif
#ifdef arch_provides_topology_pointers
#define define_siblings_show_map(name) \ #define define_siblings_show_map(name) \
static inline ssize_t show_##name(struct sys_device *dev, char *buf) \ static ssize_t show_##name(struct sys_device *dev, char *buf) \
{ \ { \
unsigned int cpu = dev->id; \ unsigned int cpu = dev->id; \
return show_cpumap(0, &(topology_##name(cpu)), buf); \ return show_cpumap(0, &(topology_##name(cpu)), buf); \
} }
#define define_siblings_show_list(name) \ #define define_siblings_show_list(name) \
static inline ssize_t show_##name##_list(struct sys_device *dev, char *buf) \ static ssize_t show_##name##_list(struct sys_device *dev, char *buf) \
{ \ { \
unsigned int cpu = dev->id; \ unsigned int cpu = dev->id; \
return show_cpumap(1, &(topology_##name(cpu)), buf); \ return show_cpumap(1, &(topology_##name(cpu)), buf); \
} }
#else
#define define_siblings_show_map(name) \
static ssize_t show_##name(struct sys_device *dev, char *buf) \
{ \
unsigned int cpu = dev->id; \
cpumask_t mask = topology_##name(cpu); \
return show_cpumap(0, &mask, buf); \
}
#define define_siblings_show_list(name) \
static ssize_t show_##name##_list(struct sys_device *dev, char *buf) \
{ \
unsigned int cpu = dev->id; \
cpumask_t mask = topology_##name(cpu); \
return show_cpumap(1, &mask, buf); \
}
#endif
#define define_siblings_show_func(name) \ #define define_siblings_show_func(name) \
define_siblings_show_map(name); define_siblings_show_list(name) define_siblings_show_map(name); define_siblings_show_list(name)
......
...@@ -14,11 +14,9 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks, ...@@ -14,11 +14,9 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
extern void numa_add_cpu(int cpu);
extern void numa_init_array(void); extern void numa_init_array(void);
extern int numa_off; extern int numa_off;
extern void numa_set_node(int cpu, int node);
extern void srat_reserve_add_area(int nodeid); extern void srat_reserve_add_area(int nodeid);
extern int hotadd_percent; extern int hotadd_percent;
...@@ -31,15 +29,16 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, ...@@ -31,15 +29,16 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
extern void __init init_cpu_to_node(void); extern void __init init_cpu_to_node(void);
extern void __cpuinit numa_set_node(int cpu, int node);
static inline void clear_node_cpumask(int cpu) extern void __cpuinit numa_clear_node(int cpu);
{ extern void __cpuinit numa_add_cpu(int cpu);
clear_bit(cpu, (unsigned long *)&node_to_cpumask_map[cpu_to_node(cpu)]); extern void __cpuinit numa_remove_cpu(int cpu);
}
#else #else
#define init_cpu_to_node() do {} while (0) static inline void init_cpu_to_node(void) { }
#define clear_node_cpumask(cpu) do {} while (0) static inline void numa_set_node(int cpu, int node) { }
static inline void numa_clear_node(int cpu) { }
static inline void numa_add_cpu(int cpu, int node) { }
static inline void numa_remove_cpu(int cpu) { }
#endif #endif
#endif #endif
...@@ -22,6 +22,8 @@ struct x8664_pda { ...@@ -22,6 +22,8 @@ struct x8664_pda {
offset 40!!! */ offset 40!!! */
#endif #endif
char *irqstackptr; char *irqstackptr;
short nodenumber; /* number of current node (32k max) */
short in_bootmem; /* pda lives in bootmem */
unsigned int __softirq_pending; unsigned int __softirq_pending;
unsigned int __nmi_count; /* number of NMI on this CPUs */ unsigned int __nmi_count; /* number of NMI on this CPUs */
short mmu_state; short mmu_state;
...@@ -37,8 +39,7 @@ struct x8664_pda { ...@@ -37,8 +39,7 @@ struct x8664_pda {
unsigned irq_spurious_count; unsigned irq_spurious_count;
} ____cacheline_aligned_in_smp; } ____cacheline_aligned_in_smp;
extern struct x8664_pda *_cpu_pda[]; extern struct x8664_pda **_cpu_pda;
extern struct x8664_pda boot_cpu_pda[];
extern void pda_init(int); extern void pda_init(int);
#define cpu_pda(i) (_cpu_pda[i]) #define cpu_pda(i) (_cpu_pda[i])
......
...@@ -143,4 +143,50 @@ do { \ ...@@ -143,4 +143,50 @@ do { \
#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) #define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val)
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
#endif /* !CONFIG_X86_64 */ #endif /* !CONFIG_X86_64 */
#ifdef CONFIG_SMP
/*
* Define the "EARLY_PER_CPU" macros. These are used for some per_cpu
* variables that are initialized and accessed before there are per_cpu
* areas allocated.
*/
#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \
DEFINE_PER_CPU(_type, _name) = _initvalue; \
__typeof__(_type) _name##_early_map[NR_CPUS] __initdata = \
{ [0 ... NR_CPUS-1] = _initvalue }; \
__typeof__(_type) *_name##_early_ptr = _name##_early_map
#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
EXPORT_PER_CPU_SYMBOL(_name)
#define DECLARE_EARLY_PER_CPU(_type, _name) \
DECLARE_PER_CPU(_type, _name); \
extern __typeof__(_type) *_name##_early_ptr; \
extern __typeof__(_type) _name##_early_map[]
#define early_per_cpu_ptr(_name) (_name##_early_ptr)
#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
#define early_per_cpu(_name, _cpu) \
(early_per_cpu_ptr(_name) ? \
early_per_cpu_ptr(_name)[_cpu] : \
per_cpu(_name, _cpu))
#else /* !CONFIG_SMP */
#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \
DEFINE_PER_CPU(_type, _name) = _initvalue
#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
EXPORT_PER_CPU_SYMBOL(_name)
#define DECLARE_EARLY_PER_CPU(_type, _name) \
DECLARE_PER_CPU(_type, _name)
#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu)
#define early_per_cpu_ptr(_name) NULL
/* no early_per_cpu_map() */
#endif /* !CONFIG_SMP */
#endif /* _ASM_X86_PERCPU_H_ */ #endif /* _ASM_X86_PERCPU_H_ */
...@@ -29,21 +29,12 @@ extern int smp_num_siblings; ...@@ -29,21 +29,12 @@ extern int smp_num_siblings;
extern unsigned int num_processors; extern unsigned int num_processors;
extern cpumask_t cpu_initialized; extern cpumask_t cpu_initialized;
#ifdef CONFIG_SMP
extern u16 x86_cpu_to_apicid_init[];
extern u16 x86_bios_cpu_apicid_init[];
extern void *x86_cpu_to_apicid_early_ptr;
extern void *x86_bios_cpu_apicid_early_ptr;
#else
#define x86_cpu_to_apicid_early_ptr NULL
#define x86_bios_cpu_apicid_early_ptr NULL
#endif
DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
DECLARE_PER_CPU(cpumask_t, cpu_core_map); DECLARE_PER_CPU(cpumask_t, cpu_core_map);
DECLARE_PER_CPU(u16, cpu_llc_id); DECLARE_PER_CPU(u16, cpu_llc_id);
DECLARE_PER_CPU(u16, x86_cpu_to_apicid);
DECLARE_PER_CPU(u16, x86_bios_cpu_apicid); DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
/* Static state in head.S used to set up a CPU */ /* Static state in head.S used to set up a CPU */
extern struct { extern struct {
......
...@@ -35,79 +35,88 @@ ...@@ -35,79 +35,88 @@
# endif # endif
#endif #endif
/* Node not present */
#define NUMA_NO_NODE (-1)
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
#include <linux/cpumask.h> #include <linux/cpumask.h>
#include <asm/mpspec.h> #include <asm/mpspec.h>
/* Mappings between logical cpu number and node number */
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
extern int cpu_to_node_map[];
#else
/* Returns the number of the current Node. */
#define numa_node_id() (early_cpu_to_node(raw_smp_processor_id()))
#endif
DECLARE_PER_CPU(int, x86_cpu_to_node_map);
#ifdef CONFIG_SMP
extern int x86_cpu_to_node_map_init[];
extern void *x86_cpu_to_node_map_early_ptr;
#else
#define x86_cpu_to_node_map_early_ptr NULL
#endif
/* Mappings between node number and cpus on that node. */
extern cpumask_t node_to_cpumask_map[]; extern cpumask_t node_to_cpumask_map[];
#define NUMA_NO_NODE (-1) /* Mappings between logical cpu number and node number */
extern int cpu_to_node_map[];
/* Returns the number of the node containing CPU 'cpu' */ /* Returns the number of the node containing CPU 'cpu' */
#ifdef CONFIG_X86_32
#define early_cpu_to_node(cpu) cpu_to_node(cpu)
static inline int cpu_to_node(int cpu) static inline int cpu_to_node(int cpu)
{ {
return cpu_to_node_map[cpu]; return cpu_to_node_map[cpu];
} }
#define early_cpu_to_node(cpu) cpu_to_node(cpu)
/* Returns a bitmask of CPUs on Node 'node'. */
static inline cpumask_t node_to_cpumask(int node)
{
return node_to_cpumask_map[node];
}
#else /* CONFIG_X86_64 */ #else /* CONFIG_X86_64 */
#ifdef CONFIG_SMP /* Mappings between node number and cpus on that node. */
extern cpumask_t *node_to_cpumask_map;
/* Mappings between logical cpu number and node number */
DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
/* Returns the number of the current Node. */
#define numa_node_id() read_pda(nodenumber)
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
extern int cpu_to_node(int cpu);
extern int early_cpu_to_node(int cpu);
extern cpumask_t *_node_to_cpumask_ptr(int node);
extern cpumask_t node_to_cpumask(int node);
#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
/* Returns the number of the node containing CPU 'cpu' */
static inline int cpu_to_node(int cpu)
{
return per_cpu(x86_cpu_to_node_map, cpu);
}
/* Same function but used if called before per_cpu areas are setup */
static inline int early_cpu_to_node(int cpu) static inline int early_cpu_to_node(int cpu)
{ {
int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr; if (early_per_cpu_ptr(x86_cpu_to_node_map))
return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
if (cpu_to_node_map)
return cpu_to_node_map[cpu];
else if (per_cpu_offset(cpu))
return per_cpu(x86_cpu_to_node_map, cpu); return per_cpu(x86_cpu_to_node_map, cpu);
else
return NUMA_NO_NODE;
} }
#else
#define early_cpu_to_node(cpu) cpu_to_node(cpu)
#endif
static inline int cpu_to_node(int cpu) /* Returns a pointer to the cpumask of CPUs on Node 'node'. */
static inline cpumask_t *_node_to_cpumask_ptr(int node)
{ {
#ifdef CONFIG_DEBUG_PER_CPU_MAPS return &node_to_cpumask_map[node];
if (x86_cpu_to_node_map_early_ptr) {
printk("KERN_NOTICE cpu_to_node(%d): usage too early!\n",
(int)cpu);
dump_stack();
return ((int *)x86_cpu_to_node_map_early_ptr)[cpu];
}
#endif
return per_cpu(x86_cpu_to_node_map, cpu);
} }
#ifdef CONFIG_NUMA /* Returns a bitmask of CPUs on Node 'node'. */
static inline cpumask_t node_to_cpumask(int node)
{
return node_to_cpumask_map[node];
}
/* Returns a pointer to the cpumask of CPUs on Node 'node'. */ #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
/* Replace default node_to_cpumask_ptr with optimized version */
#define node_to_cpumask_ptr(v, node) \ #define node_to_cpumask_ptr(v, node) \
cpumask_t *v = &(node_to_cpumask_map[node]) cpumask_t *v = _node_to_cpumask_ptr(node)
#define node_to_cpumask_ptr_next(v, node) \ #define node_to_cpumask_ptr_next(v, node) \
v = &(node_to_cpumask_map[node]) v = _node_to_cpumask_ptr(node)
#endif
#endif /* CONFIG_X86_64 */ #endif /* CONFIG_X86_64 */
...@@ -117,20 +126,6 @@ static inline int cpu_to_node(int cpu) ...@@ -117,20 +126,6 @@ static inline int cpu_to_node(int cpu)
*/ */
#define parent_node(node) (node) #define parent_node(node) (node)
/* Returns a bitmask of CPUs on Node 'node'. */
static inline cpumask_t node_to_cpumask(int node)
{
return node_to_cpumask_map[node];
}
/* Returns the number of the first CPU on Node 'node'. */
static inline int node_to_first_cpu(int node)
{
cpumask_t mask = node_to_cpumask(node);
return first_cpu(mask);
}
#define pcibus_to_node(bus) __pcibus_to_node(bus) #define pcibus_to_node(bus) __pcibus_to_node(bus)
#define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus) #define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus)
...@@ -180,12 +175,44 @@ extern int __node_distance(int, int); ...@@ -180,12 +175,44 @@ extern int __node_distance(int, int);
#define node_distance(a, b) __node_distance(a, b) #define node_distance(a, b) __node_distance(a, b)
#endif #endif
#else /* CONFIG_NUMA */ #else /* !CONFIG_NUMA */
#define numa_node_id() 0
#define cpu_to_node(cpu) 0
#define early_cpu_to_node(cpu) 0
static inline cpumask_t *_node_to_cpumask_ptr(int node)
{
return &cpu_online_map;
}
static inline cpumask_t node_to_cpumask(int node)
{
return cpu_online_map;
}
static inline int node_to_first_cpu(int node)
{
return first_cpu(cpu_online_map);
}
/* Replace default node_to_cpumask_ptr with optimized version */
#define node_to_cpumask_ptr(v, node) \
cpumask_t *v = _node_to_cpumask_ptr(node)
#define node_to_cpumask_ptr_next(v, node) \
v = _node_to_cpumask_ptr(node)
#endif #endif
#include <asm-generic/topology.h> #include <asm-generic/topology.h>
#ifdef CONFIG_NUMA
/* Returns the number of the first CPU on Node 'node'. */
static inline int node_to_first_cpu(int node)
{
node_to_cpumask_ptr(mask, node);
return first_cpu(*mask);
}
#endif
extern cpumask_t cpu_coregroup_map(int cpu); extern cpumask_t cpu_coregroup_map(int cpu);
#ifdef ENABLE_TOPO_DEFINES #ifdef ENABLE_TOPO_DEFINES
...@@ -193,6 +220,9 @@ extern cpumask_t cpu_coregroup_map(int cpu); ...@@ -193,6 +220,9 @@ extern cpumask_t cpu_coregroup_map(int cpu);
#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
/* indicates that pointers to the topology cpumask_t maps are valid */
#define arch_provides_topology_pointers yes
#endif #endif
static inline void arch_fix_phys_package_id(int num, u32 slot) static inline void arch_fix_phys_package_id(int num, u32 slot)
...@@ -220,4 +250,4 @@ static inline void set_mp_bus_to_node(int busnum, int node) ...@@ -220,4 +250,4 @@ static inline void set_mp_bus_to_node(int busnum, int node)
} }
#endif #endif
#endif #endif /* _ASM_X86_TOPOLOGY_H */
...@@ -1026,6 +1026,7 @@ extern void mem_init(void); ...@@ -1026,6 +1026,7 @@ extern void mem_init(void);
extern void show_mem(void); extern void show_mem(void);
extern void si_meminfo(struct sysinfo * val); extern void si_meminfo(struct sysinfo * val);
extern void si_meminfo_node(struct sysinfo *val, int nid); extern void si_meminfo_node(struct sysinfo *val, int nid);
extern int after_bootmem;
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
extern void setup_per_cpu_pageset(void); extern void setup_per_cpu_pageset(void);
......
...@@ -6538,9 +6538,9 @@ static int find_next_best_node(int node, nodemask_t *used_nodes) ...@@ -6538,9 +6538,9 @@ static int find_next_best_node(int node, nodemask_t *used_nodes)
min_val = INT_MAX; min_val = INT_MAX;
for (i = 0; i < MAX_NUMNODES; i++) { for (i = 0; i < nr_node_ids; i++) {
/* Start at @node */ /* Start at @node */
n = (node + i) % MAX_NUMNODES; n = (node + i) % nr_node_ids;
if (!nr_cpus_node(n)) if (!nr_cpus_node(n))
continue; continue;
...@@ -6734,7 +6734,7 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) ...@@ -6734,7 +6734,7 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
if (!sched_group_nodes) if (!sched_group_nodes)
continue; continue;
for (i = 0; i < MAX_NUMNODES; i++) { for (i = 0; i < nr_node_ids; i++) {
struct sched_group *oldsg, *sg = sched_group_nodes[i]; struct sched_group *oldsg, *sg = sched_group_nodes[i];
*nodemask = node_to_cpumask(i); *nodemask = node_to_cpumask(i);
...@@ -6927,7 +6927,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, ...@@ -6927,7 +6927,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
/* /*
* Allocate the per-node list of sched groups * Allocate the per-node list of sched groups
*/ */
sched_group_nodes = kcalloc(MAX_NUMNODES, sizeof(struct sched_group *), sched_group_nodes = kcalloc(nr_node_ids, sizeof(struct sched_group *),
GFP_KERNEL); GFP_KERNEL);
if (!sched_group_nodes) { if (!sched_group_nodes) {
printk(KERN_WARNING "Can not alloc sched group node list\n"); printk(KERN_WARNING "Can not alloc sched group node list\n");
...@@ -7066,7 +7066,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, ...@@ -7066,7 +7066,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
#endif #endif
/* Set up physical groups */ /* Set up physical groups */
for (i = 0; i < MAX_NUMNODES; i++) { for (i = 0; i < nr_node_ids; i++) {
SCHED_CPUMASK_VAR(nodemask, allmasks); SCHED_CPUMASK_VAR(nodemask, allmasks);
SCHED_CPUMASK_VAR(send_covered, allmasks); SCHED_CPUMASK_VAR(send_covered, allmasks);
...@@ -7090,7 +7090,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, ...@@ -7090,7 +7090,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
send_covered, tmpmask); send_covered, tmpmask);
} }
for (i = 0; i < MAX_NUMNODES; i++) { for (i = 0; i < nr_node_ids; i++) {
/* Set up node groups */ /* Set up node groups */
struct sched_group *sg, *prev; struct sched_group *sg, *prev;
SCHED_CPUMASK_VAR(nodemask, allmasks); SCHED_CPUMASK_VAR(nodemask, allmasks);
...@@ -7129,9 +7129,9 @@ static int __build_sched_domains(const cpumask_t *cpu_map, ...@@ -7129,9 +7129,9 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
cpus_or(*covered, *covered, *nodemask); cpus_or(*covered, *covered, *nodemask);
prev = sg; prev = sg;
for (j = 0; j < MAX_NUMNODES; j++) { for (j = 0; j < nr_node_ids; j++) {
SCHED_CPUMASK_VAR(notcovered, allmasks); SCHED_CPUMASK_VAR(notcovered, allmasks);
int n = (i + j) % MAX_NUMNODES; int n = (i + j) % nr_node_ids;
node_to_cpumask_ptr(pnodemask, n); node_to_cpumask_ptr(pnodemask, n);
cpus_complement(*notcovered, *covered); cpus_complement(*notcovered, *covered);
...@@ -7184,7 +7184,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, ...@@ -7184,7 +7184,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
} }
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
for (i = 0; i < MAX_NUMNODES; i++) for (i = 0; i < nr_node_ids; i++)
init_numa_sched_groups_power(sched_group_nodes[i]); init_numa_sched_groups_power(sched_group_nodes[i]);
if (sd_allnodes) { if (sd_allnodes) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment