Commit 7a9a3e86 authored by Ingo Molnar, committed by Linus Torvalds

[PATCH] sched: fix SCHED_SMT & numa=fake=2 lockup

This patch fixes an interaction between the numa=fake=<domains> feature,
the domain setup code and cpu_sibling_map[].  The bug leads to a bootup
crash when using numa=fake=2 on a 2-way/4-way SMP+HT box.

When SCHED_SMT is turned on, the domains-setup code relies on siblings not
spanning multiple domains (which makes perfect sense).  But numa=fake=2
creates an asymmetric 1101/0010 split between CPUs, which results in two
siblings being on different nodes.

The patch adds a check_sibling_maps() function that checks the sibling maps
and fixes them up if they violate this rule.  (It also prints a warning in
that case.)

The patch also turns SCHED_DOMAIN_DEBUG back on - had this been enabled
we'd have noticed this bug much earlier.

From: Badari Pulavarty <pbadari@us.ibm.com>

  arch/x86_64/mm/numa.c: In function `numa_setup':
  arch/x86_64/mm/numa.c:332: error: `numa_fake' undeclared (first use in this function)
  arch/x86_64/mm/numa.c:332: error: (Each undeclared identifier is reported only once
  arch/x86_64/mm/numa.c:332: error: for each function it appears in.)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent ad5f30c4
......@@ -264,11 +264,13 @@ __init int numa_setup(char *opt)
{
	/* An option of exactly "off" sets the numa_off flag. */
	if (strcmp(opt, "off") == 0)
		numa_off = 1;
#ifdef CONFIG_NUMA_EMU
	/*
	 * "fake=<N>": parse the number following the prefix (base is
	 * auto-detected by simple_strtoul) and cap it at MAX_NUMNODES.
	 * numa_fake is only referenced when CONFIG_NUMA_EMU is set.
	 */
	if (strncmp(opt, "fake=", 5) == 0) {
		numa_fake = simple_strtoul(opt + 5, NULL, 0);
		if (numa_fake >= MAX_NUMNODES)
			numa_fake = MAX_NUMNODES;
	}
#endif
	/* Always returns 1, whether or not the option was recognised. */
	return 1;
}
......
......@@ -4189,6 +4189,30 @@ static int __devinit cpu_to_node_group(int cpu)
}
#endif
#if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA)
/*
* The domains setup code relies on siblings not spanning
* multiple nodes. Make sure the architecture has a proper
* siblings map:
*/
static void check_sibling_maps(void)
{
int i, j;
for_each_online_cpu(i) {
for_each_cpu_mask(j, cpu_sibling_map[i]) {
if (cpu_to_node(i) != cpu_to_node(j)) {
printk(KERN_INFO "warning: CPU %d siblings map "
"to different node - isolating "
"them.\n", i);
cpu_sibling_map[i] = cpumask_of_cpu(i);
break;
}
}
}
}
#endif
/*
* Set up scheduler domains and groups. Callers must hold the hotplug lock.
*/
......@@ -4197,6 +4221,9 @@ static void __devinit arch_init_sched_domains(void)
int i;
cpumask_t cpu_default_map;
#if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA)
check_sibling_maps();
#endif
/*
* Setup mask for cpus without special case scheduling requirements.
* For now this just excludes isolated cpus, but could be used to
......@@ -4319,7 +4346,7 @@ static void __devinit arch_destroy_sched_domains(void)
#endif /* ARCH_HAS_SCHED_DOMAIN */
#undef SCHED_DOMAIN_DEBUG
#define SCHED_DOMAIN_DEBUG
#ifdef SCHED_DOMAIN_DEBUG
static void sched_domain_debug(void)
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment