Commit e18e19ad authored by Andrew Morton, committed by Linus Torvalds

[PATCH] sched: implement domains for i386 HT

From: Nick Piggin <piggin@cyberone.com.au>

The following patch builds a scheduling description for the i386
architecture using cpu_sibling_map to set up SMT if CONFIG_SCHED_SMT is
set.

It could be made fancier by collapsing degenerate domains at runtime (i.e.
1 sibling per CPU, or 1 NUMA node in the computer).


From: Zwane Mwaikambo <zwane@arm.linux.org.uk>

   This fixes an oops due to cpu_sibling_map being uninitialised when a
   system with no MP table (most UP boxen) boots a CONFIG_SMT kernel.  What
   also happens is that the cpu_group lists end up not being terminated
   properly, but this oops kills it first.  Patch tested on UP w/o MP table,
   2x P2 and UP Xeon w/ no siblings.

From: "Martin J. Bligh" <mbligh@aracnet.com>,
      Nick Piggin <piggin@cyberone.com.au>

   Change arch_init_sched_domains to use cpu_online_map

From: Anton Blanchard <anton@samba.org>

   Fix build with NR_CPUS > BITS_PER_LONG
parent 7a1dc0ea
...@@ -479,6 +479,16 @@ config NR_CPUS ...@@ -479,6 +479,16 @@ config NR_CPUS
This is purely to save memory - each supported CPU adds This is purely to save memory - each supported CPU adds
approximately eight kilobytes to the kernel image. approximately eight kilobytes to the kernel image.
# "n" is the Kconfig spelling for a disabled bool default; "off" would be
# parsed as a reference to an (undefined) symbol named "off".
config SCHED_SMT
	bool "SMT (Hyperthreading) scheduler support"
	depends on SMP
	default n
	help
	  SMT scheduler support improves the CPU scheduler's decision making
	  when dealing with Intel Pentium 4 chips with HyperThreading at a
	  cost of slightly increased overhead in some places. If unsure say
	  N here.
config PREEMPT config PREEMPT
bool "Preemptible Kernel" bool "Preemptible Kernel"
help help
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h> #include <linux/kernel_stat.h>
#include <linux/smp_lock.h> #include <linux/smp_lock.h>
#include <linux/irq.h> #include <linux/irq.h>
...@@ -955,6 +956,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus) ...@@ -955,6 +956,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
current_thread_info()->cpu = 0; current_thread_info()->cpu = 0;
smp_tune_scheduling(); smp_tune_scheduling();
cpus_clear(cpu_sibling_map[0]);
cpu_set(0, cpu_sibling_map[0]);
/* /*
* If we couldn't find an SMP configuration at boot time, * If we couldn't find an SMP configuration at boot time,
...@@ -1085,7 +1088,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) ...@@ -1085,7 +1088,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
* efficiently. * efficiently.
*/ */
for (cpu = 0; cpu < NR_CPUS; cpu++) for (cpu = 0; cpu < NR_CPUS; cpu++)
cpu_sibling_map[cpu] = CPU_MASK_NONE; cpus_clear(cpu_sibling_map[cpu]);
for (cpu = 0; cpu < NR_CPUS; cpu++) { for (cpu = 0; cpu < NR_CPUS; cpu++) {
int siblings = 0; int siblings = 0;
...@@ -1122,6 +1125,207 @@ static void __init smp_boot_cpus(unsigned int max_cpus) ...@@ -1122,6 +1125,207 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
synchronize_tsc_bp(); synchronize_tsc_bp();
} }
#ifdef CONFIG_SCHED_SMT
#ifdef CONFIG_NUMA
/*
 * Backing storage for the scheduler groups and the two upper domain levels
 * built by arch_init_sched_domains() below (SMT + NUMA configuration).
 * sched_group_cpus[] and sched_group_phys[] are indexed by CPU number,
 * sched_group_nodes[] by node number.
 */
static struct sched_group sched_group_cpus[NR_CPUS];
static struct sched_group sched_group_phys[NR_CPUS];
static struct sched_group sched_group_nodes[MAX_NUMNODES];
static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static DEFINE_PER_CPU(struct sched_domain, node_domains);

/*
 * Build the scheduler domain hierarchy for every online CPU:
 *
 *   sibling domain (span = cpu_sibling_map[cpu])
 *     -> physical domain (span = CPUs of the CPU's node)
 *       -> node domain (span = cpu_online_map)
 *
 * Each level's groups are chained through ->next into a circular list:
 *   - sibling level:  one single-CPU group per sibling, one ring per package;
 *   - physical level: one group per package (keyed by its first sibling),
 *                     one ring per node;
 *   - node level:     one group per node that has online CPUs, a single ring.
 *
 * Note: the locals first_cpu/last_cpu are list head/tail pointers, while
 * first_cpu(mask) with an argument list is the cpumask helper of the same
 * name.
 */
__init void arch_init_sched_domains(void)
{
	int i;
	struct sched_group *first_cpu = NULL, *last_cpu = NULL;

	/* Set up domains */
	for_each_cpu_mask(i, cpu_online_map) {
		struct sched_domain *cpu_domain = cpu_sched_domain(i);
		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
		struct sched_domain *node_domain = &per_cpu(node_domains, i);
		int node = cpu_to_node(i);
		cpumask_t nodemask = node_to_cpumask(node);

		*cpu_domain = SD_SIBLING_INIT;
		cpu_domain->span = cpu_sibling_map[i];

		/*
		 * NOTE(review): unlike the group masks built below, this span
		 * is not restricted to cpu_online_map - confirm that
		 * node_to_cpumask() only reports online CPUs at this point.
		 */
		*phys_domain = SD_CPU_INIT;
		phys_domain->span = nodemask;
		phys_domain->flags |= SD_FLAG_IDLE;

		*node_domain = SD_NODE_INIT;
		node_domain->span = cpu_online_map;
	}

	/* Set up CPU (sibling) groups */
	for_each_cpu_mask(i, cpu_online_map) {
		struct sched_domain *cpu_domain = cpu_sched_domain(i);
		int j;

		first_cpu = last_cpu = NULL;

		/* Build each package's ring once, from its first sibling. */
		if (i != first_cpu(cpu_domain->span))
			continue;

		for_each_cpu_mask(j, cpu_domain->span) {
			struct sched_group *cpu = &sched_group_cpus[j];

			/*
			 * Use cpus_clear() rather than assigning CPU_MASK_NONE,
			 * matching the other cpumask resets in this file (the
			 * NR_CPUS > BITS_PER_LONG build fix).
			 */
			cpus_clear(cpu->cpumask);
			cpu_set(j, cpu->cpumask);

			if (!first_cpu)
				first_cpu = cpu;
			if (last_cpu)
				last_cpu->next = cpu;
			last_cpu = cpu;
		}
		/* Close the ring. */
		last_cpu->next = first_cpu;
	}

	for (i = 0; i < MAX_NUMNODES; i++) {
		int j;
		cpumask_t nodemask;
		/*
		 * Named temporary: cpus_and() is handed a variable rather than
		 * a function return value, as the generic scheduler code does.
		 */
		cpumask_t tmp = node_to_cpumask(i);

		cpus_and(nodemask, tmp, cpu_online_map);

		/* Skip nodes with no online CPUs. */
		if (cpus_empty(nodemask))
			continue;

		first_cpu = last_cpu = NULL;
		/* Set up physical groups */
		for_each_cpu_mask(j, nodemask) {
			struct sched_domain *cpu_domain = cpu_sched_domain(j);
			struct sched_group *cpu = &sched_group_phys[j];

			/* One physical group per package, owned by its first sibling. */
			if (j != first_cpu(cpu_domain->span))
				continue;

			cpu->cpumask = cpu_domain->span;

			if (!first_cpu)
				first_cpu = cpu;
			if (last_cpu)
				last_cpu->next = cpu;
			last_cpu = cpu;
		}
		last_cpu->next = first_cpu;
	}

	/* Set up nodes */
	first_cpu = last_cpu = NULL;
	for (i = 0; i < MAX_NUMNODES; i++) {
		struct sched_group *cpu = &sched_group_nodes[i];
		cpumask_t nodemask;
		cpumask_t tmp = node_to_cpumask(i);

		cpus_and(nodemask, tmp, cpu_online_map);

		if (cpus_empty(nodemask))
			continue;

		cpu->cpumask = nodemask;

		if (!first_cpu)
			first_cpu = cpu;
		if (last_cpu)
			last_cpu->next = cpu;
		last_cpu = cpu;
	}
	last_cpu->next = first_cpu;

	/*
	 * Barrier between building the group rings and attaching them to the
	 * domains (presumably so the rings are complete before they become
	 * reachable - confirm against the scheduler's readers).
	 */
	mb();
	for_each_cpu_mask(i, cpu_online_map) {
		int node = cpu_to_node(i);
		struct sched_domain *cpu_domain = cpu_sched_domain(i);
		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
		struct sched_domain *node_domain = &per_cpu(node_domains, i);
		struct sched_group *cpu_group = &sched_group_cpus[i];
		struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)];
		struct sched_group *node_group = &sched_group_nodes[node];

		cpu_domain->parent = phys_domain;
		phys_domain->parent = node_domain;

		node_domain->groups = node_group;
		phys_domain->groups = phys_group;
		cpu_domain->groups = cpu_group;
	}
}
#else /* CONFIG_NUMA */
/*
 * Backing storage for the sibling-level and physical-level scheduler groups
 * (both indexed by CPU number) and for the per-CPU physical domain.
 */
static struct sched_group sched_group_cpus[NR_CPUS];
static struct sched_group sched_group_phys[NR_CPUS];
static DEFINE_PER_CPU(struct sched_domain, phys_domains);

/*
 * Build the two-level (SMT, non-NUMA) scheduler domain hierarchy for every
 * online CPU:
 *
 *   sibling domain (span = cpu_sibling_map[cpu])
 *     -> physical domain (span = cpu_online_map)
 *
 * Groups at each level are linked through ->next into a circular list:
 * one ring of single-CPU groups per package at the sibling level, and a
 * single ring with one group per package at the physical level.
 */
__init void arch_init_sched_domains(void)
{
	struct sched_group *head = NULL, *tail = NULL;
	int cpu;

	/* Initialise both domain levels from their templates. */
	for_each_cpu_mask(cpu, cpu_online_map) {
		struct sched_domain *smt_sd = cpu_sched_domain(cpu);
		struct sched_domain *phys_sd = &per_cpu(phys_domains, cpu);

		*smt_sd = SD_SIBLING_INIT;
		smt_sd->span = cpu_sibling_map[cpu];

		*phys_sd = SD_CPU_INIT;
		phys_sd->span = cpu_online_map;
		phys_sd->flags |= SD_FLAG_IDLE;
	}

	/* Sibling groups: one ring per package, built from its first sibling. */
	for_each_cpu_mask(cpu, cpu_online_map) {
		struct sched_domain *smt_sd = cpu_sched_domain(cpu);
		int sib;

		head = tail = NULL;

		if (cpu == first_cpu(smt_sd->span)) {
			for_each_cpu_mask(sib, smt_sd->span) {
				struct sched_group *grp = &sched_group_cpus[sib];

				/* Each sibling group covers exactly one CPU. */
				cpus_clear(grp->cpumask);
				cpu_set(sib, grp->cpumask);

				/* Append to the list being built. */
				if (tail)
					tail->next = grp;
				else
					head = grp;
				tail = grp;
			}
			/* Close the ring. */
			tail->next = head;
		}
	}

	/* Physical groups: one group per package, all on a single ring. */
	head = tail = NULL;
	for_each_cpu_mask(cpu, cpu_online_map) {
		struct sched_domain *smt_sd = cpu_sched_domain(cpu);
		struct sched_group *grp = &sched_group_phys[cpu];

		/* Only the first sibling of a package owns a physical group. */
		if (cpu != first_cpu(smt_sd->span))
			continue;

		grp->cpumask = smt_sd->span;

		if (tail)
			tail->next = grp;
		else
			head = grp;
		tail = grp;
	}
	tail->next = head;

	/* Barrier between building the rings and attaching them below. */
	mb();

	/* Attach parents and groups to each online CPU's domains. */
	for_each_cpu_mask(cpu, cpu_online_map) {
		struct sched_domain *smt_sd = cpu_sched_domain(cpu);
		struct sched_domain *phys_sd = &per_cpu(phys_domains, cpu);

		smt_sd->parent = phys_sd;
		phys_sd->groups = &sched_group_phys[first_cpu(smt_sd->span)];
		smt_sd->groups = &sched_group_cpus[cpu];
	}
}
#endif /* CONFIG_NUMA */
#endif /* CONFIG_SCHED_SMT */
/* These are wrappers to interface to the new boot process. Someone /* These are wrappers to interface to the new boot process. Someone
who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
void __init smp_prepare_cpus(unsigned int max_cpus) void __init smp_prepare_cpus(unsigned int max_cpus)
......
...@@ -648,4 +648,9 @@ extern inline void prefetchw(const void *x) ...@@ -648,4 +648,9 @@ extern inline void prefetchw(const void *x)
extern void select_idle_routine(const struct cpuinfo_x86 *c); extern void select_idle_routine(const struct cpuinfo_x86 *c);
#ifdef CONFIG_SCHED_SMT
#define ARCH_HAS_SCHED_DOMAIN
#define ARCH_HAS_SCHED_WAKE_BALANCE
#endif
#endif /* __ASM_I386_PROCESSOR_H */ #endif /* __ASM_I386_PROCESSOR_H */
...@@ -572,6 +572,22 @@ struct sched_domain { ...@@ -572,6 +572,22 @@ struct sched_domain {
unsigned int nr_balance_failed; /* initialise to 0 */ unsigned int nr_balance_failed; /* initialise to 0 */
}; };
/*
 * Common values for SMT siblings.
 *
 * Template for a sibling-level sched_domain, used as a whole-struct
 * initialiser (*sd = SD_SIBLING_INIT). The span is empty and the parent and
 * groups pointers are NULL here; the code that copies the template is
 * expected to fill them in afterwards. Balancing intervals are short
 * (min 1, max 2) and cache_hot_time / cache_nice_tries are 0 (presumably
 * because siblings share caches, making migration between them cheap -
 * confirm).
 */
#define SD_SIBLING_INIT (struct sched_domain) { \
.span = CPU_MASK_NONE, \
.parent = NULL, \
.groups = NULL, \
.min_interval = 1, \
.max_interval = 2, \
.busy_factor = 8, \
.imbalance_pct = 110, \
.cache_hot_time = 0, \
.cache_nice_tries = 0, \
.flags = SD_FLAG_FASTMIGRATE | SD_FLAG_NEWIDLE | SD_FLAG_WAKE,\
.balance_interval = 1, \
.nr_balance_failed = 0, \
}
/* Common values for CPUs */ /* Common values for CPUs */
#define SD_CPU_INIT (struct sched_domain) { \ #define SD_CPU_INIT (struct sched_domain) { \
.span = CPU_MASK_NONE, \ .span = CPU_MASK_NONE, \
......
...@@ -3295,28 +3295,20 @@ DEFINE_PER_CPU(struct sched_domain, node_domains); ...@@ -3295,28 +3295,20 @@ DEFINE_PER_CPU(struct sched_domain, node_domains);
static void __init arch_init_sched_domains(void) static void __init arch_init_sched_domains(void)
{ {
int i; int i;
cpumask_t all_cpus = CPU_MASK_NONE;
struct sched_group *first_node = NULL, *last_node = NULL; struct sched_group *first_node = NULL, *last_node = NULL;
for (i = 0; i < NR_CPUS; i++) {
if (!cpu_possible(i))
continue;
cpu_set(i, all_cpus);
}
/* Set up domains */ /* Set up domains */
for_each_cpu_mask(i, all_cpus) { for_each_cpu_mask(i, cpu_online_map) {
int node = cpu_to_node(i); int node = cpu_to_node(i);
cpumask_t nodemask = node_to_cpumask(node); cpumask_t nodemask = node_to_cpumask(node);
struct sched_domain *node_domain = &per_cpu(node_domains, i); struct sched_domain *node_domain = &per_cpu(node_domains, i);
struct sched_domain *cpu_domain = cpu_sched_domain(i); struct sched_domain *cpu_domain = cpu_sched_domain(i);
*node_domain = SD_NODE_INIT; *node_domain = SD_NODE_INIT;
node_domain->span = all_cpus; node_domain->span = cpu_online_map;
*cpu_domain = SD_CPU_INIT; *cpu_domain = SD_CPU_INIT;
cpus_and(cpu_domain->span, nodemask, all_cpus); cpus_and(cpu_domain->span, nodemask, cpu_online_map);
cpu_domain->parent = node_domain; cpu_domain->parent = node_domain;
} }
...@@ -3326,8 +3318,9 @@ static void __init arch_init_sched_domains(void) ...@@ -3326,8 +3318,9 @@ static void __init arch_init_sched_domains(void)
int j; int j;
cpumask_t nodemask; cpumask_t nodemask;
struct sched_group *node = &sched_group_nodes[i]; struct sched_group *node = &sched_group_nodes[i];
cpumask_t tmp = node_to_cpumask(i);
cpus_and(nodemask, node_to_cpumask(i), all_cpus); cpus_and(nodemask, tmp, cpu_online_map);
if (cpus_empty(nodemask)) if (cpus_empty(nodemask))
continue; continue;
...@@ -3357,7 +3350,7 @@ static void __init arch_init_sched_domains(void) ...@@ -3357,7 +3350,7 @@ static void __init arch_init_sched_domains(void)
last_node->next = first_node; last_node->next = first_node;
mb(); mb();
for_each_cpu_mask(i, all_cpus) { for_each_cpu_mask(i, cpu_online_map) {
struct sched_domain *node_domain = &per_cpu(node_domains, i); struct sched_domain *node_domain = &per_cpu(node_domains, i);
struct sched_domain *cpu_domain = cpu_sched_domain(i); struct sched_domain *cpu_domain = cpu_sched_domain(i);
node_domain->groups = &sched_group_nodes[cpu_to_node(i)]; node_domain->groups = &sched_group_nodes[cpu_to_node(i)];
...@@ -3369,26 +3362,18 @@ static void __init arch_init_sched_domains(void) ...@@ -3369,26 +3362,18 @@ static void __init arch_init_sched_domains(void)
static void __init arch_init_sched_domains(void) static void __init arch_init_sched_domains(void)
{ {
int i; int i;
cpumask_t all_cpus = CPU_MASK_NONE;
struct sched_group *first_cpu = NULL, *last_cpu = NULL; struct sched_group *first_cpu = NULL, *last_cpu = NULL;
for (i = 0; i < NR_CPUS; i++) {
if (!cpu_possible(i))
continue;
cpu_set(i, all_cpus);
}
/* Set up domains */ /* Set up domains */
for_each_cpu_mask(i, all_cpus) { for_each_cpu_mask(i, cpu_online_map) {
struct sched_domain *cpu_domain = cpu_sched_domain(i); struct sched_domain *cpu_domain = cpu_sched_domain(i);
*cpu_domain = SD_CPU_INIT; *cpu_domain = SD_CPU_INIT;
cpu_domain->span = all_cpus; cpu_domain->span = cpu_online_map;
} }
/* Set up CPU groups */ /* Set up CPU groups */
for_each_cpu_mask(i, all_cpus) { for_each_cpu_mask(i, cpu_online_map) {
struct sched_group *cpu = &sched_group_cpus[i]; struct sched_group *cpu = &sched_group_cpus[i];
cpus_clear(cpu->cpumask); cpus_clear(cpu->cpumask);
...@@ -3403,7 +3388,7 @@ static void __init arch_init_sched_domains(void) ...@@ -3403,7 +3388,7 @@ static void __init arch_init_sched_domains(void)
last_cpu->next = first_cpu; last_cpu->next = first_cpu;
mb(); mb();
for_each_cpu_mask(i, all_cpus) { for_each_cpu_mask(i, cpu_online_map) {
struct sched_domain *cpu_domain = cpu_sched_domain(i); struct sched_domain *cpu_domain = cpu_sched_domain(i);
cpu_domain->groups = &sched_group_cpus[i]; cpu_domain->groups = &sched_group_cpus[i];
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment