Commit 8a7a2318 authored by Nick Piggin, committed by Linus Torvalds

[PATCH] sched: consolidate sched domains

  Teach the generic domains builder about SMT, and consolidate all
  architecture specific domain code into that.  Also, the SD_*_INIT macros can
  now be redefined by arch code without duplicating the entire setup code. 
  This can be done by defining ARCH_HAS_SCHED_TUNE.

  The generic builder has been simplified with the addition of a helper
  macro which will probably prove to be useful to arch specific code as well
  and should be exported if that is the case.
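  As an illustration of the tuning hook (a sketch, not code from this
  patch; the flag choice below is invented for the example, and fields
  not listed are simply left zero by the designated initializer), an
  architecture header could do:

  #define ARCH_HAS_SCHED_TUNE

  #define SD_CPU_INIT (struct sched_domain) {		\
  	.span			= CPU_MASK_NONE,	\
  	.flags			= SD_BALANCE_NEWIDLE	\
  				| SD_WAKE_AFFINE,	\
  	.balance_interval	= 1,			\
  	.nr_balance_failed	= 0,			\
  }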
Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>

From: Matthew Dobson <colpatch@us.ibm.com>

  The attached patch is against 2.6.8-rc2-mm2, and removes Nick's
  conditional definition & population of cpu_sibling_map[] in favor of my
  unconditional ones.  This does not affect how cpu_sibling_map is used, just
  gives it broader scope.

From: Nick Piggin <nickpiggin@yahoo.com.au>

  Small fix to sched-consolidate-domains.patch picked up by

From: Suresh <suresh.b.siddha@intel.com>

  another sched consolidate domains fix

From: Nick Piggin <nickpiggin@yahoo.com.au>

  Don't use cpu_sibling_map if !CONFIG_SCHED_SMT

  This one spotted by Dimitri Sivanich <sivanich@sgi.com>
Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent c62e7cdb
@@ -5,12 +5,13 @@ MUST be NULL terminated, and domain structures should be per-CPU as they
are locklessly updated.
Each scheduling domain spans a number of CPUs (stored in the ->span field).
A domain's span MUST be a superset of its child's span, and a base domain
for CPU i MUST span at least i. The top domain for each CPU will generally
span all CPUs in the system although strictly it doesn't have to, but this
could lead to a case where some CPUs will never be given tasks to run unless
the CPUs allowed mask is explicitly set. A sched domain's span means "balance
process load among these CPUs".
A domain's span MUST be a superset of its child's span (this restriction could
be relaxed if the need arises), and a base domain for CPU i MUST span at least
i. The top domain for each CPU will generally span all CPUs in the system
although strictly it doesn't have to, but this could lead to a case where some
CPUs will never be given tasks to run unless the CPUs allowed mask is
explicitly set. A sched domain's span means "balance process load among these
CPUs".
Each scheduling domain must have one or more CPU groups (struct sched_group)
which are organised as a circular one way linked list from the ->groups
@@ -46,6 +47,20 @@ The implementor should read comments in include/linux/sched.h:
struct sched_domain fields, SD_FLAG_*, SD_*_INIT to get an idea of
the specifics and what to tune.
For SMT, the architecture must define CONFIG_SCHED_SMT and provide a
cpumask_t cpu_sibling_map[NR_CPUS], where cpu_sibling_map[i] is the mask of
all "i"'s siblings as well as "i" itself.
Architectures may override the default SD_*_INIT flags while using the
generic domain builder in kernel/sched.c if they wish to retain the
traditional SMT->SMP->NUMA topology (or some subset of that). This can
be done by #define'ing ARCH_HAS_SCHED_TUNE.
Alternatively, the architecture may completely override the generic domain
builder by #define'ing ARCH_HAS_SCHED_DOMAIN, and exporting your
arch_init_sched_domains function. This function will attach domains to all
CPUs using cpu_attach_domain.
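A minimal sketch of such an override (illustrative only: one flat
domain per CPU spanning all possible CPUs, with one single-CPU group
each; a real implementation would build a full topology like the arch
code this patch removes):

#define ARCH_HAS_SCHED_DOMAIN	/* in the arch header */

static struct sched_group sched_group_cpus[NR_CPUS];
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);

__init void arch_init_sched_domains(void)
{
	int i;
	struct sched_group *first = NULL, *last = NULL;

	/* One flat domain per CPU, all sharing the same group list. */
	for_each_cpu(i) {
		struct sched_domain *sd = &per_cpu(cpu_domains, i);

		*sd = SD_CPU_INIT;
		sd->span = cpu_possible_map;
		sd->groups = &sched_group_cpus[i];
	}

	/* Link one single-CPU group per CPU into a circular list. */
	for_each_cpu(i) {
		struct sched_group *sg = &sched_group_cpus[i];

		cpus_clear(sg->cpumask);
		cpu_set(i, sg->cpumask);
		sg->cpu_power = SCHED_LOAD_SCALE;
		if (!first)
			first = sg;
		if (last)
			last->next = sg;
		last = sg;
	}
	last->next = first;

	mb();	/* domains were modified outside any lock */
	for_each_cpu(i)
		cpu_attach_domain(&per_cpu(cpu_domains, i), i);
}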
Implementors should change the line
#undef SCHED_DOMAIN_DEBUG
to
......
@@ -1135,213 +1135,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
synchronize_tsc_bp();
}
#ifdef CONFIG_SCHED_SMT
#ifdef CONFIG_NUMA
static struct sched_group sched_group_cpus[NR_CPUS];
static struct sched_group sched_group_phys[NR_CPUS];
static struct sched_group sched_group_nodes[MAX_NUMNODES];
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static DEFINE_PER_CPU(struct sched_domain, node_domains);
__init void arch_init_sched_domains(void)
{
int i;
struct sched_group *first = NULL, *last = NULL;
/* Set up domains */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
struct sched_domain *node_domain = &per_cpu(node_domains, i);
int node = cpu_to_node(i);
cpumask_t nodemask = node_to_cpumask(node);
*cpu_domain = SD_SIBLING_INIT;
cpu_domain->span = cpu_sibling_map[i];
cpu_domain->parent = phys_domain;
cpu_domain->groups = &sched_group_cpus[i];
*phys_domain = SD_CPU_INIT;
phys_domain->span = nodemask;
phys_domain->parent = node_domain;
phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
*node_domain = SD_NODE_INIT;
node_domain->span = cpu_possible_map;
node_domain->groups = &sched_group_nodes[cpu_to_node(i)];
}
/* Set up CPU (sibling) groups */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
int j;
first = last = NULL;
if (i != first_cpu(cpu_domain->span))
continue;
for_each_cpu_mask(j, cpu_domain->span) {
struct sched_group *cpu = &sched_group_cpus[j];
cpu->cpumask = CPU_MASK_NONE;
cpu_set(j, cpu->cpumask);
cpu->cpu_power = SCHED_LOAD_SCALE;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
}
for (i = 0; i < MAX_NUMNODES; i++) {
int j;
cpumask_t nodemask;
struct sched_group *node = &sched_group_nodes[i];
cpumask_t node_cpumask = node_to_cpumask(i);
cpus_and(nodemask, node_cpumask, cpu_possible_map);
if (cpus_empty(nodemask))
continue;
first = last = NULL;
/* Set up physical groups */
for_each_cpu_mask(j, nodemask) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, j);
struct sched_group *cpu = &sched_group_phys[j];
if (j != first_cpu(cpu_domain->span))
continue;
cpu->cpumask = cpu_domain->span;
/*
* Make each extra sibling increase power by 10% of
* the basic CPU. This is very arbitrary.
*/
cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
node->cpu_power += cpu->cpu_power;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
}
/* Set up nodes */
first = last = NULL;
for (i = 0; i < MAX_NUMNODES; i++) {
struct sched_group *cpu = &sched_group_nodes[i];
cpumask_t nodemask;
cpumask_t node_cpumask = node_to_cpumask(i);
cpus_and(nodemask, node_cpumask, cpu_possible_map);
if (cpus_empty(nodemask))
continue;
cpu->cpumask = nodemask;
/* ->cpu_power already setup */
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
mb();
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
cpu_attach_domain(cpu_domain, i);
}
}
#else /* !CONFIG_NUMA */
static struct sched_group sched_group_cpus[NR_CPUS];
static struct sched_group sched_group_phys[NR_CPUS];
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static DEFINE_PER_CPU(struct sched_domain, phys_domains);
__init void arch_init_sched_domains(void)
{
int i;
struct sched_group *first = NULL, *last = NULL;
/* Set up domains */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
*cpu_domain = SD_SIBLING_INIT;
cpu_domain->span = cpu_sibling_map[i];
cpu_domain->parent = phys_domain;
cpu_domain->groups = &sched_group_cpus[i];
*phys_domain = SD_CPU_INIT;
phys_domain->span = cpu_possible_map;
phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
}
/* Set up CPU (sibling) groups */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
int j;
first = last = NULL;
if (i != first_cpu(cpu_domain->span))
continue;
for_each_cpu_mask(j, cpu_domain->span) {
struct sched_group *cpu = &sched_group_cpus[j];
cpus_clear(cpu->cpumask);
cpu_set(j, cpu->cpumask);
cpu->cpu_power = SCHED_LOAD_SCALE;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
}
first = last = NULL;
/* Set up physical groups */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
struct sched_group *cpu = &sched_group_phys[i];
if (i != first_cpu(cpu_domain->span))
continue;
cpu->cpumask = cpu_domain->span;
/* See SMT+NUMA setup for comment */
cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
mb();
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
cpu_attach_domain(cpu_domain, i);
}
}
#endif /* CONFIG_NUMA */
#endif /* CONFIG_SCHED_SMT */
/* These are wrappers to interface to the new boot process. Someone
who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
void __init smp_prepare_cpus(unsigned int max_cpus)
......
@@ -1006,218 +1006,3 @@ void __init smp_cpus_done(unsigned int max_cpus)
set_cpus_allowed(current, old_mask);
}
#ifdef CONFIG_SCHED_SMT
#ifdef CONFIG_NUMA
static struct sched_group sched_group_cpus[NR_CPUS];
static struct sched_group sched_group_phys[NR_CPUS];
static struct sched_group sched_group_nodes[MAX_NUMNODES];
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static DEFINE_PER_CPU(struct sched_domain, node_domains);
__init void arch_init_sched_domains(void)
{
int i;
struct sched_group *first = NULL, *last = NULL;
/* Set up domains */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
struct sched_domain *node_domain = &per_cpu(node_domains, i);
int node = cpu_to_node(i);
cpumask_t nodemask = node_to_cpumask(node);
cpumask_t my_cpumask = cpumask_of_cpu(i);
cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1);
*cpu_domain = SD_SIBLING_INIT;
if (cur_cpu_spec->cpu_features & CPU_FTR_SMT)
cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask);
else
cpu_domain->span = my_cpumask;
cpu_domain->parent = phys_domain;
cpu_domain->groups = &sched_group_cpus[i];
*phys_domain = SD_CPU_INIT;
phys_domain->span = nodemask;
phys_domain->parent = node_domain;
phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
*node_domain = SD_NODE_INIT;
node_domain->span = cpu_possible_map;
node_domain->groups = &sched_group_nodes[node];
}
/* Set up CPU (sibling) groups */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
int j;
first = last = NULL;
if (i != first_cpu(cpu_domain->span))
continue;
for_each_cpu_mask(j, cpu_domain->span) {
struct sched_group *cpu = &sched_group_cpus[j];
cpus_clear(cpu->cpumask);
cpu_set(j, cpu->cpumask);
cpu->cpu_power = SCHED_LOAD_SCALE;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
}
for (i = 0; i < MAX_NUMNODES; i++) {
int j;
cpumask_t nodemask;
struct sched_group *node = &sched_group_nodes[i];
cpumask_t node_cpumask = node_to_cpumask(i);
cpus_and(nodemask, node_cpumask, cpu_possible_map);
if (cpus_empty(nodemask))
continue;
first = last = NULL;
/* Set up physical groups */
for_each_cpu_mask(j, nodemask) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, j);
struct sched_group *cpu = &sched_group_phys[j];
if (j != first_cpu(cpu_domain->span))
continue;
cpu->cpumask = cpu_domain->span;
/*
* Make each extra sibling increase power by 10% of
* the basic CPU. This is very arbitrary.
*/
cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
node->cpu_power += cpu->cpu_power;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
}
/* Set up nodes */
first = last = NULL;
for (i = 0; i < MAX_NUMNODES; i++) {
struct sched_group *cpu = &sched_group_nodes[i];
cpumask_t nodemask;
cpumask_t node_cpumask = node_to_cpumask(i);
cpus_and(nodemask, node_cpumask, cpu_possible_map);
if (cpus_empty(nodemask))
continue;
cpu->cpumask = nodemask;
/* ->cpu_power already setup */
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
mb();
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
cpu_attach_domain(cpu_domain, i);
}
}
#else /* !CONFIG_NUMA */
static struct sched_group sched_group_cpus[NR_CPUS];
static struct sched_group sched_group_phys[NR_CPUS];
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static DEFINE_PER_CPU(struct sched_domain, phys_domains);
__init void arch_init_sched_domains(void)
{
int i;
struct sched_group *first = NULL, *last = NULL;
/* Set up domains */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
cpumask_t my_cpumask = cpumask_of_cpu(i);
cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1);
*cpu_domain = SD_SIBLING_INIT;
if (cur_cpu_spec->cpu_features & CPU_FTR_SMT)
cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask);
else
cpu_domain->span = my_cpumask;
cpu_domain->parent = phys_domain;
cpu_domain->groups = &sched_group_cpus[i];
*phys_domain = SD_CPU_INIT;
phys_domain->span = cpu_possible_map;
phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
}
/* Set up CPU (sibling) groups */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
int j;
first = last = NULL;
if (i != first_cpu(cpu_domain->span))
continue;
for_each_cpu_mask(j, cpu_domain->span) {
struct sched_group *cpu = &sched_group_cpus[j];
cpus_clear(cpu->cpumask);
cpu_set(j, cpu->cpumask);
cpu->cpu_power = SCHED_LOAD_SCALE;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
}
first = last = NULL;
/* Set up physical groups */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
struct sched_group *cpu = &sched_group_phys[i];
if (i != first_cpu(cpu_domain->span))
continue;
cpu->cpumask = cpu_domain->span;
/* See SMT+NUMA setup for comment */
cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
mb();
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
cpu_attach_domain(cpu_domain, i);
}
}
#endif /* CONFIG_NUMA */
#endif /* CONFIG_SCHED_SMT */
@@ -25,7 +25,6 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o
obj-$(CONFIG_SWIOTLB) += swiotlb.o
obj-$(CONFIG_SCHED_SMT) += domain.o
obj-$(CONFIG_MODULES) += module.o
......
@@ -25,7 +25,6 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o
obj-$(CONFIG_SWIOTLB) += swiotlb.o
obj-$(CONFIG_SCHED_SMT) += domain.o
obj-$(CONFIG_MODULES) += module.o
......
#include <linux/init.h>
#include <linux/sched.h>
/* Don't do any NUMA setup on Opteron right now. They seem to be
better off with flat scheduling. This is just for SMT. */
#ifdef CONFIG_SCHED_SMT
static struct sched_group sched_group_cpus[NR_CPUS];
static struct sched_group sched_group_phys[NR_CPUS];
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static DEFINE_PER_CPU(struct sched_domain, phys_domains);
__init void arch_init_sched_domains(void)
{
int i;
struct sched_group *first = NULL, *last = NULL;
/* Set up domains */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
*cpu_domain = SD_SIBLING_INIT;
/* Disable SMT NICE for CMP */
/* RED-PEN use a generic flag */
if (cpu_data[i].x86_vendor == X86_VENDOR_AMD)
cpu_domain->flags &= ~SD_SHARE_CPUPOWER;
cpu_domain->span = cpu_sibling_map[i];
cpu_domain->parent = phys_domain;
cpu_domain->groups = &sched_group_cpus[i];
*phys_domain = SD_CPU_INIT;
phys_domain->span = cpu_possible_map;
phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
}
/* Set up CPU (sibling) groups */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
int j;
first = last = NULL;
if (i != first_cpu(cpu_domain->span))
continue;
for_each_cpu_mask(j, cpu_domain->span) {
struct sched_group *cpu = &sched_group_cpus[j];
cpus_clear(cpu->cpumask);
cpu_set(j, cpu->cpumask);
cpu->cpu_power = SCHED_LOAD_SCALE;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
}
first = last = NULL;
/* Set up physical groups */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
struct sched_group *cpu = &sched_group_phys[i];
if (i != first_cpu(cpu_domain->span))
continue;
cpu->cpumask = cpu_domain->span;
/*
* Make each extra sibling increase power by 10% of
* the basic CPU. This is very arbitrary.
*/
cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
mb();
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
cpu_attach_domain(cpu_domain, i);
}
}
#endif
@@ -647,9 +647,4 @@ extern void select_idle_routine(const struct cpuinfo_x86 *c);
#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
#ifdef CONFIG_SCHED_SMT
#define ARCH_HAS_SCHED_DOMAIN
#define ARCH_HAS_SCHED_WAKE_IDLE
#endif
#endif /* __ASM_I386_PROCESSOR_H */
@@ -626,11 +626,6 @@ static inline void prefetchw(const void *x)
#define spin_lock_prefetch(x) prefetchw(x)
#ifdef CONFIG_SCHED_SMT
#define ARCH_HAS_SCHED_DOMAIN
#define ARCH_HAS_SCHED_WAKE_IDLE
#endif
#endif /* ASSEMBLY */
/*
......
@@ -456,9 +456,4 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
#ifdef CONFIG_SCHED_SMT
#define ARCH_HAS_SCHED_DOMAIN
#define ARCH_HAS_SCHED_WAKE_IDLE
#endif
#endif /* __ASM_X86_64_PROCESSOR_H */
@@ -612,6 +612,9 @@ struct sched_domain {
unsigned int nr_balance_failed; /* initialise to 0 */
};
#ifndef ARCH_HAS_SCHED_TUNE
#ifdef CONFIG_SCHED_SMT
#define ARCH_HAS_SCHED_WAKE_IDLE
/* Common values for SMT siblings */
#define SD_SIBLING_INIT (struct sched_domain) { \
.span = CPU_MASK_NONE, \
@@ -633,6 +636,7 @@ struct sched_domain {
.balance_interval = 1, \
.nr_balance_failed = 0, \
}
#endif
/* Common values for CPUs */
#define SD_CPU_INIT (struct sched_domain) { \
@@ -675,6 +679,7 @@ struct sched_domain {
.nr_balance_failed = 0, \
}
#endif
#endif /* ARCH_HAS_SCHED_TUNE */
extern void cpu_attach_domain(struct sched_domain *sd, int cpu);
......
@@ -3659,118 +3659,182 @@ void cpu_attach_domain(struct sched_domain *sd, int cpu)
#ifdef ARCH_HAS_SCHED_DOMAIN
extern void __init arch_init_sched_domains(void);
#else
static struct sched_group sched_group_cpus[NR_CPUS];
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
#ifdef CONFIG_NUMA
static struct sched_group sched_group_nodes[MAX_NUMNODES];
static DEFINE_PER_CPU(struct sched_domain, node_domains);
static void __init arch_init_sched_domains(void)
static struct sched_group sched_group_cpus[NR_CPUS];
__init static int cpu_to_cpu_group(int cpu)
{
int i;
struct sched_group *first_node = NULL, *last_node = NULL;
return cpu;
}
#endif
/* Set up domains */
for_each_cpu(i) {
int node = cpu_to_node(i);
cpumask_t nodemask = node_to_cpumask(node);
struct sched_domain *node_sd = &per_cpu(node_domains, i);
struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i);
static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static struct sched_group sched_group_phys[NR_CPUS];
__init static int cpu_to_phys_group(int cpu)
{
#ifdef CONFIG_SCHED_SMT
return first_cpu(cpu_sibling_map[cpu]);
#else
return cpu;
#endif
}
*node_sd = SD_NODE_INIT;
node_sd->span = cpu_possible_map;
node_sd->groups = &sched_group_nodes[cpu_to_node(i)];
#ifdef CONFIG_NUMA
static DEFINE_PER_CPU(struct sched_domain, node_domains);
static struct sched_group sched_group_nodes[MAX_NUMNODES];
__init static int cpu_to_node_group(int cpu)
{
return cpu_to_node(cpu);
}
#endif
*cpu_sd = SD_CPU_INIT;
cpus_and(cpu_sd->span, nodemask, cpu_possible_map);
cpu_sd->groups = &sched_group_cpus[i];
cpu_sd->parent = node_sd;
}
/*
* init_sched_build_groups takes an array of groups, the cpumask we wish
* to span, and a pointer to a function which identifies what group a CPU
* belongs to. The return value of group_fn must be a valid index into the
* groups[] array, and must be >= 0 and < NR_CPUS (due to the fact that we
* keep track of groups covered with a cpumask_t).
*
* init_sched_build_groups will build a circular linked list of the groups
* covered by the given span, and will set each group's ->cpumask correctly,
* and ->cpu_power to 0.
*/
__init static void init_sched_build_groups(struct sched_group groups[],
cpumask_t span, int (*group_fn)(int cpu))
{
struct sched_group *first = NULL, *last = NULL;
cpumask_t covered = CPU_MASK_NONE;
int i;
/* Set up groups */
for (i = 0; i < MAX_NUMNODES; i++) {
cpumask_t tmp = node_to_cpumask(i);
cpumask_t nodemask;
struct sched_group *first_cpu = NULL, *last_cpu = NULL;
struct sched_group *node = &sched_group_nodes[i];
for_each_cpu_mask(i, span) {
int group = group_fn(i);
struct sched_group *sg = &groups[group];
int j;
cpus_and(nodemask, tmp, cpu_possible_map);
if (cpus_empty(nodemask))
if (cpu_isset(i, covered))
continue;
node->cpumask = nodemask;
node->cpu_power = SCHED_LOAD_SCALE * cpus_weight(node->cpumask);
for_each_cpu_mask(j, node->cpumask) {
struct sched_group *cpu = &sched_group_cpus[j];
sg->cpumask = CPU_MASK_NONE;
sg->cpu_power = 0;
cpus_clear(cpu->cpumask);
cpu_set(j, cpu->cpumask);
cpu->cpu_power = SCHED_LOAD_SCALE;
for_each_cpu_mask(j, span) {
if (group_fn(j) != group)
continue;
if (!first_cpu)
first_cpu = cpu;
if (last_cpu)
last_cpu->next = cpu;
last_cpu = cpu;
cpu_set(j, covered);
cpu_set(j, sg->cpumask);
}
last_cpu->next = first_cpu;
if (!first_node)
first_node = node;
if (last_node)
last_node->next = node;
last_node = node;
}
last_node->next = first_node;
mb();
for_each_cpu(i) {
struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i);
cpu_attach_domain(cpu_sd, i);
if (!first)
first = sg;
if (last)
last->next = sg;
last = sg;
}
last->next = first;
}
#else /* !CONFIG_NUMA */
static void __init arch_init_sched_domains(void)
__init static void arch_init_sched_domains(void)
{
int i;
struct sched_group *first_cpu = NULL, *last_cpu = NULL;
/* Set up domains */
for_each_cpu(i) {
struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i);
int group;
struct sched_domain *sd = NULL, *p;
cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));
#ifdef CONFIG_NUMA
sd = &per_cpu(node_domains, i);
group = cpu_to_node_group(i);
*sd = SD_NODE_INIT;
sd->span = cpu_possible_map;
sd->groups = &sched_group_nodes[group];
#endif
*cpu_sd = SD_CPU_INIT;
cpu_sd->span = cpu_possible_map;
cpu_sd->groups = &sched_group_cpus[i];
p = sd;
sd = &per_cpu(phys_domains, i);
group = cpu_to_phys_group(i);
*sd = SD_CPU_INIT;
sd->span = nodemask;
sd->parent = p;
sd->groups = &sched_group_phys[group];
#ifdef CONFIG_SCHED_SMT
p = sd;
sd = &per_cpu(cpu_domains, i);
group = cpu_to_cpu_group(i);
*sd = SD_SIBLING_INIT;
sd->span = cpu_sibling_map[i];
sd->parent = p;
sd->groups = &sched_group_cpus[group];
#endif
}
/* Set up CPU groups */
for_each_cpu_mask(i, cpu_possible_map) {
struct sched_group *cpu = &sched_group_cpus[i];
#ifdef CONFIG_SCHED_SMT
/* Set up CPU (sibling) groups */
for_each_cpu(i) {
if (i != first_cpu(cpu_sibling_map[i]))
continue;
cpus_clear(cpu->cpumask);
cpu_set(i, cpu->cpumask);
cpu->cpu_power = SCHED_LOAD_SCALE;
init_sched_build_groups(sched_group_cpus, cpu_sibling_map[i],
&cpu_to_cpu_group);
}
#endif
/* Set up physical groups */
for (i = 0; i < MAX_NUMNODES; i++) {
cpumask_t nodemask = node_to_cpumask(i);
if (!first_cpu)
first_cpu = cpu;
if (last_cpu)
last_cpu->next = cpu;
last_cpu = cpu;
cpus_and(nodemask, nodemask, cpu_possible_map);
if (cpus_empty(nodemask))
continue;
init_sched_build_groups(sched_group_phys, nodemask,
&cpu_to_phys_group);
}
last_cpu->next = first_cpu;
mb(); /* domains were modified outside the lock */
#ifdef CONFIG_NUMA
/* Set up node groups */
init_sched_build_groups(sched_group_nodes, cpu_possible_map,
&cpu_to_node_group);
#endif
/* Calculate CPU power for physical packages and nodes */
for_each_cpu(i) {
int power;
struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
sd = &per_cpu(cpu_domains, i);
power = SCHED_LOAD_SCALE;
sd->groups->cpu_power = power;
#endif
sd = &per_cpu(phys_domains, i);
power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
(cpus_weight(sd->groups->cpumask)-1) / 10;
sd->groups->cpu_power = power;
#ifdef CONFIG_NUMA
if (i == first_cpu(sd->groups->cpumask)) {
/* Only add "power" once for each physical package. */
sd = &per_cpu(node_domains, i);
sd->groups->cpu_power += power;
}
#endif
}
/* Attach the domains */
for_each_cpu(i) {
struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i);
cpu_attach_domain(cpu_sd, i);
struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
sd = &per_cpu(cpu_domains, i);
#else
sd = &per_cpu(phys_domains, i);
#endif
cpu_attach_domain(sd, i);
}
}
#endif /* CONFIG_NUMA */
#endif /* ARCH_HAS_SCHED_DOMAIN */
#define SCHED_DOMAIN_DEBUG
......
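For a sense of how the new init_sched_build_groups() helper composes if
it were exported to arch code (a sketch: cpu_to_pair_group() and
sched_group_pairs[] are invented names, and the two-CPUs-per-group
mapping is purely illustrative):

/* Any stable mapping returning an index in [0, NR_CPUS) will do. */
__init static int cpu_to_pair_group(int cpu)
{
	return cpu & ~1;	/* CPUs 0+1 -> group 0, 2+3 -> group 2, ... */
}

static struct sched_group sched_group_pairs[NR_CPUS];

/* Called from an arch_init_sched_domains()-style setup function: */
init_sched_build_groups(sched_group_pairs, cpu_possible_map,
			&cpu_to_pair_group);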