Commit 2f0cfcb9, authored by Andi Kleen, committed by Linus Torvalds

[PATCH] Add support for NUMA discovery on AMD dual core to x86-64

Following the CPUID extensions and APIC-ID setup presented by Richard
Brunner at Kernel Summit.

Without this the kernel's node discovery would assign the wrong nodes on
dual core systems and cause performance degradation.

Only handles CPUs whose number of cores is a power of two; any other core
count is ignored and the CPU is treated as single core.

/proc/cpuinfo has a new field "cpu cores: <number of CPU cores>" and also
shows the other core as an HT sibling (this is needed so that old CPU-based
license managers count one CPU package as only one physical CPU).

FIXME: this currently misses the disable code for SMT idle.  That's because
2.6.9rc1-bk6 removed the arch hooks needed for that.  It needs to be re-added
later.
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 78d2ba74
...@@ -681,6 +681,26 @@ static int __init init_amd(struct cpuinfo_x86 *c) ...@@ -681,6 +681,26 @@ static int __init init_amd(struct cpuinfo_x86 *c)
} }
} }
display_cacheinfo(c); display_cacheinfo(c);
if (c->cpuid_level >= 0x80000008) {
c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
if (c->x86_num_cores & (c->x86_num_cores - 1))
c->x86_num_cores = 1;
#ifdef CONFIG_NUMA
/* On a dual core setup the lower bits of apic id
distingush the cores. Fix up the CPU<->node mappings
here based on that.
Assumes number of cores is a power of two. */
if (c->x86_num_cores > 1) {
int cpu = c->x86_apicid;
cpu_to_node[cpu] = cpu >> hweight32(c->x86_num_cores - 1);
printk(KERN_INFO "CPU %d -> Node %d\n",
cpu, cpu_to_node[cpu]);
}
#endif
}
return r; return r;
} }
...@@ -905,6 +925,8 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c) ...@@ -905,6 +925,8 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c)
c->x86_model_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */
c->x86_clflush_size = 64; c->x86_clflush_size = 64;
c->x86_cache_alignment = c->x86_clflush_size; c->x86_cache_alignment = c->x86_clflush_size;
c->x86_num_cores = 1;
c->x86_apicid = c == &boot_cpu_data ? 0 : c - cpu_data;
memset(&c->x86_capability, 0, sizeof c->x86_capability); memset(&c->x86_capability, 0, sizeof c->x86_capability);
/* Get vendor name */ /* Get vendor name */
...@@ -932,6 +954,7 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c) ...@@ -932,6 +954,7 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c)
} }
if (c->x86_capability[0] & (1<<19)) if (c->x86_capability[0] & (1<<19))
c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
c->x86_apicid = misc >> 24;
} else { } else {
/* Have CPUID level 0 only - unheard of */ /* Have CPUID level 0 only - unheard of */
c->x86 = 4; c->x86 = 4;
...@@ -951,8 +974,10 @@ void __init identify_cpu(struct cpuinfo_x86 *c) ...@@ -951,8 +974,10 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
/* AMD-defined flags: level 0x80000001 */ /* AMD-defined flags: level 0x80000001 */
xlvl = cpuid_eax(0x80000000); xlvl = cpuid_eax(0x80000000);
if ( (xlvl & 0xffff0000) == 0x80000000 ) { if ( (xlvl & 0xffff0000) == 0x80000000 ) {
if ( xlvl >= 0x80000001 ) if ( xlvl >= 0x80000001 ) {
c->x86_capability[1] = cpuid_edx(0x80000001); c->x86_capability[1] = cpuid_edx(0x80000001);
c->x86_capability[5] = cpuid_ecx(0x80000001);
}
if ( xlvl >= 0x80000004 ) if ( xlvl >= 0x80000004 )
get_model_name(c); /* Default name */ get_model_name(c); /* Default name */
} }
...@@ -1152,6 +1177,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) ...@@ -1152,6 +1177,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
} }
} }
if (c->x86_num_cores > 1)
seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores);
seq_printf(m, "\n\n"); seq_printf(m, "\n\n");
return 0; return 0;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
#ifndef __ASM_X8664_CPUFEATURE_H #ifndef __ASM_X8664_CPUFEATURE_H
#define __ASM_X8664_CPUFEATURE_H #define __ASM_X8664_CPUFEATURE_H
#define NCAPINTS 5 /* Currently we have 4 32-bit words worth of info */ #define NCAPINTS 6
/* Intel-defined CPU features, CPUID level 0x00000001, word 0 */ /* Intel-defined CPU features, CPUID level 0x00000001, word 0 */
#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */ #define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */
...@@ -72,6 +72,9 @@ ...@@ -72,6 +72,9 @@
#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ #define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ #define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
/* More extended AMD flags: CPUID level 0x80000001, ecx, word 5 */
#define X86_FEATURE_HTVALID (5*32+ 0) /* HyperThreading valid, otherwise CMP */
#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability) #define cpu_has(c, bit) test_bit(bit, (c)->x86_capability)
#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability) #define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability)
......
...@@ -60,6 +60,8 @@ struct cpuinfo_x86 { ...@@ -60,6 +60,8 @@ struct cpuinfo_x86 {
int x86_cache_alignment; int x86_cache_alignment;
int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/ int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/
__u8 x86_virt_bits, x86_phys_bits; __u8 x86_virt_bits, x86_phys_bits;
__u8 x86_num_cores;
__u8 x86_apicid;
__u32 x86_power; __u32 x86_power;
unsigned long loops_per_jiffy; unsigned long loops_per_jiffy;
} ____cacheline_aligned; } ____cacheline_aligned;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment