Commit 7d20dd32 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'x86-apic-2021-11-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86/apic update from Thomas Gleixner:
 "A single commit which reduces cache misses in __x2apic_send_IPI_mask()
  significantly by converting x86_cpu_to_logical_apicid() to an array
  instead of using per CPU storage.

  This reduces the cost for a full broadcast on a dual socket system
  with 256 CPUs from 33 down to 11 microseconds"

* tag 'x86-apic-2021-11-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/apic: Reduce cache line misses in __x2apic_send_IPI_mask()
parents 9a7e0a90 cc95a07f
...@@ -15,9 +15,15 @@ struct cluster_mask { ...@@ -15,9 +15,15 @@ struct cluster_mask {
struct cpumask mask; struct cpumask mask;
}; };
static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); /*
* __x2apic_send_IPI_mask() possibly needs to read
* x86_cpu_to_logical_apicid for all online cpus in a sequential way.
* Using per cpu variable would cost one cache line per cpu.
*/
static u32 *x86_cpu_to_logical_apicid __read_mostly;
static DEFINE_PER_CPU(cpumask_var_t, ipi_mask); static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
static DEFINE_PER_CPU(struct cluster_mask *, cluster_masks); static DEFINE_PER_CPU_READ_MOSTLY(struct cluster_mask *, cluster_masks);
static struct cluster_mask *cluster_hotplug_mask; static struct cluster_mask *cluster_hotplug_mask;
static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
...@@ -27,7 +33,7 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) ...@@ -27,7 +33,7 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
static void x2apic_send_IPI(int cpu, int vector) static void x2apic_send_IPI(int cpu, int vector)
{ {
u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu); u32 dest = x86_cpu_to_logical_apicid[cpu];
/* x2apic MSRs are special and need a special fence: */ /* x2apic MSRs are special and need a special fence: */
weak_wrmsr_fence(); weak_wrmsr_fence();
...@@ -58,7 +64,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) ...@@ -58,7 +64,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
dest = 0; dest = 0;
for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask) for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask)
dest |= per_cpu(x86_cpu_to_logical_apicid, clustercpu); dest |= x86_cpu_to_logical_apicid[clustercpu];
if (!dest) if (!dest)
continue; continue;
...@@ -94,7 +100,7 @@ static void x2apic_send_IPI_all(int vector) ...@@ -94,7 +100,7 @@ static void x2apic_send_IPI_all(int vector)
static u32 x2apic_calc_apicid(unsigned int cpu) static u32 x2apic_calc_apicid(unsigned int cpu)
{ {
return per_cpu(x86_cpu_to_logical_apicid, cpu); return x86_cpu_to_logical_apicid[cpu];
} }
static void init_x2apic_ldr(void) static void init_x2apic_ldr(void)
...@@ -103,7 +109,7 @@ static void init_x2apic_ldr(void) ...@@ -103,7 +109,7 @@ static void init_x2apic_ldr(void)
u32 cluster, apicid = apic_read(APIC_LDR); u32 cluster, apicid = apic_read(APIC_LDR);
unsigned int cpu; unsigned int cpu;
this_cpu_write(x86_cpu_to_logical_apicid, apicid); x86_cpu_to_logical_apicid[smp_processor_id()] = apicid;
if (cmsk) if (cmsk)
goto update; goto update;
...@@ -166,12 +172,21 @@ static int x2apic_dead_cpu(unsigned int dead_cpu) ...@@ -166,12 +172,21 @@ static int x2apic_dead_cpu(unsigned int dead_cpu)
static int x2apic_cluster_probe(void) static int x2apic_cluster_probe(void)
{ {
u32 slots;
if (!x2apic_mode) if (!x2apic_mode)
return 0; return 0;
slots = max_t(u32, L1_CACHE_BYTES/sizeof(u32), nr_cpu_ids);
x86_cpu_to_logical_apicid = kcalloc(slots, sizeof(u32), GFP_KERNEL);
if (!x86_cpu_to_logical_apicid)
return 0;
if (cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare", if (cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare",
x2apic_prepare_cpu, x2apic_dead_cpu) < 0) { x2apic_prepare_cpu, x2apic_dead_cpu) < 0) {
pr_err("Failed to register X2APIC_PREPARE\n"); pr_err("Failed to register X2APIC_PREPARE\n");
kfree(x86_cpu_to_logical_apicid);
x86_cpu_to_logical_apicid = NULL;
return 0; return 0;
} }
init_x2apic_ldr(); init_x2apic_ldr();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment