Commit 6dec3a10 authored by Ingo Molnar's avatar Ingo Molnar

Merge branch 'x86/x2apic' into x86/core

Conflicts:

	include/asm-x86/i8259.h
	include/asm-x86/msidef.h
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parents 29308333 10a010f6
...@@ -1420,6 +1420,12 @@ and is between 256 and 4096 characters. It is defined in the file ...@@ -1420,6 +1420,12 @@ and is between 256 and 4096 characters. It is defined in the file
nolapic_timer [X86-32,APIC] Do not use the local APIC timer. nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
nox2apic [X86-64,APIC] Do not enable x2APIC mode.
x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
default x2apic cluster mode on platforms
supporting x2apic.
noltlbs [PPC] Do not use large page/tlb entries for kernel noltlbs [PPC] Do not use large page/tlb entries for kernel
lowmem mapping on PPC40x. lowmem mapping on PPC40x.
......
...@@ -1652,6 +1652,14 @@ config DMAR_FLOPPY_WA ...@@ -1652,6 +1652,14 @@ config DMAR_FLOPPY_WA
workaround will setup a 1:1 mapping for the first workaround will setup a 1:1 mapping for the first
16M to make floppy (an ISA device) work. 16M to make floppy (an ISA device) work.
config INTR_REMAP
bool "Support for Interrupt Remapping (EXPERIMENTAL)"
depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
help
Supports Interrupt remapping for IO-APIC and MSI devices.
To use x2apic mode in the CPU's which support x2APIC enhancements or
to support platforms with CPU's having > 8 bit APIC ID, say Y.
source "drivers/pci/pcie/Kconfig" source "drivers/pci/pcie/Kconfig"
source "drivers/pci/Kconfig" source "drivers/pci/Kconfig"
......
...@@ -104,6 +104,8 @@ obj-$(CONFIG_OLPC) += olpc.o ...@@ -104,6 +104,8 @@ obj-$(CONFIG_OLPC) += olpc.o
ifeq ($(CONFIG_X86_64),y) ifeq ($(CONFIG_X86_64),y)
obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
obj-y += bios_uv.o obj-y += bios_uv.o
obj-y += genx2apic_cluster.o
obj-y += genx2apic_phys.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
obj-$(CONFIG_AUDIT) += audit_64.o obj-$(CONFIG_AUDIT) += audit_64.o
......
...@@ -761,7 +761,7 @@ static void __init acpi_register_lapic_address(unsigned long address) ...@@ -761,7 +761,7 @@ static void __init acpi_register_lapic_address(unsigned long address)
set_fixmap_nocache(FIX_APIC_BASE, address); set_fixmap_nocache(FIX_APIC_BASE, address);
if (boot_cpu_physical_apicid == -1U) { if (boot_cpu_physical_apicid == -1U) {
boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); boot_cpu_physical_apicid = read_apic_id();
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
apic_version[boot_cpu_physical_apicid] = apic_version[boot_cpu_physical_apicid] =
GET_APIC_VERSION(apic_read(APIC_LVR)); GET_APIC_VERSION(apic_read(APIC_LVR));
...@@ -1337,7 +1337,9 @@ static void __init acpi_process_madt(void) ...@@ -1337,7 +1337,9 @@ static void __init acpi_process_madt(void)
acpi_ioapic = 1; acpi_ioapic = 1;
smp_found_config = 1; smp_found_config = 1;
#ifdef CONFIG_X86_32
setup_apic_routing(); setup_apic_routing();
#endif
} }
} }
if (error == -EINVAL) { if (error == -EINVAL) {
......
...@@ -145,13 +145,18 @@ static int modern_apic(void) ...@@ -145,13 +145,18 @@ static int modern_apic(void)
return lapic_get_version() >= 0x14; return lapic_get_version() >= 0x14;
} }
void apic_wait_icr_idle(void) /*
* Paravirt kernels also might be using these below ops. So we still
* use generic apic_read()/apic_write(), which might be pointing to different
* ops in PARAVIRT case.
*/
void xapic_wait_icr_idle(void)
{ {
while (apic_read(APIC_ICR) & APIC_ICR_BUSY) while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
cpu_relax(); cpu_relax();
} }
u32 safe_apic_wait_icr_idle(void) u32 safe_xapic_wait_icr_idle(void)
{ {
u32 send_status; u32 send_status;
int timeout; int timeout;
...@@ -167,6 +172,34 @@ u32 safe_apic_wait_icr_idle(void) ...@@ -167,6 +172,34 @@ u32 safe_apic_wait_icr_idle(void)
return send_status; return send_status;
} }
void xapic_icr_write(u32 low, u32 id)
{
apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
apic_write(APIC_ICR, low);
}
u64 xapic_icr_read(void)
{
u32 icr1, icr2;
icr2 = apic_read(APIC_ICR2);
icr1 = apic_read(APIC_ICR);
return icr1 | ((u64)icr2 << 32);
}
static struct apic_ops xapic_ops = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
.icr_read = xapic_icr_read,
.icr_write = xapic_icr_write,
.wait_icr_idle = xapic_wait_icr_idle,
.safe_wait_icr_idle = safe_xapic_wait_icr_idle,
};
struct apic_ops __read_mostly *apic_ops = &xapic_ops;
EXPORT_SYMBOL_GPL(apic_ops);
/** /**
* enable_NMI_through_LVT0 - enable NMI through local vector table 0 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
*/ */
...@@ -1205,7 +1238,7 @@ void __init init_apic_mappings(void) ...@@ -1205,7 +1238,7 @@ void __init init_apic_mappings(void)
* default configuration (or the MP table is broken). * default configuration (or the MP table is broken).
*/ */
if (boot_cpu_physical_apicid == -1U) if (boot_cpu_physical_apicid == -1U)
boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); boot_cpu_physical_apicid = read_apic_id();
} }
...@@ -1242,7 +1275,7 @@ int __init APIC_init_uniprocessor(void) ...@@ -1242,7 +1275,7 @@ int __init APIC_init_uniprocessor(void)
* might be zero if read from MP tables. Get it from LAPIC. * might be zero if read from MP tables. Get it from LAPIC.
*/ */
#ifdef CONFIG_CRASH_DUMP #ifdef CONFIG_CRASH_DUMP
boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); boot_cpu_physical_apicid = read_apic_id();
#endif #endif
physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <linux/clockchips.h> #include <linux/clockchips.h>
#include <linux/acpi_pmtmr.h> #include <linux/acpi_pmtmr.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/dmar.h>
#include <asm/atomic.h> #include <asm/atomic.h>
#include <asm/smp.h> #include <asm/smp.h>
...@@ -39,6 +40,7 @@ ...@@ -39,6 +40,7 @@
#include <asm/proto.h> #include <asm/proto.h>
#include <asm/timex.h> #include <asm/timex.h>
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/i8259.h>
#include <mach_ipi.h> #include <mach_ipi.h>
#include <mach_apic.h> #include <mach_apic.h>
...@@ -46,6 +48,11 @@ ...@@ -46,6 +48,11 @@
static int disable_apic_timer __cpuinitdata; static int disable_apic_timer __cpuinitdata;
static int apic_calibrate_pmtmr __initdata; static int apic_calibrate_pmtmr __initdata;
int disable_apic; int disable_apic;
int disable_x2apic;
int x2apic;
/* x2apic enabled before OS handover */
int x2apic_preenabled;
/* Local APIC timer works in C2 */ /* Local APIC timer works in C2 */
int local_apic_timer_c2_ok; int local_apic_timer_c2_ok;
...@@ -119,13 +126,13 @@ static int modern_apic(void) ...@@ -119,13 +126,13 @@ static int modern_apic(void)
return lapic_get_version() >= 0x14; return lapic_get_version() >= 0x14;
} }
void apic_wait_icr_idle(void) void xapic_wait_icr_idle(void)
{ {
while (apic_read(APIC_ICR) & APIC_ICR_BUSY) while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
cpu_relax(); cpu_relax();
} }
u32 safe_apic_wait_icr_idle(void) u32 safe_xapic_wait_icr_idle(void)
{ {
u32 send_status; u32 send_status;
int timeout; int timeout;
...@@ -141,6 +148,69 @@ u32 safe_apic_wait_icr_idle(void) ...@@ -141,6 +148,69 @@ u32 safe_apic_wait_icr_idle(void)
return send_status; return send_status;
} }
void xapic_icr_write(u32 low, u32 id)
{
apic_write(APIC_ICR2, id << 24);
apic_write(APIC_ICR, low);
}
u64 xapic_icr_read(void)
{
u32 icr1, icr2;
icr2 = apic_read(APIC_ICR2);
icr1 = apic_read(APIC_ICR);
return (icr1 | ((u64)icr2 << 32));
}
static struct apic_ops xapic_ops = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
.icr_read = xapic_icr_read,
.icr_write = xapic_icr_write,
.wait_icr_idle = xapic_wait_icr_idle,
.safe_wait_icr_idle = safe_xapic_wait_icr_idle,
};
struct apic_ops __read_mostly *apic_ops = &xapic_ops;
EXPORT_SYMBOL_GPL(apic_ops);
static void x2apic_wait_icr_idle(void)
{
/* no need to wait for icr idle in x2apic */
return;
}
static u32 safe_x2apic_wait_icr_idle(void)
{
/* no need to wait for icr idle in x2apic */
return 0;
}
void x2apic_icr_write(u32 low, u32 id)
{
wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
}
u64 x2apic_icr_read(void)
{
unsigned long val;
rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
return val;
}
static struct apic_ops x2apic_ops = {
.read = native_apic_msr_read,
.write = native_apic_msr_write,
.icr_read = x2apic_icr_read,
.icr_write = x2apic_icr_write,
.wait_icr_idle = x2apic_wait_icr_idle,
.safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
};
/** /**
* enable_NMI_through_LVT0 - enable NMI through local vector table 0 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
*/ */
...@@ -630,10 +700,10 @@ int __init verify_local_APIC(void) ...@@ -630,10 +700,10 @@ int __init verify_local_APIC(void)
/* /*
* The ID register is read/write in a real APIC. * The ID register is read/write in a real APIC.
*/ */
reg0 = read_apic_id(); reg0 = apic_read(APIC_ID);
apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
reg1 = read_apic_id(); reg1 = apic_read(APIC_ID);
apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
apic_write(APIC_ID, reg0); apic_write(APIC_ID, reg0);
if (reg1 != (reg0 ^ APIC_ID_MASK)) if (reg1 != (reg0 ^ APIC_ID_MASK))
...@@ -834,6 +904,125 @@ void __cpuinit end_local_APIC_setup(void) ...@@ -834,6 +904,125 @@ void __cpuinit end_local_APIC_setup(void)
apic_pm_activate(); apic_pm_activate();
} }
void check_x2apic(void)
{
int msr, msr2;
rdmsr(MSR_IA32_APICBASE, msr, msr2);
if (msr & X2APIC_ENABLE) {
printk("x2apic enabled by BIOS, switching to x2apic ops\n");
x2apic_preenabled = x2apic = 1;
apic_ops = &x2apic_ops;
}
}
void enable_x2apic(void)
{
int msr, msr2;
rdmsr(MSR_IA32_APICBASE, msr, msr2);
if (!(msr & X2APIC_ENABLE)) {
printk("Enabling x2apic\n");
wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
}
}
void enable_IR_x2apic(void)
{
#ifdef CONFIG_INTR_REMAP
int ret;
unsigned long flags;
if (!cpu_has_x2apic)
return;
if (!x2apic_preenabled && disable_x2apic) {
printk(KERN_INFO
"Skipped enabling x2apic and Interrupt-remapping "
"because of nox2apic\n");
return;
}
if (x2apic_preenabled && disable_x2apic)
panic("Bios already enabled x2apic, can't enforce nox2apic");
if (!x2apic_preenabled && skip_ioapic_setup) {
printk(KERN_INFO
"Skipped enabling x2apic and Interrupt-remapping "
"because of skipping io-apic setup\n");
return;
}
ret = dmar_table_init();
if (ret) {
printk(KERN_INFO
"dmar_table_init() failed with %d:\n", ret);
if (x2apic_preenabled)
panic("x2apic enabled by bios. But IR enabling failed");
else
printk(KERN_INFO
"Not enabling x2apic,Intr-remapping\n");
return;
}
local_irq_save(flags);
mask_8259A();
save_mask_IO_APIC_setup();
ret = enable_intr_remapping(1);
if (ret && x2apic_preenabled) {
local_irq_restore(flags);
panic("x2apic enabled by bios. But IR enabling failed");
}
if (ret)
goto end;
if (!x2apic) {
x2apic = 1;
apic_ops = &x2apic_ops;
enable_x2apic();
}
end:
if (ret)
/*
* IR enabling failed
*/
restore_IO_APIC_setup();
else
reinit_intr_remapped_IO_APIC(x2apic_preenabled);
unmask_8259A();
local_irq_restore(flags);
if (!ret) {
if (!x2apic_preenabled)
printk(KERN_INFO
"Enabled x2apic and interrupt-remapping\n");
else
printk(KERN_INFO
"Enabled Interrupt-remapping\n");
} else
printk(KERN_ERR
"Failed to enable Interrupt-remapping and x2apic\n");
#else
if (!cpu_has_x2apic)
return;
if (x2apic_preenabled)
panic("x2apic enabled prior OS handover,"
" enable CONFIG_INTR_REMAP");
printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
" and x2apic\n");
#endif
return;
}
/* /*
* Detect and enable local APICs on non-SMP boards. * Detect and enable local APICs on non-SMP boards.
* Original code written by Keir Fraser. * Original code written by Keir Fraser.
...@@ -873,7 +1062,7 @@ void __init early_init_lapic_mapping(void) ...@@ -873,7 +1062,7 @@ void __init early_init_lapic_mapping(void)
* Fetch the APIC ID of the BSP in case we have a * Fetch the APIC ID of the BSP in case we have a
* default configuration (or the MP table is broken). * default configuration (or the MP table is broken).
*/ */
boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); boot_cpu_physical_apicid = read_apic_id();
} }
/** /**
...@@ -881,6 +1070,11 @@ void __init early_init_lapic_mapping(void) ...@@ -881,6 +1070,11 @@ void __init early_init_lapic_mapping(void)
*/ */
void __init init_apic_mappings(void) void __init init_apic_mappings(void)
{ {
if (x2apic) {
boot_cpu_physical_apicid = read_apic_id();
return;
}
/* /*
* If no local APIC can be found then set up a fake all * If no local APIC can be found then set up a fake all
* zeroes page to simulate the local APIC and another * zeroes page to simulate the local APIC and another
...@@ -900,7 +1094,7 @@ void __init init_apic_mappings(void) ...@@ -900,7 +1094,7 @@ void __init init_apic_mappings(void)
* Fetch the APIC ID of the BSP in case we have a * Fetch the APIC ID of the BSP in case we have a
* default configuration (or the MP table is broken). * default configuration (or the MP table is broken).
*/ */
boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); boot_cpu_physical_apicid = read_apic_id();
} }
/* /*
...@@ -919,6 +1113,9 @@ int __init APIC_init_uniprocessor(void) ...@@ -919,6 +1113,9 @@ int __init APIC_init_uniprocessor(void)
return -1; return -1;
} }
enable_IR_x2apic();
setup_apic_routing();
verify_local_APIC(); verify_local_APIC();
connect_bsp_APIC(); connect_bsp_APIC();
...@@ -1100,6 +1297,11 @@ void __cpuinit generic_processor_info(int apicid, int version) ...@@ -1100,6 +1297,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
cpu_set(cpu, cpu_present_map); cpu_set(cpu, cpu_present_map);
} }
int hard_smp_processor_id(void)
{
return read_apic_id();
}
/* /*
* Power management * Power management
*/ */
...@@ -1136,7 +1338,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) ...@@ -1136,7 +1338,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
maxlvt = lapic_get_maxlvt(); maxlvt = lapic_get_maxlvt();
apic_pm_state.apic_id = read_apic_id(); apic_pm_state.apic_id = apic_read(APIC_ID);
apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
apic_pm_state.apic_ldr = apic_read(APIC_LDR); apic_pm_state.apic_ldr = apic_read(APIC_LDR);
apic_pm_state.apic_dfr = apic_read(APIC_DFR); apic_pm_state.apic_dfr = apic_read(APIC_DFR);
...@@ -1171,10 +1373,14 @@ static int lapic_resume(struct sys_device *dev) ...@@ -1171,10 +1373,14 @@ static int lapic_resume(struct sys_device *dev)
maxlvt = lapic_get_maxlvt(); maxlvt = lapic_get_maxlvt();
local_irq_save(flags); local_irq_save(flags);
if (!x2apic) {
rdmsr(MSR_IA32_APICBASE, l, h); rdmsr(MSR_IA32_APICBASE, l, h);
l &= ~MSR_IA32_APICBASE_BASE; l &= ~MSR_IA32_APICBASE_BASE;
l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
wrmsr(MSR_IA32_APICBASE, l, h); wrmsr(MSR_IA32_APICBASE, l, h);
} else
enable_x2apic();
apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
apic_write(APIC_ID, apic_pm_state.apic_id); apic_write(APIC_ID, apic_pm_state.apic_id);
apic_write(APIC_DFR, apic_pm_state.apic_dfr); apic_write(APIC_DFR, apic_pm_state.apic_dfr);
...@@ -1314,6 +1520,15 @@ __cpuinit int apic_is_clustered_box(void) ...@@ -1314,6 +1520,15 @@ __cpuinit int apic_is_clustered_box(void)
return (clusters > 2); return (clusters > 2);
} }
static __init int setup_nox2apic(char *str)
{
disable_x2apic = 1;
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
return 0;
}
early_param("nox2apic", setup_nox2apic);
/* /*
* APIC command line parameters * APIC command line parameters
*/ */
......
...@@ -597,6 +597,8 @@ void __cpuinit cpu_init(void) ...@@ -597,6 +597,8 @@ void __cpuinit cpu_init(void)
barrier(); barrier();
check_efer(); check_efer();
if (cpu != 0 && x2apic)
enable_x2apic();
/* /*
* set up and load the per-CPU TSS * set up and load the per-CPU TSS
......
...@@ -45,7 +45,7 @@ const char * const x86_cap_flags[NCAPINTS*32] = { ...@@ -45,7 +45,7 @@ const char * const x86_cap_flags[NCAPINTS*32] = {
/* Intel-defined (#2) */ /* Intel-defined (#2) */
"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
"tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", NULL, NULL, "dca", "sse4_1", "sse4_2", "x2apic", NULL, "popcnt",
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* VIA/Cyrix/Centaur-defined */ /* VIA/Cyrix/Centaur-defined */
......
...@@ -21,81 +21,52 @@ ...@@ -21,81 +21,52 @@
#include <asm/ipi.h> #include <asm/ipi.h>
#include <asm/genapic.h> #include <asm/genapic.h>
#ifdef CONFIG_ACPI extern struct genapic apic_flat;
#include <acpi/acpi_bus.h> extern struct genapic apic_physflat;
#endif extern struct genapic apic_x2xpic_uv_x;
extern struct genapic apic_x2apic_phys;
DEFINE_PER_CPU(int, x2apic_extra_bits); extern struct genapic apic_x2apic_cluster;
struct genapic __read_mostly *genapic = &apic_flat; struct genapic __read_mostly *genapic = &apic_flat;
static enum uv_system_type uv_system_type; static struct genapic *apic_probe[] __initdata = {
&apic_x2apic_uv_x,
&apic_x2apic_phys,
&apic_x2apic_cluster,
&apic_physflat,
NULL,
};
/* /*
* Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
*/ */
void __init setup_apic_routing(void) void __init setup_apic_routing(void)
{ {
if (uv_system_type == UV_NON_UNIQUE_APIC) if (genapic == &apic_flat) {
genapic = &apic_x2apic_uv_x; if (max_physical_apicid >= 8)
else
#ifdef CONFIG_ACPI
/*
* Quirk: some x86_64 machines can only use physical APIC mode
* regardless of how many processors are present (x86_64 ES7000
* is an example).
*/
if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
genapic = &apic_physflat; genapic = &apic_physflat;
else
#endif
if (max_physical_apicid < 8)
genapic = &apic_flat;
else
genapic = &apic_physflat;
printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
}
} }
/* Same for both flat and physical. */ /* Same for both flat and physical. */
void send_IPI_self(int vector) void apic_send_IPI_self(int vector)
{ {
__send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
} }
int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{ {
if (!strcmp(oem_id, "SGI")) { int i;
if (!strcmp(oem_table_id, "UVL"))
uv_system_type = UV_LEGACY_APIC; for (i = 0; apic_probe[i]; ++i) {
else if (!strcmp(oem_table_id, "UVX")) if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
uv_system_type = UV_X2APIC; genapic = apic_probe[i];
else if (!strcmp(oem_table_id, "UVH")) printk(KERN_INFO "Setting APIC routing to %s.\n",
uv_system_type = UV_NON_UNIQUE_APIC; genapic->name);
return 1;
}
} }
return 0; return 0;
} }
unsigned int read_apic_id(void)
{
unsigned int id;
WARN_ON(preemptible() && num_online_cpus() > 1);
id = apic_read(APIC_ID);
if (uv_system_type >= UV_X2APIC)
id |= __get_cpu_var(x2apic_extra_bits);
return id;
}
enum uv_system_type get_uv_system_type(void)
{
return uv_system_type;
}
int is_uv_system(void)
{
return uv_system_type != UV_NONE;
}
...@@ -15,9 +15,20 @@ ...@@ -15,9 +15,20 @@
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/ctype.h> #include <linux/ctype.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/hardirq.h>
#include <asm/smp.h> #include <asm/smp.h>
#include <asm/ipi.h> #include <asm/ipi.h>
#include <asm/genapic.h> #include <asm/genapic.h>
#include <mach_apicdef.h>
#ifdef CONFIG_ACPI
#include <acpi/acpi_bus.h>
#endif
static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
return 1;
}
static cpumask_t flat_target_cpus(void) static cpumask_t flat_target_cpus(void)
{ {
...@@ -95,9 +106,33 @@ static void flat_send_IPI_all(int vector) ...@@ -95,9 +106,33 @@ static void flat_send_IPI_all(int vector)
__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
} }
static unsigned int get_apic_id(unsigned long x)
{
unsigned int id;
id = (((x)>>24) & 0xFFu);
return id;
}
static unsigned long set_apic_id(unsigned int id)
{
unsigned long x;
x = ((id & 0xFFu)<<24);
return x;
}
static unsigned int read_xapic_id(void)
{
unsigned int id;
id = get_apic_id(apic_read(APIC_ID));
return id;
}
static int flat_apic_id_registered(void) static int flat_apic_id_registered(void)
{ {
return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); return physid_isset(read_xapic_id(), phys_cpu_present_map);
} }
static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
...@@ -112,6 +147,7 @@ static unsigned int phys_pkg_id(int index_msb) ...@@ -112,6 +147,7 @@ static unsigned int phys_pkg_id(int index_msb)
struct genapic apic_flat = { struct genapic apic_flat = {
.name = "flat", .name = "flat",
.acpi_madt_oem_check = flat_acpi_madt_oem_check,
.int_delivery_mode = dest_LowestPrio, .int_delivery_mode = dest_LowestPrio,
.int_dest_mode = (APIC_DEST_LOGICAL != 0), .int_dest_mode = (APIC_DEST_LOGICAL != 0),
.target_cpus = flat_target_cpus, .target_cpus = flat_target_cpus,
...@@ -121,8 +157,12 @@ struct genapic apic_flat = { ...@@ -121,8 +157,12 @@ struct genapic apic_flat = {
.send_IPI_all = flat_send_IPI_all, .send_IPI_all = flat_send_IPI_all,
.send_IPI_allbutself = flat_send_IPI_allbutself, .send_IPI_allbutself = flat_send_IPI_allbutself,
.send_IPI_mask = flat_send_IPI_mask, .send_IPI_mask = flat_send_IPI_mask,
.send_IPI_self = apic_send_IPI_self,
.cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
.phys_pkg_id = phys_pkg_id, .phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
.apic_id_mask = (0xFFu<<24),
}; };
/* /*
...@@ -130,6 +170,21 @@ struct genapic apic_flat = { ...@@ -130,6 +170,21 @@ struct genapic apic_flat = {
* We cannot use logical delivery in this case because the mask * We cannot use logical delivery in this case because the mask
* overflows, so use physical mode. * overflows, so use physical mode.
*/ */
static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
#ifdef CONFIG_ACPI
/*
* Quirk: some x86_64 machines can only use physical APIC mode
* regardless of how many processors are present (x86_64 ES7000
* is an example).
*/
if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
return 1;
#endif
return 0;
}
static cpumask_t physflat_target_cpus(void) static cpumask_t physflat_target_cpus(void)
{ {
...@@ -176,6 +231,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) ...@@ -176,6 +231,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
struct genapic apic_physflat = { struct genapic apic_physflat = {
.name = "physical flat", .name = "physical flat",
.acpi_madt_oem_check = physflat_acpi_madt_oem_check,
.int_delivery_mode = dest_Fixed, .int_delivery_mode = dest_Fixed,
.int_dest_mode = (APIC_DEST_PHYSICAL != 0), .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
.target_cpus = physflat_target_cpus, .target_cpus = physflat_target_cpus,
...@@ -185,6 +241,10 @@ struct genapic apic_physflat = { ...@@ -185,6 +241,10 @@ struct genapic apic_physflat = {
.send_IPI_all = physflat_send_IPI_all, .send_IPI_all = physflat_send_IPI_all,
.send_IPI_allbutself = physflat_send_IPI_allbutself, .send_IPI_allbutself = physflat_send_IPI_allbutself,
.send_IPI_mask = physflat_send_IPI_mask, .send_IPI_mask = physflat_send_IPI_mask,
.send_IPI_self = apic_send_IPI_self,
.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
.phys_pkg_id = phys_pkg_id, .phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
.apic_id_mask = (0xFFu<<24),
}; };
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/dmar.h>
#include <asm/smp.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
if (cpu_has_x2apic && intr_remapping_enabled)
return 1;
return 0;
}
/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
static cpumask_t x2apic_target_cpus(void)
{
return cpumask_of_cpu(0);
}
/*
* for now each logical cpu is in its own vector allocation domain.
*/
static cpumask_t x2apic_vector_allocation_domain(int cpu)
{
cpumask_t domain = CPU_MASK_NONE;
cpu_set(cpu, domain);
return domain;
}
static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
unsigned int dest)
{
unsigned long cfg;
cfg = __prepare_ICR(0, vector, dest);
/*
* send the IPI.
*/
x2apic_icr_write(cfg, apicid);
}
/*
* for now, we send the IPI's one by one in the cpumask.
* TBD: Based on the cpu mask, we can send the IPI's to the cluster group
* at once. We have 16 cpu's in a cluster. This will minimize IPI register
* writes.
*/
static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
{
unsigned long flags;
unsigned long query_cpu;
local_irq_save(flags);
for_each_cpu_mask(query_cpu, mask) {
__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
vector, APIC_DEST_LOGICAL);
}
local_irq_restore(flags);
}
static void x2apic_send_IPI_allbutself(int vector)
{
cpumask_t mask = cpu_online_map;
cpu_clear(smp_processor_id(), mask);
if (!cpus_empty(mask))
x2apic_send_IPI_mask(mask, vector);
}
static void x2apic_send_IPI_all(int vector)
{
x2apic_send_IPI_mask(cpu_online_map, vector);
}
static int x2apic_apic_id_registered(void)
{
return 1;
}
static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
{
int cpu;
/*
* We're using fixed IRQ delivery, can only return one phys APIC ID.
* May as well be the first.
*/
cpu = first_cpu(cpumask);
if ((unsigned)cpu < NR_CPUS)
return per_cpu(x86_cpu_to_logical_apicid, cpu);
else
return BAD_APICID;
}
static unsigned int get_apic_id(unsigned long x)
{
unsigned int id;
id = x;
return id;
}
static unsigned long set_apic_id(unsigned int id)
{
unsigned long x;
x = id;
return x;
}
static unsigned int x2apic_read_id(void)
{
return apic_read(APIC_ID);
}
static unsigned int phys_pkg_id(int index_msb)
{
return x2apic_read_id() >> index_msb;
}
static void x2apic_send_IPI_self(int vector)
{
apic_write(APIC_SELF_IPI, vector);
}
static void init_x2apic_ldr(void)
{
int cpu = smp_processor_id();
per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
return;
}
struct genapic apic_x2apic_cluster = {
.name = "cluster x2apic",
.acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
.int_delivery_mode = dest_LowestPrio,
.int_dest_mode = (APIC_DEST_LOGICAL != 0),
.target_cpus = x2apic_target_cpus,
.vector_allocation_domain = x2apic_vector_allocation_domain,
.apic_id_registered = x2apic_apic_id_registered,
.init_apic_ldr = init_x2apic_ldr,
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_allbutself = x2apic_send_IPI_allbutself,
.send_IPI_mask = x2apic_send_IPI_mask,
.send_IPI_self = x2apic_send_IPI_self,
.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
.phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
.apic_id_mask = (0xFFFFFFFFu),
};
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/dmar.h>
#include <asm/smp.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
static int x2apic_phys;
static int set_x2apic_phys_mode(char *arg)
{
x2apic_phys = 1;
return 0;
}
early_param("x2apic_phys", set_x2apic_phys_mode);
static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
if (cpu_has_x2apic && intr_remapping_enabled && x2apic_phys)
return 1;
return 0;
}
/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
static cpumask_t x2apic_target_cpus(void)
{
return cpumask_of_cpu(0);
}
static cpumask_t x2apic_vector_allocation_domain(int cpu)
{
cpumask_t domain = CPU_MASK_NONE;
cpu_set(cpu, domain);
return domain;
}
static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
unsigned int dest)
{
unsigned long cfg;
cfg = __prepare_ICR(0, vector, dest);
/*
* send the IPI.
*/
x2apic_icr_write(cfg, apicid);
}
static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
{
unsigned long flags;
unsigned long query_cpu;
local_irq_save(flags);
for_each_cpu_mask(query_cpu, mask) {
__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
vector, APIC_DEST_PHYSICAL);
}
local_irq_restore(flags);
}
static void x2apic_send_IPI_allbutself(int vector)
{
cpumask_t mask = cpu_online_map;
cpu_clear(smp_processor_id(), mask);
if (!cpus_empty(mask))
x2apic_send_IPI_mask(mask, vector);
}
static void x2apic_send_IPI_all(int vector)
{
x2apic_send_IPI_mask(cpu_online_map, vector);
}
static int x2apic_apic_id_registered(void)
{
return 1;
}
static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
{
int cpu;
/*
* We're using fixed IRQ delivery, can only return one phys APIC ID.
* May as well be the first.
*/
cpu = first_cpu(cpumask);
if ((unsigned)cpu < NR_CPUS)
return per_cpu(x86_cpu_to_apicid, cpu);
else
return BAD_APICID;
}
static unsigned int get_apic_id(unsigned long x)
{
unsigned int id;
id = x;
return id;
}
static unsigned long set_apic_id(unsigned int id)
{
unsigned long x;
x = id;
return x;
}
static unsigned int x2apic_read_id(void)
{
return apic_read(APIC_ID);
}
static unsigned int phys_pkg_id(int index_msb)
{
return x2apic_read_id() >> index_msb;
}
void x2apic_send_IPI_self(int vector)
{
apic_write(APIC_SELF_IPI, vector);
}
void init_x2apic_ldr(void)
{
return;
}
struct genapic apic_x2apic_phys = {
.name = "physical x2apic",
.acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
.int_delivery_mode = dest_Fixed,
.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
.target_cpus = x2apic_target_cpus,
.vector_allocation_domain = x2apic_vector_allocation_domain,
.apic_id_registered = x2apic_apic_id_registered,
.init_apic_ldr = init_x2apic_ldr,
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_allbutself = x2apic_send_IPI_allbutself,
.send_IPI_mask = x2apic_send_IPI_mask,
.send_IPI_self = x2apic_send_IPI_self,
.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
.phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
.apic_id_mask = (0xFFFFFFFFu),
};
...@@ -12,12 +12,12 @@ ...@@ -12,12 +12,12 @@
#include <linux/threads.h> #include <linux/threads.h>
#include <linux/cpumask.h> #include <linux/cpumask.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/kernel.h>
#include <linux/ctype.h> #include <linux/ctype.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/bootmem.h> #include <linux/bootmem.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/hardirq.h>
#include <asm/smp.h> #include <asm/smp.h>
#include <asm/ipi.h> #include <asm/ipi.h>
#include <asm/genapic.h> #include <asm/genapic.h>
...@@ -26,6 +26,35 @@ ...@@ -26,6 +26,35 @@
#include <asm/uv/uv_hub.h> #include <asm/uv/uv_hub.h>
#include <asm/uv/bios.h> #include <asm/uv/bios.h>
DEFINE_PER_CPU(int, x2apic_extra_bits);
static enum uv_system_type uv_system_type;
static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
if (!strcmp(oem_id, "SGI")) {
if (!strcmp(oem_table_id, "UVL"))
uv_system_type = UV_LEGACY_APIC;
else if (!strcmp(oem_table_id, "UVX"))
uv_system_type = UV_X2APIC;
else if (!strcmp(oem_table_id, "UVH")) {
uv_system_type = UV_NON_UNIQUE_APIC;
return 1;
}
}
return 0;
}
enum uv_system_type get_uv_system_type(void)
{
return uv_system_type;
}
int is_uv_system(void)
{
return uv_system_type != UV_NONE;
}
DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
...@@ -123,6 +152,10 @@ static int uv_apic_id_registered(void) ...@@ -123,6 +152,10 @@ static int uv_apic_id_registered(void)
return 1; return 1;
} }
static void uv_init_apic_ldr(void)
{
}
static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
{ {
int cpu; int cpu;
...@@ -138,9 +171,34 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) ...@@ -138,9 +171,34 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
return BAD_APICID; return BAD_APICID;
} }
static unsigned int get_apic_id(unsigned long x)
{
unsigned int id;
WARN_ON(preemptible() && num_online_cpus() > 1);
id = x | __get_cpu_var(x2apic_extra_bits);
return id;
}
static unsigned long set_apic_id(unsigned int id)
{
unsigned long x;
/* maskout x2apic_extra_bits ? */
x = id;
return x;
}
static unsigned int uv_read_apic_id(void)
{
return get_apic_id(apic_read(APIC_ID));
}
static unsigned int phys_pkg_id(int index_msb) static unsigned int phys_pkg_id(int index_msb)
{ {
return GET_APIC_ID(read_apic_id()) >> index_msb; return uv_read_apic_id() >> index_msb;
} }
#ifdef ZZZ /* Needs x2apic patch */ #ifdef ZZZ /* Needs x2apic patch */
...@@ -152,17 +210,22 @@ static void uv_send_IPI_self(int vector) ...@@ -152,17 +210,22 @@ static void uv_send_IPI_self(int vector)
struct genapic apic_x2apic_uv_x = { struct genapic apic_x2apic_uv_x = {
.name = "UV large system", .name = "UV large system",
.acpi_madt_oem_check = uv_acpi_madt_oem_check,
.int_delivery_mode = dest_Fixed, .int_delivery_mode = dest_Fixed,
.int_dest_mode = (APIC_DEST_PHYSICAL != 0), .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
.target_cpus = uv_target_cpus, .target_cpus = uv_target_cpus,
.vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
.apic_id_registered = uv_apic_id_registered, .apic_id_registered = uv_apic_id_registered,
.init_apic_ldr = uv_init_apic_ldr,
.send_IPI_all = uv_send_IPI_all, .send_IPI_all = uv_send_IPI_all,
.send_IPI_allbutself = uv_send_IPI_allbutself, .send_IPI_allbutself = uv_send_IPI_allbutself,
.send_IPI_mask = uv_send_IPI_mask, .send_IPI_mask = uv_send_IPI_mask,
/* ZZZ.send_IPI_self = uv_send_IPI_self, */ /* ZZZ.send_IPI_self = uv_send_IPI_self, */
.cpu_mask_to_apicid = uv_cpu_mask_to_apicid, .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
.phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
.apic_id_mask = (0xFFFFFFFFu),
}; };
static __cpuinit void set_x2apic_extra_bits(int pnode) static __cpuinit void set_x2apic_extra_bits(int pnode)
...@@ -399,3 +462,5 @@ void __cpuinit uv_cpu_init(void) ...@@ -399,3 +462,5 @@ void __cpuinit uv_cpu_init(void)
if (get_uv_system_type() == UV_NON_UNIQUE_APIC) if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
set_x2apic_extra_bits(uv_hub_info->pnode); set_x2apic_extra_bits(uv_hub_info->pnode);
} }
...@@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void) ...@@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void)
device_initcall(i8259A_init_sysfs); device_initcall(i8259A_init_sysfs);
void mask_8259A(void)
{
unsigned long flags;
spin_lock_irqsave(&i8259A_lock, flags);
outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
spin_unlock_irqrestore(&i8259A_lock, flags);
}
void unmask_8259A(void)
{
unsigned long flags;
spin_lock_irqsave(&i8259A_lock, flags);
outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
spin_unlock_irqrestore(&i8259A_lock, flags);
}
void init_8259A(int auto_eoi) void init_8259A(int auto_eoi)
{ {
unsigned long flags; unsigned long flags;
......
...@@ -1494,7 +1494,7 @@ void /*__init*/ print_local_APIC(void *dummy) ...@@ -1494,7 +1494,7 @@ void /*__init*/ print_local_APIC(void *dummy)
smp_processor_id(), hard_smp_processor_id()); smp_processor_id(), hard_smp_processor_id());
v = apic_read(APIC_ID); v = apic_read(APIC_ID);
printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
GET_APIC_ID(read_apic_id())); GET_APIC_ID(v));
v = apic_read(APIC_LVR); v = apic_read(APIC_LVR);
printk(KERN_INFO "... APIC VERSION: %08x\n", v); printk(KERN_INFO "... APIC VERSION: %08x\n", v);
ver = GET_APIC_VERSION(v); ver = GET_APIC_VERSION(v);
...@@ -1702,8 +1702,7 @@ void disable_IO_APIC(void) ...@@ -1702,8 +1702,7 @@ void disable_IO_APIC(void)
entry.dest_mode = 0; /* Physical */ entry.dest_mode = 0; /* Physical */
entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.delivery_mode = dest_ExtINT; /* ExtInt */
entry.vector = 0; entry.vector = 0;
entry.dest.physical.physical_dest = entry.dest.physical.physical_dest = read_apic_id();
GET_APIC_ID(read_apic_id());
/* /*
* Add it to the IO-APIC irq-routing table: * Add it to the IO-APIC irq-routing table:
......
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include <acpi/acpi_bus.h> #include <acpi/acpi_bus.h>
#endif #endif
#include <linux/bootmem.h> #include <linux/bootmem.h>
#include <linux/dmar.h>
#include <asm/idle.h> #include <asm/idle.h>
#include <asm/io.h> #include <asm/io.h>
...@@ -49,6 +50,7 @@ ...@@ -49,6 +50,7 @@
#include <asm/nmi.h> #include <asm/nmi.h>
#include <asm/msidef.h> #include <asm/msidef.h>
#include <asm/hypertransport.h> #include <asm/hypertransport.h>
#include <asm/irq_remapping.h>
#include <mach_ipi.h> #include <mach_ipi.h>
#include <mach_apic.h> #include <mach_apic.h>
...@@ -108,6 +110,9 @@ DEFINE_SPINLOCK(vector_lock); ...@@ -108,6 +110,9 @@ DEFINE_SPINLOCK(vector_lock);
*/ */
int nr_ioapic_registers[MAX_IO_APICS]; int nr_ioapic_registers[MAX_IO_APICS];
/* I/O APIC RTE contents at the OS boot up */
struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
/* I/O APIC entries */ /* I/O APIC entries */
struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
int nr_ioapics; int nr_ioapics;
...@@ -303,6 +308,11 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) ...@@ -303,6 +308,11 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
pin = entry->pin; pin = entry->pin;
if (pin == -1) if (pin == -1)
break; break;
/*
* With interrupt-remapping, destination information comes
* from interrupt-remapping table entry.
*/
if (!irq_remapped(irq))
io_apic_write(apic, 0x11 + pin*2, dest); io_apic_write(apic, 0x11 + pin*2, dest);
reg = io_apic_read(apic, 0x10 + pin*2); reg = io_apic_read(apic, 0x10 + pin*2);
reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg &= ~IO_APIC_REDIR_VECTOR_MASK;
...@@ -440,6 +450,69 @@ static void clear_IO_APIC (void) ...@@ -440,6 +450,69 @@ static void clear_IO_APIC (void)
clear_IO_APIC_pin(apic, pin); clear_IO_APIC_pin(apic, pin);
} }
/*
* Saves and masks all the unmasked IO-APIC RTE's
*/
int save_mask_IO_APIC_setup(void)
{
union IO_APIC_reg_01 reg_01;
unsigned long flags;
int apic, pin;
/*
* The number of IO-APIC IRQ registers (== #pins):
*/
for (apic = 0; apic < nr_ioapics; apic++) {
spin_lock_irqsave(&ioapic_lock, flags);
reg_01.raw = io_apic_read(apic, 1);
spin_unlock_irqrestore(&ioapic_lock, flags);
nr_ioapic_registers[apic] = reg_01.bits.entries+1;
}
for (apic = 0; apic < nr_ioapics; apic++) {
early_ioapic_entries[apic] =
kzalloc(sizeof(struct IO_APIC_route_entry) *
nr_ioapic_registers[apic], GFP_KERNEL);
if (!early_ioapic_entries[apic])
return -ENOMEM;
}
for (apic = 0; apic < nr_ioapics; apic++)
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
struct IO_APIC_route_entry entry;
entry = early_ioapic_entries[apic][pin] =
ioapic_read_entry(apic, pin);
if (!entry.mask) {
entry.mask = 1;
ioapic_write_entry(apic, pin, entry);
}
}
return 0;
}
void restore_IO_APIC_setup(void)
{
int apic, pin;
for (apic = 0; apic < nr_ioapics; apic++)
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
ioapic_write_entry(apic, pin,
early_ioapic_entries[apic][pin]);
}
void reinit_intr_remapped_IO_APIC(int intr_remapping)
{
/*
* for now plain restore of previous settings.
* TBD: In the case of OS enabling interrupt-remapping,
* IO-APIC RTE's need to be setup to point to interrupt-remapping
* table entries. for now, do a plain restore, and wait for
* the setup_IO_APIC_irqs() to do proper initialization.
*/
restore_IO_APIC_setup();
}
int skip_ioapic_setup; int skip_ioapic_setup;
int ioapic_force; int ioapic_force;
...@@ -834,18 +907,98 @@ void setup_vector_irq(int cpu) ...@@ -834,18 +907,98 @@ void setup_vector_irq(int cpu)
static struct irq_chip ioapic_chip; static struct irq_chip ioapic_chip;
#ifdef CONFIG_INTR_REMAP
static struct irq_chip ir_ioapic_chip;
#endif
static void ioapic_register_intr(int irq, unsigned long trigger) static void ioapic_register_intr(int irq, unsigned long trigger)
{ {
if (trigger) { if (trigger)
irq_desc[irq].status |= IRQ_LEVEL; irq_desc[irq].status |= IRQ_LEVEL;
set_irq_chip_and_handler_name(irq, &ioapic_chip, else
handle_fasteoi_irq, "fasteoi");
} else {
irq_desc[irq].status &= ~IRQ_LEVEL; irq_desc[irq].status &= ~IRQ_LEVEL;
#ifdef CONFIG_INTR_REMAP
if (irq_remapped(irq)) {
irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
if (trigger)
set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
handle_fasteoi_irq,
"fasteoi");
else
set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
handle_edge_irq, "edge");
return;
}
#endif
if (trigger)
set_irq_chip_and_handler_name(irq, &ioapic_chip,
handle_fasteoi_irq,
"fasteoi");
else
set_irq_chip_and_handler_name(irq, &ioapic_chip, set_irq_chip_and_handler_name(irq, &ioapic_chip,
handle_edge_irq, "edge"); handle_edge_irq, "edge");
}
static int setup_ioapic_entry(int apic, int irq,
struct IO_APIC_route_entry *entry,
unsigned int destination, int trigger,
int polarity, int vector)
{
/*
* add it to the IO-APIC irq-routing table:
*/
memset(entry,0,sizeof(*entry));
#ifdef CONFIG_INTR_REMAP
if (intr_remapping_enabled) {
struct intel_iommu *iommu = map_ioapic_to_ir(apic);
struct irte irte;
struct IR_IO_APIC_route_entry *ir_entry =
(struct IR_IO_APIC_route_entry *) entry;
int index;
if (!iommu)
panic("No mapping iommu for ioapic %d\n", apic);
index = alloc_irte(iommu, irq, 1);
if (index < 0)
panic("Failed to allocate IRTE for ioapic %d\n", apic);
memset(&irte, 0, sizeof(irte));
irte.present = 1;
irte.dst_mode = INT_DEST_MODE;
irte.trigger_mode = trigger;
irte.dlvry_mode = INT_DELIVERY_MODE;
irte.vector = vector;
irte.dest_id = IRTE_DEST(destination);
modify_irte(irq, &irte);
ir_entry->index2 = (index >> 15) & 0x1;
ir_entry->zero = 0;
ir_entry->format = 1;
ir_entry->index = (index & 0x7fff);
} else
#endif
{
entry->delivery_mode = INT_DELIVERY_MODE;
entry->dest_mode = INT_DEST_MODE;
entry->dest = destination;
} }
entry->mask = 0; /* enable IRQ */
entry->trigger = trigger;
entry->polarity = polarity;
entry->vector = vector;
/* Mask level triggered irqs.
* Use IRQ_DELAYED_DISABLE for edge triggered irqs.
*/
if (trigger)
entry->mask = 1;
return 0;
} }
static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
...@@ -870,24 +1023,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, ...@@ -870,24 +1023,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
irq, trigger, polarity); irq, trigger, polarity);
/*
* add it to the IO-APIC irq-routing table:
*/
memset(&entry,0,sizeof(entry));
entry.delivery_mode = INT_DELIVERY_MODE;
entry.dest_mode = INT_DEST_MODE;
entry.dest = cpu_mask_to_apicid(mask);
entry.mask = 0; /* enable IRQ */
entry.trigger = trigger;
entry.polarity = polarity;
entry.vector = cfg->vector;
/* Mask level triggered irqs. if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
* Use IRQ_DELAYED_DISABLE for edge triggered irqs. cpu_mask_to_apicid(mask), trigger, polarity,
*/ cfg->vector)) {
if (trigger) printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
entry.mask = 1; mp_ioapics[apic].mp_apicid, pin);
__clear_irq_vector(irq);
return;
}
ioapic_register_intr(irq, trigger); ioapic_register_intr(irq, trigger);
if (irq < 16) if (irq < 16)
...@@ -939,6 +1083,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, ...@@ -939,6 +1083,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
{ {
struct IO_APIC_route_entry entry; struct IO_APIC_route_entry entry;
if (intr_remapping_enabled)
return;
memset(&entry, 0, sizeof(entry)); memset(&entry, 0, sizeof(entry));
/* /*
...@@ -1085,6 +1232,7 @@ static __apicdebuginit void print_APIC_bitfield (int base) ...@@ -1085,6 +1232,7 @@ static __apicdebuginit void print_APIC_bitfield (int base)
void __apicdebuginit print_local_APIC(void * dummy) void __apicdebuginit print_local_APIC(void * dummy)
{ {
unsigned int v, ver, maxlvt; unsigned int v, ver, maxlvt;
unsigned long icr;
if (apic_verbosity == APIC_QUIET) if (apic_verbosity == APIC_QUIET)
return; return;
...@@ -1092,7 +1240,7 @@ void __apicdebuginit print_local_APIC(void * dummy) ...@@ -1092,7 +1240,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
smp_processor_id(), hard_smp_processor_id()); smp_processor_id(), hard_smp_processor_id());
v = apic_read(APIC_ID); v = apic_read(APIC_ID);
printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
v = apic_read(APIC_LVR); v = apic_read(APIC_LVR);
printk(KERN_INFO "... APIC VERSION: %08x\n", v); printk(KERN_INFO "... APIC VERSION: %08x\n", v);
ver = GET_APIC_VERSION(v); ver = GET_APIC_VERSION(v);
...@@ -1128,10 +1276,9 @@ void __apicdebuginit print_local_APIC(void * dummy) ...@@ -1128,10 +1276,9 @@ void __apicdebuginit print_local_APIC(void * dummy)
v = apic_read(APIC_ESR); v = apic_read(APIC_ESR);
printk(KERN_DEBUG "... APIC ESR: %08x\n", v); printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
v = apic_read(APIC_ICR); icr = apic_icr_read();
printk(KERN_DEBUG "... APIC ICR: %08x\n", v); printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
v = apic_read(APIC_ICR2); printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
v = apic_read(APIC_LVTT); v = apic_read(APIC_LVTT);
printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
...@@ -1286,7 +1433,7 @@ void disable_IO_APIC(void) ...@@ -1286,7 +1433,7 @@ void disable_IO_APIC(void)
entry.dest_mode = 0; /* Physical */ entry.dest_mode = 0; /* Physical */
entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.delivery_mode = dest_ExtINT; /* ExtInt */
entry.vector = 0; entry.vector = 0;
entry.dest = GET_APIC_ID(read_apic_id()); entry.dest = read_apic_id();
/* /*
* Add it to the IO-APIC irq-routing table: * Add it to the IO-APIC irq-routing table:
...@@ -1392,6 +1539,147 @@ static int ioapic_retrigger_irq(unsigned int irq) ...@@ -1392,6 +1539,147 @@ static int ioapic_retrigger_irq(unsigned int irq)
*/ */
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
#ifdef CONFIG_INTR_REMAP
static void ir_irq_migration(struct work_struct *work);
static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
/*
* Migrate the IO-APIC irq in the presence of intr-remapping.
*
* For edge triggered, irq migration is a simple atomic update(of vector
* and cpu destination) of IRTE and flush the hardware cache.
*
* For level triggered, we need to modify the io-apic RTE aswell with the update
* vector information, along with modifying IRTE with vector and destination.
* So irq migration for level triggered is little bit more complex compared to
* edge triggered migration. But the good news is, we use the same algorithm
* for level triggered migration as we have today, only difference being,
* we now initiate the irq migration from process context instead of the
* interrupt context.
*
* In future, when we do a directed EOI (combined with cpu EOI broadcast
* suppression) to the IO-APIC, level triggered irq migration will also be
* as simple as edge triggered migration and we can do the irq migration
* with a simple atomic update to IO-APIC RTE.
*/
static void migrate_ioapic_irq(int irq, cpumask_t mask)
{
struct irq_cfg *cfg = irq_cfg + irq;
struct irq_desc *desc = irq_desc + irq;
cpumask_t tmp, cleanup_mask;
struct irte irte;
int modify_ioapic_rte = desc->status & IRQ_LEVEL;
unsigned int dest;
unsigned long flags;
cpus_and(tmp, mask, cpu_online_map);
if (cpus_empty(tmp))
return;
if (get_irte(irq, &irte))
return;
if (assign_irq_vector(irq, mask))
return;
cpus_and(tmp, cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
if (modify_ioapic_rte) {
spin_lock_irqsave(&ioapic_lock, flags);
__target_IO_APIC_irq(irq, dest, cfg->vector);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
irte.vector = cfg->vector;
irte.dest_id = IRTE_DEST(dest);
/*
* Modified the IRTE and flushes the Interrupt entry cache.
*/
modify_irte(irq, &irte);
if (cfg->move_in_progress) {
cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
cfg->move_cleanup_count = cpus_weight(cleanup_mask);
send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
cfg->move_in_progress = 0;
}
irq_desc[irq].affinity = mask;
}
static int migrate_irq_remapped_level(int irq)
{
int ret = -1;
mask_IO_APIC_irq(irq);
if (io_apic_level_ack_pending(irq)) {
/*
* Interrupt in progress. Migrating irq now will change the
* vector information in the IO-APIC RTE and that will confuse
* the EOI broadcast performed by cpu.
* So, delay the irq migration to the next instance.
*/
schedule_delayed_work(&ir_migration_work, 1);
goto unmask;
}
/* everthing is clear. we have right of way */
migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
ret = 0;
irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
cpus_clear(irq_desc[irq].pending_mask);
unmask:
unmask_IO_APIC_irq(irq);
return ret;
}
static void ir_irq_migration(struct work_struct *work)
{
int irq;
for (irq = 0; irq < NR_IRQS; irq++) {
struct irq_desc *desc = irq_desc + irq;
if (desc->status & IRQ_MOVE_PENDING) {
unsigned long flags;
spin_lock_irqsave(&desc->lock, flags);
if (!desc->chip->set_affinity ||
!(desc->status & IRQ_MOVE_PENDING)) {
desc->status &= ~IRQ_MOVE_PENDING;
spin_unlock_irqrestore(&desc->lock, flags);
continue;
}
desc->chip->set_affinity(irq,
irq_desc[irq].pending_mask);
spin_unlock_irqrestore(&desc->lock, flags);
}
}
}
/*
* Migrates the IRQ destination in the process context.
*/
static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
{
if (irq_desc[irq].status & IRQ_LEVEL) {
irq_desc[irq].status |= IRQ_MOVE_PENDING;
irq_desc[irq].pending_mask = mask;
migrate_irq_remapped_level(irq);
return;
}
migrate_ioapic_irq(irq, mask);
}
#endif
asmlinkage void smp_irq_move_cleanup_interrupt(void) asmlinkage void smp_irq_move_cleanup_interrupt(void)
{ {
unsigned vector, me; unsigned vector, me;
...@@ -1448,6 +1736,17 @@ static void irq_complete_move(unsigned int irq) ...@@ -1448,6 +1736,17 @@ static void irq_complete_move(unsigned int irq)
#else #else
static inline void irq_complete_move(unsigned int irq) {} static inline void irq_complete_move(unsigned int irq) {}
#endif #endif
#ifdef CONFIG_INTR_REMAP
static void ack_x2apic_level(unsigned int irq)
{
ack_x2APIC_irq();
}
static void ack_x2apic_edge(unsigned int irq)
{
ack_x2APIC_irq();
}
#endif
static void ack_apic_edge(unsigned int irq) static void ack_apic_edge(unsigned int irq)
{ {
...@@ -1522,6 +1821,21 @@ static struct irq_chip ioapic_chip __read_mostly = { ...@@ -1522,6 +1821,21 @@ static struct irq_chip ioapic_chip __read_mostly = {
.retrigger = ioapic_retrigger_irq, .retrigger = ioapic_retrigger_irq,
}; };
#ifdef CONFIG_INTR_REMAP
static struct irq_chip ir_ioapic_chip __read_mostly = {
.name = "IR-IO-APIC",
.startup = startup_ioapic_irq,
.mask = mask_IO_APIC_irq,
.unmask = unmask_IO_APIC_irq,
.ack = ack_x2apic_edge,
.eoi = ack_x2apic_level,
#ifdef CONFIG_SMP
.set_affinity = set_ir_ioapic_affinity_irq,
#endif
.retrigger = ioapic_retrigger_irq,
};
#endif
static inline void init_IO_APIC_traps(void) static inline void init_IO_APIC_traps(void)
{ {
int irq; int irq;
...@@ -1707,6 +2021,8 @@ static inline void __init check_timer(void) ...@@ -1707,6 +2021,8 @@ static inline void __init check_timer(void)
* 8259A. * 8259A.
*/ */
if (pin1 == -1) { if (pin1 == -1) {
if (intr_remapping_enabled)
panic("BIOS bug: timer not connected to IO-APIC");
pin1 = pin2; pin1 = pin2;
apic1 = apic2; apic1 = apic2;
no_pin1 = 1; no_pin1 = 1;
...@@ -1733,6 +2049,8 @@ static inline void __init check_timer(void) ...@@ -1733,6 +2049,8 @@ static inline void __init check_timer(void)
clear_IO_APIC_pin(0, pin1); clear_IO_APIC_pin(0, pin1);
goto out; goto out;
} }
if (intr_remapping_enabled)
panic("timer doesn't work through Interrupt-remapped IO-APIC");
clear_IO_APIC_pin(apic1, pin1); clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1) if (!no_pin1)
apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
...@@ -1972,6 +2290,9 @@ void destroy_irq(unsigned int irq) ...@@ -1972,6 +2290,9 @@ void destroy_irq(unsigned int irq)
dynamic_irq_cleanup(irq); dynamic_irq_cleanup(irq);
#ifdef CONFIG_INTR_REMAP
free_irte(irq);
#endif
spin_lock_irqsave(&vector_lock, flags); spin_lock_irqsave(&vector_lock, flags);
__clear_irq_vector(irq); __clear_irq_vector(irq);
spin_unlock_irqrestore(&vector_lock, flags); spin_unlock_irqrestore(&vector_lock, flags);
...@@ -1990,10 +2311,41 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms ...@@ -1990,10 +2311,41 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
tmp = TARGET_CPUS; tmp = TARGET_CPUS;
err = assign_irq_vector(irq, tmp); err = assign_irq_vector(irq, tmp);
if (!err) { if (err)
return err;
cpus_and(tmp, cfg->domain, tmp); cpus_and(tmp, cfg->domain, tmp);
dest = cpu_mask_to_apicid(tmp); dest = cpu_mask_to_apicid(tmp);
#ifdef CONFIG_INTR_REMAP
if (irq_remapped(irq)) {
struct irte irte;
int ir_index;
u16 sub_handle;
ir_index = map_irq_to_irte_handle(irq, &sub_handle);
BUG_ON(ir_index == -1);
memset (&irte, 0, sizeof(irte));
irte.present = 1;
irte.dst_mode = INT_DEST_MODE;
irte.trigger_mode = 0; /* edge */
irte.dlvry_mode = INT_DELIVERY_MODE;
irte.vector = cfg->vector;
irte.dest_id = IRTE_DEST(dest);
modify_irte(irq, &irte);
msg->address_hi = MSI_ADDR_BASE_HI;
msg->data = sub_handle;
msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
MSI_ADDR_IR_SHV |
MSI_ADDR_IR_INDEX1(ir_index) |
MSI_ADDR_IR_INDEX2(ir_index);
} else
#endif
{
msg->address_hi = MSI_ADDR_BASE_HI; msg->address_hi = MSI_ADDR_BASE_HI;
msg->address_lo = msg->address_lo =
MSI_ADDR_BASE_LO | MSI_ADDR_BASE_LO |
...@@ -2044,6 +2396,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) ...@@ -2044,6 +2396,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
write_msi_msg(irq, &msg); write_msi_msg(irq, &msg);
irq_desc[irq].affinity = mask; irq_desc[irq].affinity = mask;
} }
#ifdef CONFIG_INTR_REMAP
/*
* Migrate the MSI irq to another cpumask. This migration is
* done in the process context using interrupt-remapping hardware.
*/
static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
{
struct irq_cfg *cfg = irq_cfg + irq;
unsigned int dest;
cpumask_t tmp, cleanup_mask;
struct irte irte;
cpus_and(tmp, mask, cpu_online_map);
if (cpus_empty(tmp))
return;
if (get_irte(irq, &irte))
return;
if (assign_irq_vector(irq, mask))
return;
cpus_and(tmp, cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
irte.vector = cfg->vector;
irte.dest_id = IRTE_DEST(dest);
/*
* atomically update the IRTE with the new destination and vector.
*/
modify_irte(irq, &irte);
/*
* After this point, all the interrupts will start arriving
* at the new destination. So, time to cleanup the previous
* vector allocation.
*/
if (cfg->move_in_progress) {
cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
cfg->move_cleanup_count = cpus_weight(cleanup_mask);
send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
cfg->move_in_progress = 0;
}
irq_desc[irq].affinity = mask;
}
#endif
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
/* /*
...@@ -2061,26 +2462,157 @@ static struct irq_chip msi_chip = { ...@@ -2061,26 +2462,157 @@ static struct irq_chip msi_chip = {
.retrigger = ioapic_retrigger_irq, .retrigger = ioapic_retrigger_irq,
}; };
int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) #ifdef CONFIG_INTR_REMAP
static struct irq_chip msi_ir_chip = {
.name = "IR-PCI-MSI",
.unmask = unmask_msi_irq,
.mask = mask_msi_irq,
.ack = ack_x2apic_edge,
#ifdef CONFIG_SMP
.set_affinity = ir_set_msi_irq_affinity,
#endif
.retrigger = ioapic_retrigger_irq,
};
/*
* Map the PCI dev to the corresponding remapping hardware unit
* and allocate 'nvec' consecutive interrupt-remapping table entries
* in it.
*/
static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
{ {
struct intel_iommu *iommu;
int index;
iommu = map_dev_to_ir(dev);
if (!iommu) {
printk(KERN_ERR
"Unable to map PCI %s to iommu\n", pci_name(dev));
return -ENOENT;
}
index = alloc_irte(iommu, irq, nvec);
if (index < 0) {
printk(KERN_ERR
"Unable to allocate %d IRTE for PCI %s\n", nvec,
pci_name(dev));
return -ENOSPC;
}
return index;
}
#endif
static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
{
int ret;
struct msi_msg msg; struct msi_msg msg;
ret = msi_compose_msg(dev, irq, &msg);
if (ret < 0)
return ret;
set_irq_msi(irq, desc);
write_msi_msg(irq, &msg);
#ifdef CONFIG_INTR_REMAP
if (irq_remapped(irq)) {
struct irq_desc *desc = irq_desc + irq;
/*
* irq migration in process context
*/
desc->status |= IRQ_MOVE_PCNTXT;
set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
} else
#endif
set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
return 0;
}
int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
{
int irq, ret; int irq, ret;
irq = create_irq(); irq = create_irq();
if (irq < 0) if (irq < 0)
return irq; return irq;
ret = msi_compose_msg(dev, irq, &msg); #ifdef CONFIG_INTR_REMAP
if (!intr_remapping_enabled)
goto no_ir;
ret = msi_alloc_irte(dev, irq, 1);
if (ret < 0)
goto error;
no_ir:
#endif
ret = setup_msi_irq(dev, desc, irq);
if (ret < 0) { if (ret < 0) {
destroy_irq(irq); destroy_irq(irq);
return ret; return ret;
} }
return 0;
set_irq_msi(irq, desc); #ifdef CONFIG_INTR_REMAP
write_msi_msg(irq, &msg); error:
destroy_irq(irq);
return ret;
#endif
}
set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
int irq, ret, sub_handle;
struct msi_desc *desc;
#ifdef CONFIG_INTR_REMAP
struct intel_iommu *iommu = 0;
int index = 0;
#endif
sub_handle = 0;
list_for_each_entry(desc, &dev->msi_list, list) {
irq = create_irq();
if (irq < 0)
return irq;
#ifdef CONFIG_INTR_REMAP
if (!intr_remapping_enabled)
goto no_ir;
if (!sub_handle) {
/*
* allocate the consecutive block of IRTE's
* for 'nvec'
*/
index = msi_alloc_irte(dev, irq, nvec);
if (index < 0) {
ret = index;
goto error;
}
} else {
iommu = map_dev_to_ir(dev);
if (!iommu) {
ret = -ENOENT;
goto error;
}
/*
* setup the mapping between the irq and the IRTE
* base index, the sub_handle pointing to the
* appropriate interrupt remap table entry.
*/
set_irte_irq(irq, iommu, index, sub_handle);
}
no_ir:
#endif
ret = setup_msi_irq(dev, desc, irq);
if (ret < 0)
goto error;
sub_handle++;
}
return 0; return 0;
error:
destroy_irq(irq);
return ret;
} }
void arch_teardown_msi_irq(unsigned int irq) void arch_teardown_msi_irq(unsigned int irq)
...@@ -2328,6 +2860,10 @@ void __init setup_ioapic_dest(void) ...@@ -2328,6 +2860,10 @@ void __init setup_ioapic_dest(void)
setup_IO_APIC_irq(ioapic, pin, irq, setup_IO_APIC_irq(ioapic, pin, irq,
irq_trigger(irq_entry), irq_trigger(irq_entry),
irq_polarity(irq_entry)); irq_polarity(irq_entry));
#ifdef CONFIG_INTR_REMAP
else if (intr_remapping_enabled)
set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
#endif
else else
set_ioapic_affinity_irq(irq, TARGET_CPUS); set_ioapic_affinity_irq(irq, TARGET_CPUS);
} }
......
...@@ -397,7 +397,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) ...@@ -397,7 +397,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
generic_bigsmp_probe(); generic_bigsmp_probe();
#endif #endif
#ifdef CONFIG_X86_32
setup_apic_routing(); setup_apic_routing();
#endif
if (!num_processors) if (!num_processors)
printk(KERN_ERR "MPTABLE: no processors registered!\n"); printk(KERN_ERR "MPTABLE: no processors registered!\n");
return num_processors; return num_processors;
......
...@@ -373,8 +373,6 @@ struct pv_cpu_ops pv_cpu_ops = { ...@@ -373,8 +373,6 @@ struct pv_cpu_ops pv_cpu_ops = {
struct pv_apic_ops pv_apic_ops = { struct pv_apic_ops pv_apic_ops = {
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
.apic_write = native_apic_write,
.apic_read = native_apic_read,
.setup_boot_clock = setup_boot_APIC_clock, .setup_boot_clock = setup_boot_APIC_clock,
.setup_secondary_clock = setup_secondary_APIC_clock, .setup_secondary_clock = setup_secondary_APIC_clock,
.startup_ipi_hook = paravirt_nop, .startup_ipi_hook = paravirt_nop,
......
...@@ -731,6 +731,8 @@ void __init setup_arch(char **cmdline_p) ...@@ -731,6 +731,8 @@ void __init setup_arch(char **cmdline_p)
num_physpages = max_pfn; num_physpages = max_pfn;
check_efer(); check_efer();
if (cpu_has_x2apic)
check_x2apic();
/* How many end-of-memory variables you have, grandma! */ /* How many end-of-memory variables you have, grandma! */
/* need this before calling reserve_initrd */ /* need this before calling reserve_initrd */
......
...@@ -123,7 +123,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); ...@@ -123,7 +123,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
static atomic_t init_deasserted; static atomic_t init_deasserted;
static int boot_cpu_logical_apicid;
/* representing cpus for which sibling maps can be computed */ /* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map; static cpumask_t cpu_sibling_setup_map;
...@@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu) ...@@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu)
#endif #endif
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
static int boot_cpu_logical_apicid;
u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
{ [0 ... NR_CPUS-1] = BAD_APICID }; { [0 ... NR_CPUS-1] = BAD_APICID };
...@@ -210,7 +211,7 @@ static void __cpuinit smp_callin(void) ...@@ -210,7 +211,7 @@ static void __cpuinit smp_callin(void)
/* /*
* (This works even if the APIC is not enabled.) * (This works even if the APIC is not enabled.)
*/ */
phys_id = GET_APIC_ID(read_apic_id()); phys_id = read_apic_id();
cpuid = smp_processor_id(); cpuid = smp_processor_id();
if (cpu_isset(cpuid, cpu_callin_map)) { if (cpu_isset(cpuid, cpu_callin_map)) {
panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
...@@ -546,8 +547,7 @@ static inline void __inquire_remote_apic(int apicid) ...@@ -546,8 +547,7 @@ static inline void __inquire_remote_apic(int apicid)
printk(KERN_CONT printk(KERN_CONT
"a previous APIC delivery may have failed\n"); "a previous APIC delivery may have failed\n");
apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
timeout = 0; timeout = 0;
do { do {
...@@ -579,11 +579,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) ...@@ -579,11 +579,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
int maxlvt; int maxlvt;
/* Target chip */ /* Target chip */
apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
/* Boot on the stack */ /* Boot on the stack */
/* Kick the second */ /* Kick the second */
apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
pr_debug("Waiting for send to finish...\n"); pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle(); send_status = safe_apic_wait_icr_idle();
...@@ -636,13 +634,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) ...@@ -636,13 +634,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
/* /*
* Turn INIT on target chip * Turn INIT on target chip
*/ */
apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
/* /*
* Send IPI * Send IPI
*/ */
apic_write(APIC_ICR, apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT); phys_apicid);
pr_debug("Waiting for send to finish...\n"); pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle(); send_status = safe_apic_wait_icr_idle();
...@@ -652,10 +648,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) ...@@ -652,10 +648,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
pr_debug("Deasserting INIT.\n"); pr_debug("Deasserting INIT.\n");
/* Target chip */ /* Target chip */
apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
/* Send IPI */ /* Send IPI */
apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
pr_debug("Waiting for send to finish...\n"); pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle(); send_status = safe_apic_wait_icr_idle();
...@@ -698,11 +692,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) ...@@ -698,11 +692,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
*/ */
/* Target chip */ /* Target chip */
apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
/* Boot on the stack */ /* Boot on the stack */
/* Kick the second */ /* Kick the second */
apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12)); apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
phys_apicid);
/* /*
* Give the other CPU some time to accept the IPI. * Give the other CPU some time to accept the IPI.
...@@ -1136,10 +1129,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) ...@@ -1136,10 +1129,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
* Setup boot CPU information * Setup boot CPU information
*/ */
smp_store_cpu_info(0); /* Final full version of the data */ smp_store_cpu_info(0); /* Final full version of the data */
#ifdef CONFIG_X86_32
boot_cpu_logical_apicid = logical_smp_processor_id(); boot_cpu_logical_apicid = logical_smp_processor_id();
#endif
current_thread_info()->cpu = 0; /* needed? */ current_thread_info()->cpu = 0; /* needed? */
set_cpu_sibling_map(0); set_cpu_sibling_map(0);
#ifdef CONFIG_X86_64
enable_IR_x2apic();
setup_apic_routing();
#endif
if (smp_sanity_check(max_cpus) < 0) { if (smp_sanity_check(max_cpus) < 0) {
printk(KERN_INFO "SMP disabled\n"); printk(KERN_INFO "SMP disabled\n");
disable_smp(); disable_smp();
...@@ -1147,9 +1147,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) ...@@ -1147,9 +1147,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
} }
preempt_disable(); preempt_disable();
if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) { if (read_apic_id() != boot_cpu_physical_apicid) {
panic("Boot APIC ID in local APIC unexpected (%d vs %d)", panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid); read_apic_id(), boot_cpu_physical_apicid);
/* Or can we switch back to PIC here? */ /* Or can we switch back to PIC here? */
} }
preempt_enable(); preempt_enable();
......
...@@ -904,8 +904,8 @@ static inline int __init activate_vmi(void) ...@@ -904,8 +904,8 @@ static inline int __init activate_vmi(void)
#endif #endif
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
para_fill(pv_apic_ops.apic_read, APICRead); para_fill(apic_ops->read, APICRead);
para_fill(pv_apic_ops.apic_write, APICWrite); para_fill(apic_ops->write, APICWrite);
#endif #endif
/* /*
......
...@@ -55,6 +55,7 @@ ...@@ -55,6 +55,7 @@
#include <linux/lguest_launcher.h> #include <linux/lguest_launcher.h>
#include <linux/virtio_console.h> #include <linux/virtio_console.h>
#include <linux/pm.h> #include <linux/pm.h>
#include <asm/apic.h>
#include <asm/lguest.h> #include <asm/lguest.h>
#include <asm/paravirt.h> #include <asm/paravirt.h>
#include <asm/param.h> #include <asm/param.h>
...@@ -783,14 +784,44 @@ static void lguest_wbinvd(void) ...@@ -783,14 +784,44 @@ static void lguest_wbinvd(void)
* code qualifies for Advanced. It will also never interrupt anything. It * code qualifies for Advanced. It will also never interrupt anything. It
* does, however, allow us to get through the Linux boot code. */ * does, however, allow us to get through the Linux boot code. */
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
static void lguest_apic_write(unsigned long reg, u32 v) static void lguest_apic_write(u32 reg, u32 v)
{ {
} }
static u32 lguest_apic_read(unsigned long reg) static u32 lguest_apic_read(u32 reg)
{ {
return 0; return 0;
} }
static u64 lguest_apic_icr_read(void)
{
return 0;
}
static void lguest_apic_icr_write(u32 low, u32 id)
{
/* Warn to see if there's any stray references */
WARN_ON(1);
}
static void lguest_apic_wait_icr_idle(void)
{
return;
}
static u32 lguest_apic_safe_wait_icr_idle(void)
{
return 0;
}
static struct apic_ops lguest_basic_apic_ops = {
.read = lguest_apic_read,
.write = lguest_apic_write,
.icr_read = lguest_apic_icr_read,
.icr_write = lguest_apic_icr_write,
.wait_icr_idle = lguest_apic_wait_icr_idle,
.safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle,
};
#endif #endif
/* STOP! Until an interrupt comes in. */ /* STOP! Until an interrupt comes in. */
...@@ -990,8 +1021,7 @@ __init void lguest_init(void) ...@@ -990,8 +1021,7 @@ __init void lguest_init(void)
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
/* apic read/write intercepts */ /* apic read/write intercepts */
pv_apic_ops.apic_write = lguest_apic_write; apic_ops = &lguest_basic_apic_ops;
pv_apic_ops.apic_read = lguest_apic_read;
#endif #endif
/* time operations */ /* time operations */
......
...@@ -5,17 +5,16 @@ ...@@ -5,17 +5,16 @@
#define APIC_DEFINITION 1 #define APIC_DEFINITION 1
#include <linux/threads.h> #include <linux/threads.h>
#include <linux/cpumask.h> #include <linux/cpumask.h>
#include <asm/smp.h>
#include <asm/mpspec.h> #include <asm/mpspec.h>
#include <asm/genapic.h> #include <asm/genapic.h>
#include <asm/fixmap.h> #include <asm/fixmap.h>
#include <asm/apicdef.h> #include <asm/apicdef.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/dmi.h> #include <linux/dmi.h>
#include <asm/mach-bigsmp/mach_apic.h>
#include <asm/mach-bigsmp/mach_apicdef.h> #include <asm/mach-bigsmp/mach_apicdef.h>
#include <linux/smp.h>
#include <asm/mach-bigsmp/mach_apic.h>
#include <asm/mach-bigsmp/mach_ipi.h> #include <asm/mach-bigsmp/mach_ipi.h>
#include <asm/mach-default/mach_mpparse.h> #include <asm/mach-default/mach_mpparse.h>
......
...@@ -4,16 +4,15 @@ ...@@ -4,16 +4,15 @@
#define APIC_DEFINITION 1 #define APIC_DEFINITION 1
#include <linux/threads.h> #include <linux/threads.h>
#include <linux/cpumask.h> #include <linux/cpumask.h>
#include <asm/smp.h>
#include <asm/mpspec.h> #include <asm/mpspec.h>
#include <asm/genapic.h> #include <asm/genapic.h>
#include <asm/fixmap.h> #include <asm/fixmap.h>
#include <asm/apicdef.h> #include <asm/apicdef.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/smp.h>
#include <linux/init.h> #include <linux/init.h>
#include <asm/mach-es7000/mach_apicdef.h> #include <asm/mach-es7000/mach_apicdef.h>
#include <linux/smp.h>
#include <asm/mach-es7000/mach_apic.h> #include <asm/mach-es7000/mach_apic.h>
#include <asm/mach-es7000/mach_ipi.h> #include <asm/mach-es7000/mach_ipi.h>
#include <asm/mach-es7000/mach_mpparse.h> #include <asm/mach-es7000/mach_mpparse.h>
......
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
#define APIC_DEFINITION 1 #define APIC_DEFINITION 1
#include <linux/threads.h> #include <linux/threads.h>
#include <linux/cpumask.h> #include <linux/cpumask.h>
#include <linux/smp.h>
#include <asm/mpspec.h> #include <asm/mpspec.h>
#include <asm/genapic.h> #include <asm/genapic.h>
#include <asm/fixmap.h> #include <asm/fixmap.h>
...@@ -12,8 +11,9 @@ ...@@ -12,8 +11,9 @@
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/init.h> #include <linux/init.h>
#include <asm/mach-numaq/mach_apic.h>
#include <asm/mach-numaq/mach_apicdef.h> #include <asm/mach-numaq/mach_apicdef.h>
#include <linux/smp.h>
#include <asm/mach-numaq/mach_apic.h>
#include <asm/mach-numaq/mach_ipi.h> #include <asm/mach-numaq/mach_ipi.h>
#include <asm/mach-numaq/mach_mpparse.h> #include <asm/mach-numaq/mach_mpparse.h>
#include <asm/mach-numaq/mach_wakecpu.h> #include <asm/mach-numaq/mach_wakecpu.h>
......
...@@ -4,17 +4,16 @@ ...@@ -4,17 +4,16 @@
#define APIC_DEFINITION 1 #define APIC_DEFINITION 1
#include <linux/threads.h> #include <linux/threads.h>
#include <linux/cpumask.h> #include <linux/cpumask.h>
#include <asm/smp.h>
#include <asm/mpspec.h> #include <asm/mpspec.h>
#include <asm/genapic.h> #include <asm/genapic.h>
#include <asm/fixmap.h> #include <asm/fixmap.h>
#include <asm/apicdef.h> #include <asm/apicdef.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/smp.h>
#include <linux/init.h> #include <linux/init.h>
#include <asm/mach-summit/mach_apic.h>
#include <asm/mach-summit/mach_apicdef.h> #include <asm/mach-summit/mach_apicdef.h>
#include <linux/smp.h>
#include <asm/mach-summit/mach_apic.h>
#include <asm/mach-summit/mach_ipi.h> #include <asm/mach-summit/mach_ipi.h>
#include <asm/mach-summit/mach_mpparse.h> #include <asm/mach-summit/mach_mpparse.h>
......
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include <xen/hvc-console.h> #include <xen/hvc-console.h>
#include <asm/paravirt.h> #include <asm/paravirt.h>
#include <asm/apic.h>
#include <asm/page.h> #include <asm/page.h>
#include <asm/xen/hypercall.h> #include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h> #include <asm/xen/hypervisor.h>
...@@ -580,16 +581,47 @@ static void xen_io_delay(void) ...@@ -580,16 +581,47 @@ static void xen_io_delay(void)
} }
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
static u32 xen_apic_read(unsigned long reg) static u32 xen_apic_read(u32 reg)
{ {
return 0; return 0;
} }
static void xen_apic_write(unsigned long reg, u32 val) static void xen_apic_write(u32 reg, u32 val)
{ {
/* Warn to see if there's any stray references */ /* Warn to see if there's any stray references */
WARN_ON(1); WARN_ON(1);
} }
static u64 xen_apic_icr_read(void)
{
return 0;
}
static void xen_apic_icr_write(u32 low, u32 id)
{
/* Warn to see if there's any stray references */
WARN_ON(1);
}
static void xen_apic_wait_icr_idle(void)
{
return;
}
static u32 xen_safe_apic_wait_icr_idle(void)
{
return 0;
}
static struct apic_ops xen_basic_apic_ops = {
.read = xen_apic_read,
.write = xen_apic_write,
.icr_read = xen_apic_icr_read,
.icr_write = xen_apic_icr_write,
.wait_icr_idle = xen_apic_wait_icr_idle,
.safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
};
#endif #endif
static void xen_flush_tlb(void) static void xen_flush_tlb(void)
...@@ -1273,8 +1305,6 @@ static const struct pv_irq_ops xen_irq_ops __initdata = { ...@@ -1273,8 +1305,6 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
static const struct pv_apic_ops xen_apic_ops __initdata = { static const struct pv_apic_ops xen_apic_ops __initdata = {
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
.apic_write = xen_apic_write,
.apic_read = xen_apic_read,
.setup_boot_clock = paravirt_nop, .setup_boot_clock = paravirt_nop,
.setup_secondary_clock = paravirt_nop, .setup_secondary_clock = paravirt_nop,
.startup_ipi_hook = paravirt_nop, .startup_ipi_hook = paravirt_nop,
...@@ -1677,6 +1707,13 @@ asmlinkage void __init xen_start_kernel(void) ...@@ -1677,6 +1707,13 @@ asmlinkage void __init xen_start_kernel(void)
pv_apic_ops = xen_apic_ops; pv_apic_ops = xen_apic_ops;
pv_mmu_ops = xen_mmu_ops; pv_mmu_ops = xen_mmu_ops;
#ifdef CONFIG_X86_LOCAL_APIC
/*
* set up the basic apic ops.
*/
apic_ops = &xen_basic_apic_ops;
#endif
if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
......
...@@ -26,6 +26,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o ...@@ -26,6 +26,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o
# Build Intel IOMMU support # Build Intel IOMMU support
obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
# #
# Some architectures use the generic PCI setup functions # Some architectures use the generic PCI setup functions
# #
......
#ifndef _DMA_REMAPPING_H
#define _DMA_REMAPPING_H
/*
* We need a fixed PAGE_SIZE of 4K irrespective of
* arch PAGE_SIZE for IOMMU page tables.
*/
#define PAGE_SHIFT_4K (12)
#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
/*
* 0: Present
* 1-11: Reserved
* 12-63: Context Ptr (12 - (haw-1))
* 64-127: Reserved
*/
struct root_entry {
u64 val;
u64 rsvd1;
};
#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
static inline bool root_present(struct root_entry *root)
{
return (root->val & 1);
}
static inline void set_root_present(struct root_entry *root)
{
root->val |= 1;
}
static inline void set_root_value(struct root_entry *root, unsigned long value)
{
root->val |= value & PAGE_MASK_4K;
}
struct context_entry;
static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
return (struct context_entry *)
(root_present(root)?phys_to_virt(
root->val & PAGE_MASK_4K):
NULL);
}
/*
* low 64 bits:
* 0: present
* 1: fault processing disable
* 2-3: translation type
* 12-63: address space root
* high 64 bits:
* 0-2: address width
* 3-6: aval
* 8-23: domain id
*/
struct context_entry {
u64 lo;
u64 hi;
};
#define context_present(c) ((c).lo & 1)
#define context_fault_disable(c) (((c).lo >> 1) & 1)
#define context_translation_type(c) (((c).lo >> 2) & 3)
#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
#define context_address_width(c) ((c).hi & 7)
#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
#define context_set_present(c) do {(c).lo |= 1;} while (0)
#define context_set_fault_enable(c) \
do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
#define context_set_translation_type(c, val) \
do { \
(c).lo &= (((u64)-1) << 4) | 3; \
(c).lo |= ((val) & 3) << 2; \
} while (0)
#define CONTEXT_TT_MULTI_LEVEL 0
#define context_set_address_root(c, val) \
do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
#define context_set_domain_id(c, val) \
do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
/*
* 0: readable
* 1: writable
* 2-6: reserved
* 7: super page
* 8-11: available
* 12-63: Host physcial address
*/
struct dma_pte {
u64 val;
};
#define dma_clear_pte(p) do {(p).val = 0;} while (0)
#define DMA_PTE_READ (1)
#define DMA_PTE_WRITE (2)
#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
#define dma_set_pte_prot(p, prot) \
do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
#define dma_set_pte_addr(p, addr) do {\
(p).val |= ((addr) & PAGE_MASK_4K); } while (0)
#define dma_pte_present(p) (((p).val & 3) != 0)
struct intel_iommu;
struct dmar_domain {
int id; /* domain id */
struct intel_iommu *iommu; /* back pointer to owning iommu */
struct list_head devices; /* all devices' list */
struct iova_domain iovad; /* iova's that belong to this domain */
struct dma_pte *pgd; /* virtual address */
spinlock_t mapping_lock; /* page table lock */
int gaw; /* max guest address width */
/* adjusted guest address width, 0 is level 2 30-bit */
int agaw;
#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
int flags;
};
/* PCI domain-device relationship */
struct device_domain_info {
struct list_head link; /* link to domain siblings */
struct list_head global; /* link to global list */
u8 bus; /* PCI bus numer */
u8 devfn; /* PCI devfn number */
struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
struct dmar_domain *domain; /* pointer to domain */
};
extern int init_dmars(void);
extern void free_dmar_iommu(struct intel_iommu *iommu);
extern int dmar_disabled;
#ifndef CONFIG_DMAR_GFX_WA
static inline void iommu_prepare_gfx_mapping(void)
{
return;
}
#endif /* !CONFIG_DMAR_GFX_WA */
#endif
...@@ -19,13 +19,16 @@ ...@@ -19,13 +19,16 @@
* Author: Shaohua Li <shaohua.li@intel.com> * Author: Shaohua Li <shaohua.li@intel.com>
* Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
* *
* This file implements early detection/parsing of DMA Remapping Devices * This file implements early detection/parsing of Remapping Devices
* reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
* tables. * tables.
*
* These routines are used by both DMA-remapping and Interrupt-remapping
*/ */
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/dmar.h> #include <linux/dmar.h>
#include <linux/timer.h>
#include "iova.h" #include "iova.h"
#include "intel-iommu.h" #include "intel-iommu.h"
...@@ -37,7 +40,6 @@ ...@@ -37,7 +40,6 @@
* these units are not supported by the architecture. * these units are not supported by the architecture.
*/ */
LIST_HEAD(dmar_drhd_units); LIST_HEAD(dmar_drhd_units);
LIST_HEAD(dmar_rmrr_units);
static struct acpi_table_header * __initdata dmar_tbl; static struct acpi_table_header * __initdata dmar_tbl;
...@@ -53,11 +55,6 @@ static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) ...@@ -53,11 +55,6 @@ static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
list_add(&drhd->list, &dmar_drhd_units); list_add(&drhd->list, &dmar_drhd_units);
} }
static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
{
list_add(&rmrr->list, &dmar_rmrr_units);
}
static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
struct pci_dev **dev, u16 segment) struct pci_dev **dev, u16 segment)
{ {
...@@ -172,19 +169,37 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) ...@@ -172,19 +169,37 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
struct acpi_dmar_hardware_unit *drhd; struct acpi_dmar_hardware_unit *drhd;
struct dmar_drhd_unit *dmaru; struct dmar_drhd_unit *dmaru;
int ret = 0; int ret = 0;
static int include_all;
dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL); dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
if (!dmaru) if (!dmaru)
return -ENOMEM; return -ENOMEM;
dmaru->hdr = header;
drhd = (struct acpi_dmar_hardware_unit *)header; drhd = (struct acpi_dmar_hardware_unit *)header;
dmaru->reg_base_addr = drhd->address; dmaru->reg_base_addr = drhd->address;
dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
ret = alloc_iommu(dmaru);
if (ret) {
kfree(dmaru);
return ret;
}
dmar_register_drhd_unit(dmaru);
return 0;
}
static int __init
dmar_parse_dev(struct dmar_drhd_unit *dmaru)
{
struct acpi_dmar_hardware_unit *drhd;
static int include_all;
int ret;
drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
if (!dmaru->include_all) if (!dmaru->include_all)
ret = dmar_parse_dev_scope((void *)(drhd + 1), ret = dmar_parse_dev_scope((void *)(drhd + 1),
((void *)drhd) + header->length, ((void *)drhd) + drhd->header.length,
&dmaru->devices_cnt, &dmaru->devices, &dmaru->devices_cnt, &dmaru->devices,
drhd->segment); drhd->segment);
else { else {
...@@ -197,37 +212,59 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) ...@@ -197,37 +212,59 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
include_all = 1; include_all = 1;
} }
if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) {
list_del(&dmaru->list);
kfree(dmaru); kfree(dmaru);
else }
dmar_register_drhd_unit(dmaru);
return ret; return ret;
} }
#ifdef CONFIG_DMAR
LIST_HEAD(dmar_rmrr_units);
static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
{
list_add(&rmrr->list, &dmar_rmrr_units);
}
static int __init static int __init
dmar_parse_one_rmrr(struct acpi_dmar_header *header) dmar_parse_one_rmrr(struct acpi_dmar_header *header)
{ {
struct acpi_dmar_reserved_memory *rmrr; struct acpi_dmar_reserved_memory *rmrr;
struct dmar_rmrr_unit *rmrru; struct dmar_rmrr_unit *rmrru;
int ret = 0;
rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
if (!rmrru) if (!rmrru)
return -ENOMEM; return -ENOMEM;
rmrru->hdr = header;
rmrr = (struct acpi_dmar_reserved_memory *)header; rmrr = (struct acpi_dmar_reserved_memory *)header;
rmrru->base_address = rmrr->base_address; rmrru->base_address = rmrr->base_address;
rmrru->end_address = rmrr->end_address; rmrru->end_address = rmrr->end_address;
dmar_register_rmrr_unit(rmrru);
return 0;
}
static int __init
rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
{
struct acpi_dmar_reserved_memory *rmrr;
int ret;
rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
ret = dmar_parse_dev_scope((void *)(rmrr + 1), ret = dmar_parse_dev_scope((void *)(rmrr + 1),
((void *)rmrr) + header->length, ((void *)rmrr) + rmrr->header.length,
&rmrru->devices_cnt, &rmrru->devices, rmrr->segment); &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
if (ret || (rmrru->devices_cnt == 0)) if (ret || (rmrru->devices_cnt == 0)) {
list_del(&rmrru->list);
kfree(rmrru); kfree(rmrru);
else }
dmar_register_rmrr_unit(rmrru);
return ret; return ret;
} }
#endif
static void __init static void __init
dmar_table_print_dmar_entry(struct acpi_dmar_header *header) dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
...@@ -252,6 +289,7 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) ...@@ -252,6 +289,7 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
} }
} }
/** /**
* parse_dmar_table - parses the DMA reporting table * parse_dmar_table - parses the DMA reporting table
*/ */
...@@ -284,7 +322,9 @@ parse_dmar_table(void) ...@@ -284,7 +322,9 @@ parse_dmar_table(void)
ret = dmar_parse_one_drhd(entry_header); ret = dmar_parse_one_drhd(entry_header);
break; break;
case ACPI_DMAR_TYPE_RESERVED_MEMORY: case ACPI_DMAR_TYPE_RESERVED_MEMORY:
#ifdef CONFIG_DMAR
ret = dmar_parse_one_rmrr(entry_header); ret = dmar_parse_one_rmrr(entry_header);
#endif
break; break;
default: default:
printk(KERN_WARNING PREFIX printk(KERN_WARNING PREFIX
...@@ -300,14 +340,76 @@ parse_dmar_table(void) ...@@ -300,14 +340,76 @@ parse_dmar_table(void)
return ret; return ret;
} }
int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
struct pci_dev *dev)
{
int index;
int __init dmar_table_init(void) while (dev) {
for (index = 0; index < cnt; index++)
if (dev == devices[index])
return 1;
/* Check our parent */
dev = dev->bus->self;
}
return 0;
}
struct dmar_drhd_unit *
dmar_find_matched_drhd_unit(struct pci_dev *dev)
{ {
struct dmar_drhd_unit *drhd = NULL;
list_for_each_entry(drhd, &dmar_drhd_units, list) {
if (drhd->include_all || dmar_pci_device_match(drhd->devices,
drhd->devices_cnt, dev))
return drhd;
}
return NULL;
}
int __init dmar_dev_scope_init(void)
{
struct dmar_drhd_unit *drhd;
int ret = -ENODEV;
for_each_drhd_unit(drhd) {
ret = dmar_parse_dev(drhd);
if (ret)
return ret;
}
#ifdef CONFIG_DMAR
{
struct dmar_rmrr_unit *rmrr;
for_each_rmrr_units(rmrr) {
ret = rmrr_parse_dev(rmrr);
if (ret)
return ret;
}
}
#endif
return ret;
}
int __init dmar_table_init(void)
{
static int dmar_table_initialized;
int ret; int ret;
if (dmar_table_initialized)
return 0;
dmar_table_initialized = 1;
ret = parse_dmar_table(); ret = parse_dmar_table();
if (ret) { if (ret) {
if (ret != -ENODEV)
printk(KERN_INFO PREFIX "parse DMAR table failure.\n"); printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
return ret; return ret;
} }
...@@ -317,9 +419,14 @@ int __init dmar_table_init(void) ...@@ -317,9 +419,14 @@ int __init dmar_table_init(void)
return -ENODEV; return -ENODEV;
} }
#ifdef CONFIG_DMAR
if (list_empty(&dmar_rmrr_units)) if (list_empty(&dmar_rmrr_units))
printk(KERN_INFO PREFIX "No RMRR found\n"); printk(KERN_INFO PREFIX "No RMRR found\n");
#endif
#ifdef CONFIG_INTR_REMAP
parse_ioapics_under_ir();
#endif
return 0; return 0;
} }
...@@ -341,3 +448,255 @@ int __init early_dmar_detect(void) ...@@ -341,3 +448,255 @@ int __init early_dmar_detect(void)
return (ACPI_SUCCESS(status) ? 1 : 0); return (ACPI_SUCCESS(status) ? 1 : 0);
} }
void __init detect_intel_iommu(void)
{
int ret;
ret = early_dmar_detect();
#ifdef CONFIG_DMAR
{
struct acpi_table_dmar *dmar;
/*
* for now we will disable dma-remapping when interrupt
* remapping is enabled.
* When support for queued invalidation for IOTLB invalidation
* is added, we will not need this any more.
*/
dmar = (struct acpi_table_dmar *) dmar_tbl;
if (ret && cpu_has_x2apic && dmar->flags & 0x1) {
printk(KERN_INFO
"Queued invalidation will be enabled to support "
"x2apic and Intr-remapping.\n");
printk(KERN_INFO
"Disabling IOMMU detection, because of missing "
"queued invalidation support for IOTLB "
"invalidation\n");
printk(KERN_INFO
"Use \"nox2apic\", if you want to use Intel "
" IOMMU for DMA-remapping and don't care about "
" x2apic support\n");
dmar_disabled = 1;
return;
}
if (ret && !no_iommu && !iommu_detected && !swiotlb &&
!dmar_disabled)
iommu_detected = 1;
}
#endif
}
int alloc_iommu(struct dmar_drhd_unit *drhd)
{
struct intel_iommu *iommu;
int map_size;
u32 ver;
static int iommu_allocated = 0;
iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
if (!iommu)
return -ENOMEM;
iommu->seq_id = iommu_allocated++;
iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
if (!iommu->reg) {
printk(KERN_ERR "IOMMU: can't map the region\n");
goto error;
}
iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
/* the registers might be more than one page */
map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
cap_max_fault_reg_offset(iommu->cap));
map_size = PAGE_ALIGN_4K(map_size);
if (map_size > PAGE_SIZE_4K) {
iounmap(iommu->reg);
iommu->reg = ioremap(drhd->reg_base_addr, map_size);
if (!iommu->reg) {
printk(KERN_ERR "IOMMU: can't map the region\n");
goto error;
}
}
ver = readl(iommu->reg + DMAR_VER_REG);
pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
iommu->cap, iommu->ecap);
spin_lock_init(&iommu->register_lock);
drhd->iommu = iommu;
return 0;
error:
kfree(iommu);
return -1;
}
void free_iommu(struct intel_iommu *iommu)
{
if (!iommu)
return;
#ifdef CONFIG_DMAR
free_dmar_iommu(iommu);
#endif
if (iommu->reg)
iounmap(iommu->reg);
kfree(iommu);
}
/*
* Reclaim all the submitted descriptors which have completed its work.
*/
static inline void reclaim_free_desc(struct q_inval *qi)
{
while (qi->desc_status[qi->free_tail] == QI_DONE) {
qi->desc_status[qi->free_tail] = QI_FREE;
qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
qi->free_cnt++;
}
}
/*
* Submit the queued invalidation descriptor to the remapping
* hardware unit and wait for its completion.
*/
void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
{
struct q_inval *qi = iommu->qi;
struct qi_desc *hw, wait_desc;
int wait_index, index;
unsigned long flags;
if (!qi)
return;
hw = qi->desc;
spin_lock(&qi->q_lock);
while (qi->free_cnt < 3) {
spin_unlock(&qi->q_lock);
cpu_relax();
spin_lock(&qi->q_lock);
}
index = qi->free_head;
wait_index = (index + 1) % QI_LENGTH;
qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
hw[index] = *desc;
wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
hw[wait_index] = wait_desc;
__iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
__iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
qi->free_head = (qi->free_head + 2) % QI_LENGTH;
qi->free_cnt -= 2;
spin_lock_irqsave(&iommu->register_lock, flags);
/*
* update the HW tail register indicating the presence of
* new descriptors.
*/
writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
spin_unlock_irqrestore(&iommu->register_lock, flags);
while (qi->desc_status[wait_index] != QI_DONE) {
spin_unlock(&qi->q_lock);
cpu_relax();
spin_lock(&qi->q_lock);
}
qi->desc_status[index] = QI_DONE;
reclaim_free_desc(qi);
spin_unlock(&qi->q_lock);
}
/*
* Flush the global interrupt entry cache.
*/
void qi_global_iec(struct intel_iommu *iommu)
{
struct qi_desc desc;
desc.low = QI_IEC_TYPE;
desc.high = 0;
qi_submit_sync(&desc, iommu);
}
/*
* Enable Queued Invalidation interface. This is a must to support
* interrupt-remapping. Also used by DMA-remapping, which replaces
* register based IOTLB invalidation.
*/
int dmar_enable_qi(struct intel_iommu *iommu)
{
u32 cmd, sts;
unsigned long flags;
struct q_inval *qi;
if (!ecap_qis(iommu->ecap))
return -ENOENT;
/*
* queued invalidation is already setup and enabled.
*/
if (iommu->qi)
return 0;
iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL);
if (!iommu->qi)
return -ENOMEM;
qi = iommu->qi;
qi->desc = (void *)(get_zeroed_page(GFP_KERNEL));
if (!qi->desc) {
kfree(qi);
iommu->qi = 0;
return -ENOMEM;
}
qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL);
if (!qi->desc_status) {
free_page((unsigned long) qi->desc);
kfree(qi);
iommu->qi = 0;
return -ENOMEM;
}
qi->free_head = qi->free_tail = 0;
qi->free_cnt = QI_LENGTH;
spin_lock_init(&qi->q_lock);
spin_lock_irqsave(&iommu->register_lock, flags);
/* write zero to the tail reg */
writel(0, iommu->reg + DMAR_IQT_REG);
dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
cmd = iommu->gcmd | DMA_GCMD_QIE;
iommu->gcmd |= DMA_GCMD_QIE;
writel(cmd, iommu->reg + DMAR_GCMD_REG);
/* Make sure hardware complete it */
IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
spin_unlock_irqrestore(&iommu->register_lock, flags);
return 0;
}
...@@ -49,8 +49,6 @@ ...@@ -49,8 +49,6 @@
#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
...@@ -58,8 +56,6 @@ static void flush_unmaps_timeout(unsigned long data); ...@@ -58,8 +56,6 @@ static void flush_unmaps_timeout(unsigned long data);
DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
static struct intel_iommu *g_iommus;
#define HIGH_WATER_MARK 250 #define HIGH_WATER_MARK 250
struct deferred_flush_tables { struct deferred_flush_tables {
int next; int next;
...@@ -80,7 +76,7 @@ static long list_size; ...@@ -80,7 +76,7 @@ static long list_size;
static void domain_remove_dev_info(struct dmar_domain *domain); static void domain_remove_dev_info(struct dmar_domain *domain);
static int dmar_disabled; int dmar_disabled;
static int __initdata dmar_map_gfx = 1; static int __initdata dmar_map_gfx = 1;
static int dmar_forcedac; static int dmar_forcedac;
static int intel_iommu_strict; static int intel_iommu_strict;
...@@ -185,13 +181,6 @@ void free_iova_mem(struct iova *iova) ...@@ -185,13 +181,6 @@ void free_iova_mem(struct iova *iova)
kmem_cache_free(iommu_iova_cache, iova); kmem_cache_free(iommu_iova_cache, iova);
} }
static inline void __iommu_flush_cache(
struct intel_iommu *iommu, void *addr, int size)
{
if (!ecap_coherent(iommu->ecap))
clflush_cache_range(addr, size);
}
/* Gets context entry for a given bus and devfn */ /* Gets context entry for a given bus and devfn */
static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
u8 bus, u8 devfn) u8 bus, u8 devfn)
...@@ -488,19 +477,6 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) ...@@ -488,19 +477,6 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
return 0; return 0;
} }
#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
{\
cycles_t start_time = get_cycles();\
while (1) {\
sts = op (iommu->reg + offset);\
if (cond)\
break;\
if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
panic("DMAR hardware is malfunctioning\n");\
cpu_relax();\
}\
}
static void iommu_set_root_entry(struct intel_iommu *iommu) static void iommu_set_root_entry(struct intel_iommu *iommu)
{ {
void *addr; void *addr;
...@@ -990,6 +966,8 @@ static int iommu_init_domains(struct intel_iommu *iommu) ...@@ -990,6 +966,8 @@ static int iommu_init_domains(struct intel_iommu *iommu)
return -ENOMEM; return -ENOMEM;
} }
spin_lock_init(&iommu->lock);
/* /*
* if Caching mode is set, then invalid translations are tagged * if Caching mode is set, then invalid translations are tagged
* with domainid 0. Hence we need to pre-allocate it. * with domainid 0. Hence we need to pre-allocate it.
...@@ -998,62 +976,15 @@ static int iommu_init_domains(struct intel_iommu *iommu) ...@@ -998,62 +976,15 @@ static int iommu_init_domains(struct intel_iommu *iommu)
set_bit(0, iommu->domain_ids); set_bit(0, iommu->domain_ids);
return 0; return 0;
} }
static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
struct dmar_drhd_unit *drhd)
{
int ret;
int map_size;
u32 ver;
iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
if (!iommu->reg) {
printk(KERN_ERR "IOMMU: can't map the region\n");
goto error;
}
iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
/* the registers might be more than one page */
map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
cap_max_fault_reg_offset(iommu->cap));
map_size = PAGE_ALIGN_4K(map_size);
if (map_size > PAGE_SIZE_4K) {
iounmap(iommu->reg);
iommu->reg = ioremap(drhd->reg_base_addr, map_size);
if (!iommu->reg) {
printk(KERN_ERR "IOMMU: can't map the region\n");
goto error;
}
}
ver = readl(iommu->reg + DMAR_VER_REG);
pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
iommu->cap, iommu->ecap);
ret = iommu_init_domains(iommu);
if (ret)
goto error_unmap;
spin_lock_init(&iommu->lock);
spin_lock_init(&iommu->register_lock);
drhd->iommu = iommu;
return iommu;
error_unmap:
iounmap(iommu->reg);
error:
kfree(iommu);
return NULL;
}
static void domain_exit(struct dmar_domain *domain); static void domain_exit(struct dmar_domain *domain);
static void free_iommu(struct intel_iommu *iommu)
void free_dmar_iommu(struct intel_iommu *iommu)
{ {
struct dmar_domain *domain; struct dmar_domain *domain;
int i; int i;
if (!iommu)
return;
i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
for (; i < cap_ndoms(iommu->cap); ) { for (; i < cap_ndoms(iommu->cap); ) {
domain = iommu->domains[i]; domain = iommu->domains[i];
...@@ -1078,10 +1009,6 @@ static void free_iommu(struct intel_iommu *iommu) ...@@ -1078,10 +1009,6 @@ static void free_iommu(struct intel_iommu *iommu)
/* free context mapping */ /* free context mapping */
free_context_table(iommu); free_context_table(iommu);
if (iommu->reg)
iounmap(iommu->reg);
kfree(iommu);
} }
static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
...@@ -1426,37 +1353,6 @@ find_domain(struct pci_dev *pdev) ...@@ -1426,37 +1353,6 @@ find_domain(struct pci_dev *pdev)
return NULL; return NULL;
} }
static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
struct pci_dev *dev)
{
int index;
while (dev) {
for (index = 0; index < cnt; index++)
if (dev == devices[index])
return 1;
/* Check our parent */
dev = dev->bus->self;
}
return 0;
}
static struct dmar_drhd_unit *
dmar_find_matched_drhd_unit(struct pci_dev *dev)
{
struct dmar_drhd_unit *drhd = NULL;
list_for_each_entry(drhd, &dmar_drhd_units, list) {
if (drhd->include_all || dmar_pci_device_match(drhd->devices,
drhd->devices_cnt, dev))
return drhd;
}
return NULL;
}
/* domain is initialized */ /* domain is initialized */
static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
{ {
...@@ -1729,8 +1625,6 @@ int __init init_dmars(void) ...@@ -1729,8 +1625,6 @@ int __init init_dmars(void)
* endfor * endfor
*/ */
for_each_drhd_unit(drhd) { for_each_drhd_unit(drhd) {
if (drhd->ignored)
continue;
g_num_of_iommus++; g_num_of_iommus++;
/* /*
* lock not needed as this is only incremented in the single * lock not needed as this is only incremented in the single
...@@ -1739,12 +1633,6 @@ int __init init_dmars(void) ...@@ -1739,12 +1633,6 @@ int __init init_dmars(void)
*/ */
} }
g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
if (!g_iommus) {
ret = -ENOMEM;
goto error;
}
deferred_flush = kzalloc(g_num_of_iommus * deferred_flush = kzalloc(g_num_of_iommus *
sizeof(struct deferred_flush_tables), GFP_KERNEL); sizeof(struct deferred_flush_tables), GFP_KERNEL);
if (!deferred_flush) { if (!deferred_flush) {
...@@ -1752,16 +1640,15 @@ int __init init_dmars(void) ...@@ -1752,16 +1640,15 @@ int __init init_dmars(void)
goto error; goto error;
} }
i = 0;
for_each_drhd_unit(drhd) { for_each_drhd_unit(drhd) {
if (drhd->ignored) if (drhd->ignored)
continue; continue;
iommu = alloc_iommu(&g_iommus[i], drhd);
i++; iommu = drhd->iommu;
if (!iommu) {
ret = -ENOMEM; ret = iommu_init_domains(iommu);
if (ret)
goto error; goto error;
}
/* /*
* TBD: * TBD:
...@@ -1845,7 +1732,6 @@ int __init init_dmars(void) ...@@ -1845,7 +1732,6 @@ int __init init_dmars(void)
iommu = drhd->iommu; iommu = drhd->iommu;
free_iommu(iommu); free_iommu(iommu);
} }
kfree(g_iommus);
return ret; return ret;
} }
...@@ -2002,7 +1888,10 @@ static void flush_unmaps(void) ...@@ -2002,7 +1888,10 @@ static void flush_unmaps(void)
/* just flush them all */ /* just flush them all */
for (i = 0; i < g_num_of_iommus; i++) { for (i = 0; i < g_num_of_iommus; i++) {
if (deferred_flush[i].next) { if (deferred_flush[i].next) {
iommu_flush_iotlb_global(&g_iommus[i], 0); struct intel_iommu *iommu =
deferred_flush[i].domain[0]->iommu;
iommu_flush_iotlb_global(iommu, 0);
for (j = 0; j < deferred_flush[i].next; j++) { for (j = 0; j < deferred_flush[i].next; j++) {
__free_iova(&deferred_flush[i].domain[j]->iovad, __free_iova(&deferred_flush[i].domain[j]->iovad,
deferred_flush[i].iova[j]); deferred_flush[i].iova[j]);
...@@ -2032,7 +1921,8 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova) ...@@ -2032,7 +1921,8 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
if (list_size == HIGH_WATER_MARK) if (list_size == HIGH_WATER_MARK)
flush_unmaps(); flush_unmaps();
iommu_id = dom->iommu - g_iommus; iommu_id = dom->iommu->seq_id;
next = deferred_flush[iommu_id].next; next = deferred_flush[iommu_id].next;
deferred_flush[iommu_id].domain[next] = dom; deferred_flush[iommu_id].domain[next] = dom;
deferred_flush[iommu_id].iova[next] = iova; deferred_flush[iommu_id].iova[next] = iova;
...@@ -2348,15 +2238,6 @@ static void __init iommu_exit_mempool(void) ...@@ -2348,15 +2238,6 @@ static void __init iommu_exit_mempool(void)
} }
void __init detect_intel_iommu(void)
{
if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
return;
if (early_dmar_detect()) {
iommu_detected = 1;
}
}
static void __init init_no_remapping_devices(void) static void __init init_no_remapping_devices(void)
{ {
struct dmar_drhd_unit *drhd; struct dmar_drhd_unit *drhd;
...@@ -2403,10 +2284,17 @@ int __init intel_iommu_init(void) ...@@ -2403,10 +2284,17 @@ int __init intel_iommu_init(void)
{ {
int ret = 0; int ret = 0;
if (no_iommu || swiotlb || dmar_disabled) if (dmar_table_init())
return -ENODEV; return -ENODEV;
if (dmar_table_init()) if (dmar_dev_scope_init())
return -ENODEV;
/*
* Check the need for DMA-remapping initialization now.
* Above initialization will also be used by Interrupt-remapping.
*/
if (no_iommu || swiotlb || dmar_disabled)
return -ENODEV; return -ENODEV;
iommu_init_mempool(); iommu_init_mempool();
......
...@@ -27,19 +27,8 @@ ...@@ -27,19 +27,8 @@
#include <linux/sysdev.h> #include <linux/sysdev.h>
#include "iova.h" #include "iova.h"
#include <linux/io.h> #include <linux/io.h>
#include <asm/cacheflush.h>
/* #include "dma_remapping.h"
* We need a fixed PAGE_SIZE of 4K irrespective of
* arch PAGE_SIZE for IOMMU page tables.
*/
#define PAGE_SHIFT_4K (12)
#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
/* /*
* Intel IOMMU register specification per version 1.0 public spec. * Intel IOMMU register specification per version 1.0 public spec.
...@@ -63,6 +52,11 @@ ...@@ -63,6 +52,11 @@
#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ #define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */
#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ #define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */
#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ #define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */
#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */
#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */
#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */
#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */
#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */
#define OFFSET_STRIDE (9) #define OFFSET_STRIDE (9)
/* /*
...@@ -126,6 +120,10 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) ...@@ -126,6 +120,10 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
#define ecap_max_iotlb_offset(e) \ #define ecap_max_iotlb_offset(e) \
(ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
#define ecap_coherent(e) ((e) & 0x1) #define ecap_coherent(e) ((e) & 0x1)
#define ecap_qis(e) ((e) & 0x2)
#define ecap_eim_support(e) ((e >> 4) & 0x1)
#define ecap_ir_support(e) ((e >> 3) & 0x1)
#define ecap_max_handle_mask(e) ((e >> 20) & 0xf)
/* IOTLB_REG */ /* IOTLB_REG */
...@@ -141,6 +139,17 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) ...@@ -141,6 +139,17 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
#define DMA_TLB_IH_NONLEAF (((u64)1) << 6) #define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
#define DMA_TLB_MAX_SIZE (0x3f) #define DMA_TLB_MAX_SIZE (0x3f)
/* INVALID_DESC */
#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3)
#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3)
#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3)
#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7)
#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6)
#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16)))
#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6)
#define DMA_ID_TLB_ADDR(addr) (addr)
#define DMA_ID_TLB_ADDR_MASK(mask) (mask)
/* PMEN_REG */ /* PMEN_REG */
#define DMA_PMEN_EPM (((u32)1)<<31) #define DMA_PMEN_EPM (((u32)1)<<31)
#define DMA_PMEN_PRS (((u32)1)<<0) #define DMA_PMEN_PRS (((u32)1)<<0)
...@@ -151,6 +160,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) ...@@ -151,6 +160,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
#define DMA_GCMD_SFL (((u32)1) << 29) #define DMA_GCMD_SFL (((u32)1) << 29)
#define DMA_GCMD_EAFL (((u32)1) << 28) #define DMA_GCMD_EAFL (((u32)1) << 28)
#define DMA_GCMD_WBF (((u32)1) << 27) #define DMA_GCMD_WBF (((u32)1) << 27)
#define DMA_GCMD_QIE (((u32)1) << 26)
#define DMA_GCMD_SIRTP (((u32)1) << 24)
#define DMA_GCMD_IRE (((u32) 1) << 25)
/* GSTS_REG */ /* GSTS_REG */
#define DMA_GSTS_TES (((u32)1) << 31) #define DMA_GSTS_TES (((u32)1) << 31)
...@@ -158,6 +170,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) ...@@ -158,6 +170,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
#define DMA_GSTS_FLS (((u32)1) << 29) #define DMA_GSTS_FLS (((u32)1) << 29)
#define DMA_GSTS_AFLS (((u32)1) << 28) #define DMA_GSTS_AFLS (((u32)1) << 28)
#define DMA_GSTS_WBFS (((u32)1) << 27) #define DMA_GSTS_WBFS (((u32)1) << 27)
#define DMA_GSTS_QIES (((u32)1) << 26)
#define DMA_GSTS_IRTPS (((u32)1) << 24)
#define DMA_GSTS_IRES (((u32)1) << 25)
/* CCMD_REG */ /* CCMD_REG */
#define DMA_CCMD_ICC (((u64)1) << 63) #define DMA_CCMD_ICC (((u64)1) << 63)
...@@ -187,158 +202,106 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) ...@@ -187,158 +202,106 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
#define dma_frcd_source_id(c) (c & 0xffff) #define dma_frcd_source_id(c) (c & 0xffff)
#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ #define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
/* #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
* 0: Present
* 1-11: Reserved #define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
* 12-63: Context Ptr (12 - (haw-1)) {\
* 64-127: Reserved cycles_t start_time = get_cycles();\
*/ while (1) {\
struct root_entry { sts = op (iommu->reg + offset);\
u64 val; if (cond)\
u64 rsvd1; break;\
}; if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) panic("DMAR hardware is malfunctioning\n");\
static inline bool root_present(struct root_entry *root) cpu_relax();\
{ }\
return (root->val & 1);
}
static inline void set_root_present(struct root_entry *root)
{
root->val |= 1;
}
static inline void set_root_value(struct root_entry *root, unsigned long value)
{
root->val |= value & PAGE_MASK_4K;
} }
struct context_entry; #define QI_LENGTH 256 /* queue length */
static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
return (struct context_entry *)
(root_present(root)?phys_to_virt(
root->val & PAGE_MASK_4K):
NULL);
}
/*
* low 64 bits:
* 0: present
* 1: fault processing disable
* 2-3: translation type
* 12-63: address space root
* high 64 bits:
* 0-2: address width
* 3-6: aval
* 8-23: domain id
*/
struct context_entry {
u64 lo;
u64 hi;
};
#define context_present(c) ((c).lo & 1)
#define context_fault_disable(c) (((c).lo >> 1) & 1)
#define context_translation_type(c) (((c).lo >> 2) & 3)
#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
#define context_address_width(c) ((c).hi & 7)
#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
#define context_set_present(c) do {(c).lo |= 1;} while (0)
#define context_set_fault_enable(c) \
do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
#define context_set_translation_type(c, val) \
do { \
(c).lo &= (((u64)-1) << 4) | 3; \
(c).lo |= ((val) & 3) << 2; \
} while (0)
#define CONTEXT_TT_MULTI_LEVEL 0
#define context_set_address_root(c, val) \
do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
#define context_set_domain_id(c, val) \
do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
/* enum {
* 0: readable QI_FREE,
* 1: writable QI_IN_USE,
* 2-6: reserved QI_DONE
* 7: super page
* 8-11: available
* 12-63: Host physcial address
*/
struct dma_pte {
u64 val;
}; };
#define dma_clear_pte(p) do {(p).val = 0;} while (0)
#define DMA_PTE_READ (1)
#define DMA_PTE_WRITE (2)
#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) #define QI_CC_TYPE 0x1
#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) #define QI_IOTLB_TYPE 0x2
#define dma_set_pte_prot(p, prot) \ #define QI_DIOTLB_TYPE 0x3
do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) #define QI_IEC_TYPE 0x4
#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) #define QI_IWD_TYPE 0x5
#define dma_set_pte_addr(p, addr) do {\
(p).val |= ((addr) & PAGE_MASK_4K); } while (0)
#define dma_pte_present(p) (((p).val & 3) != 0)
struct intel_iommu; #define QI_IEC_SELECTIVE (((u64)1) << 4)
#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32))
#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27))
struct dmar_domain { #define QI_IWD_STATUS_DATA(d) (((u64)d) << 32)
int id; /* domain id */ #define QI_IWD_STATUS_WRITE (((u64)1) << 5)
struct intel_iommu *iommu; /* back pointer to owning iommu */
struct list_head devices; /* all devices' list */ struct qi_desc {
struct iova_domain iovad; /* iova's that belong to this domain */ u64 low, high;
};
struct dma_pte *pgd; /* virtual address */ struct q_inval {
spinlock_t mapping_lock; /* page table lock */ spinlock_t q_lock;
int gaw; /* max guest address width */ struct qi_desc *desc; /* invalidation queue */
int *desc_status; /* desc status */
int free_head; /* first free entry */
int free_tail; /* last free entry */
int free_cnt;
};
/* adjusted guest address width, 0 is level 2 30-bit */ #ifdef CONFIG_INTR_REMAP
int agaw; /* 1MB - maximum possible interrupt remapping table size */
#define INTR_REMAP_PAGE_ORDER 8
#define INTR_REMAP_TABLE_REG_SIZE 0xf
#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 #define INTR_REMAP_TABLE_ENTRIES 65536
int flags;
};
/* PCI domain-device relationship */ struct ir_table {
struct device_domain_info { struct irte *base;
struct list_head link; /* link to domain siblings */
struct list_head global; /* link to global list */
u8 bus; /* PCI bus numer */
u8 devfn; /* PCI devfn number */
struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
struct dmar_domain *domain; /* pointer to domain */
}; };
#endif
extern int init_dmars(void);
struct intel_iommu { struct intel_iommu {
void __iomem *reg; /* Pointer to hardware regs, virtual addr */ void __iomem *reg; /* Pointer to hardware regs, virtual addr */
u64 cap; u64 cap;
u64 ecap; u64 ecap;
unsigned long *domain_ids; /* bitmap of domains */
struct dmar_domain **domains; /* ptr to domains */
int seg; int seg;
u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
spinlock_t lock; /* protect context, domain ids */
spinlock_t register_lock; /* protect register handling */ spinlock_t register_lock; /* protect register handling */
int seq_id; /* sequence id of the iommu */
#ifdef CONFIG_DMAR
unsigned long *domain_ids; /* bitmap of domains */
struct dmar_domain **domains; /* ptr to domains */
spinlock_t lock; /* protect context, domain ids */
struct root_entry *root_entry; /* virtual address */ struct root_entry *root_entry; /* virtual address */
unsigned int irq; unsigned int irq;
unsigned char name[7]; /* Device Name */ unsigned char name[7]; /* Device Name */
struct msi_msg saved_msg; struct msi_msg saved_msg;
struct sys_device sysdev; struct sys_device sysdev;
#endif
struct q_inval *qi; /* Queued invalidation info */
#ifdef CONFIG_INTR_REMAP
struct ir_table *ir_table; /* Interrupt remapping info */
#endif
}; };
#ifndef CONFIG_DMAR_GFX_WA static inline void __iommu_flush_cache(
static inline void iommu_prepare_gfx_mapping(void) struct intel_iommu *iommu, void *addr, int size)
{ {
return; if (!ecap_coherent(iommu->ecap))
clflush_cache_range(addr, size);
} }
#endif /* !CONFIG_DMAR_GFX_WA */
extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
extern int alloc_iommu(struct dmar_drhd_unit *drhd);
extern void free_iommu(struct intel_iommu *iommu);
extern int dmar_enable_qi(struct intel_iommu *iommu);
extern void qi_global_iec(struct intel_iommu *iommu);
extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
#endif #endif
#include <linux/dmar.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>
#include <linux/pci.h>
#include <linux/irq.h>
#include <asm/io_apic.h>
#include "intel-iommu.h"
#include "intr_remapping.h"
static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
static int ir_ioapic_num;
int intr_remapping_enabled;
static struct {
struct intel_iommu *iommu;
u16 irte_index;
u16 sub_handle;
u8 irte_mask;
} irq_2_iommu[NR_IRQS];
static DEFINE_SPINLOCK(irq_2_ir_lock);
int irq_remapped(int irq)
{
if (irq > NR_IRQS)
return 0;
if (!irq_2_iommu[irq].iommu)
return 0;
return 1;
}
int get_irte(int irq, struct irte *entry)
{
int index;
if (!entry || irq > NR_IRQS)
return -1;
spin_lock(&irq_2_ir_lock);
if (!irq_2_iommu[irq].iommu) {
spin_unlock(&irq_2_ir_lock);
return -1;
}
index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
*entry = *(irq_2_iommu[irq].iommu->ir_table->base + index);
spin_unlock(&irq_2_ir_lock);
return 0;
}
int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
{
struct ir_table *table = iommu->ir_table;
u16 index, start_index;
unsigned int mask = 0;
int i;
if (!count)
return -1;
/*
* start the IRTE search from index 0.
*/
index = start_index = 0;
if (count > 1) {
count = __roundup_pow_of_two(count);
mask = ilog2(count);
}
if (mask > ecap_max_handle_mask(iommu->ecap)) {
printk(KERN_ERR
"Requested mask %x exceeds the max invalidation handle"
" mask value %Lx\n", mask,
ecap_max_handle_mask(iommu->ecap));
return -1;
}
spin_lock(&irq_2_ir_lock);
do {
for (i = index; i < index + count; i++)
if (table->base[i].present)
break;
/* empty index found */
if (i == index + count)
break;
index = (index + count) % INTR_REMAP_TABLE_ENTRIES;
if (index == start_index) {
spin_unlock(&irq_2_ir_lock);
printk(KERN_ERR "can't allocate an IRTE\n");
return -1;
}
} while (1);
for (i = index; i < index + count; i++)
table->base[i].present = 1;
irq_2_iommu[irq].iommu = iommu;
irq_2_iommu[irq].irte_index = index;
irq_2_iommu[irq].sub_handle = 0;
irq_2_iommu[irq].irte_mask = mask;
spin_unlock(&irq_2_ir_lock);
return index;
}
static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
{
struct qi_desc desc;
desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
| QI_IEC_SELECTIVE;
desc.high = 0;
qi_submit_sync(&desc, iommu);
}
int map_irq_to_irte_handle(int irq, u16 *sub_handle)
{
int index;
spin_lock(&irq_2_ir_lock);
if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
spin_unlock(&irq_2_ir_lock);
return -1;
}
*sub_handle = irq_2_iommu[irq].sub_handle;
index = irq_2_iommu[irq].irte_index;
spin_unlock(&irq_2_ir_lock);
return index;
}
int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
{
spin_lock(&irq_2_ir_lock);
if (irq >= NR_IRQS || irq_2_iommu[irq].iommu) {
spin_unlock(&irq_2_ir_lock);
return -1;
}
irq_2_iommu[irq].iommu = iommu;
irq_2_iommu[irq].irte_index = index;
irq_2_iommu[irq].sub_handle = subhandle;
irq_2_iommu[irq].irte_mask = 0;
spin_unlock(&irq_2_ir_lock);
return 0;
}
int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
{
spin_lock(&irq_2_ir_lock);
if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
spin_unlock(&irq_2_ir_lock);
return -1;
}
irq_2_iommu[irq].iommu = NULL;
irq_2_iommu[irq].irte_index = 0;
irq_2_iommu[irq].sub_handle = 0;
irq_2_iommu[irq].irte_mask = 0;
spin_unlock(&irq_2_ir_lock);
return 0;
}
int modify_irte(int irq, struct irte *irte_modified)
{
int index;
struct irte *irte;
struct intel_iommu *iommu;
spin_lock(&irq_2_ir_lock);
if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
spin_unlock(&irq_2_ir_lock);
return -1;
}
iommu = irq_2_iommu[irq].iommu;
index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
irte = &iommu->ir_table->base[index];
set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1));
__iommu_flush_cache(iommu, irte, sizeof(*irte));
qi_flush_iec(iommu, index, 0);
spin_unlock(&irq_2_ir_lock);
return 0;
}
int flush_irte(int irq)
{
int index;
struct intel_iommu *iommu;
spin_lock(&irq_2_ir_lock);
if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
spin_unlock(&irq_2_ir_lock);
return -1;
}
iommu = irq_2_iommu[irq].iommu;
index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
spin_unlock(&irq_2_ir_lock);
return 0;
}
struct intel_iommu *map_ioapic_to_ir(int apic)
{
int i;
for (i = 0; i < MAX_IO_APICS; i++)
if (ir_ioapic[i].id == apic)
return ir_ioapic[i].iommu;
return NULL;
}
struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
{
struct dmar_drhd_unit *drhd;
drhd = dmar_find_matched_drhd_unit(dev);
if (!drhd)
return NULL;
return drhd->iommu;
}
int free_irte(int irq)
{
int index, i;
struct irte *irte;
struct intel_iommu *iommu;
spin_lock(&irq_2_ir_lock);
if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
spin_unlock(&irq_2_ir_lock);
return -1;
}
iommu = irq_2_iommu[irq].iommu;
index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
irte = &iommu->ir_table->base[index];
if (!irq_2_iommu[irq].sub_handle) {
for (i = 0; i < (1 << irq_2_iommu[irq].irte_mask); i++)
set_64bit((unsigned long *)irte, 0);
qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
}
irq_2_iommu[irq].iommu = NULL;
irq_2_iommu[irq].irte_index = 0;
irq_2_iommu[irq].sub_handle = 0;
irq_2_iommu[irq].irte_mask = 0;
spin_unlock(&irq_2_ir_lock);
return 0;
}
static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
{
u64 addr;
u32 cmd, sts;
unsigned long flags;
addr = virt_to_phys((void *)iommu->ir_table->base);
spin_lock_irqsave(&iommu->register_lock, flags);
dmar_writeq(iommu->reg + DMAR_IRTA_REG,
(addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE);
/* Set interrupt-remapping table pointer */
cmd = iommu->gcmd | DMA_GCMD_SIRTP;
writel(cmd, iommu->reg + DMAR_GCMD_REG);
IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
readl, (sts & DMA_GSTS_IRTPS), sts);
spin_unlock_irqrestore(&iommu->register_lock, flags);
/*
* global invalidation of interrupt entry cache before enabling
* interrupt-remapping.
*/
qi_global_iec(iommu);
spin_lock_irqsave(&iommu->register_lock, flags);
/* Enable interrupt-remapping */
cmd = iommu->gcmd | DMA_GCMD_IRE;
iommu->gcmd |= DMA_GCMD_IRE;
writel(cmd, iommu->reg + DMAR_GCMD_REG);
IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
readl, (sts & DMA_GSTS_IRES), sts);
spin_unlock_irqrestore(&iommu->register_lock, flags);
}
static int setup_intr_remapping(struct intel_iommu *iommu, int mode)
{
struct ir_table *ir_table;
struct page *pages;
ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table),
GFP_KERNEL);
if (!iommu->ir_table)
return -ENOMEM;
pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER);
if (!pages) {
printk(KERN_ERR "failed to allocate pages of order %d\n",
INTR_REMAP_PAGE_ORDER);
kfree(iommu->ir_table);
return -ENOMEM;
}
ir_table->base = page_address(pages);
iommu_set_intr_remapping(iommu, mode);
return 0;
}
int __init enable_intr_remapping(int eim)
{
struct dmar_drhd_unit *drhd;
int setup = 0;
/*
* check for the Interrupt-remapping support
*/
for_each_drhd_unit(drhd) {
struct intel_iommu *iommu = drhd->iommu;
if (!ecap_ir_support(iommu->ecap))
continue;
if (eim && !ecap_eim_support(iommu->ecap)) {
printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
" ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
return -1;
}
}
/*
* Enable queued invalidation for all the DRHD's.
*/
for_each_drhd_unit(drhd) {
int ret;
struct intel_iommu *iommu = drhd->iommu;
ret = dmar_enable_qi(iommu);
if (ret) {
printk(KERN_ERR "DRHD %Lx: failed to enable queued, "
" invalidation, ecap %Lx, ret %d\n",
drhd->reg_base_addr, iommu->ecap, ret);
return -1;
}
}
/*
* Setup Interrupt-remapping for all the DRHD's now.
*/
for_each_drhd_unit(drhd) {
struct intel_iommu *iommu = drhd->iommu;
if (!ecap_ir_support(iommu->ecap))
continue;
if (setup_intr_remapping(iommu, eim))
goto error;
setup = 1;
}
if (!setup)
goto error;
intr_remapping_enabled = 1;
return 0;
error:
/*
* handle error condition gracefully here!
*/
return -1;
}
static int ir_parse_ioapic_scope(struct acpi_dmar_header *header,
struct intel_iommu *iommu)
{
struct acpi_dmar_hardware_unit *drhd;
struct acpi_dmar_device_scope *scope;
void *start, *end;
drhd = (struct acpi_dmar_hardware_unit *)header;
start = (void *)(drhd + 1);
end = ((void *)drhd) + header->length;
while (start < end) {
scope = start;
if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
if (ir_ioapic_num == MAX_IO_APICS) {
printk(KERN_WARNING "Exceeded Max IO APICS\n");
return -1;
}
printk(KERN_INFO "IOAPIC id %d under DRHD base"
" 0x%Lx\n", scope->enumeration_id,
drhd->address);
ir_ioapic[ir_ioapic_num].iommu = iommu;
ir_ioapic[ir_ioapic_num].id = scope->enumeration_id;
ir_ioapic_num++;
}
start += scope->length;
}
return 0;
}
/*
* Finds the assocaition between IOAPIC's and its Interrupt-remapping
* hardware unit.
*/
int __init parse_ioapics_under_ir(void)
{
struct dmar_drhd_unit *drhd;
int ir_supported = 0;
for_each_drhd_unit(drhd) {
struct intel_iommu *iommu = drhd->iommu;
if (ecap_ir_support(iommu->ecap)) {
if (ir_parse_ioapic_scope(drhd->hdr, iommu))
return -1;
ir_supported = 1;
}
}
if (ir_supported && ir_ioapic_num != nr_ioapics) {
printk(KERN_WARNING
"Not all IO-APIC's listed under remapping hardware\n");
return -1;
}
return ir_supported;
}
#include "intel-iommu.h"
struct ioapic_scope {
struct intel_iommu *iommu;
unsigned int id;
};
#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
...@@ -9,6 +9,8 @@ ...@@ -9,6 +9,8 @@
#include <asm/apicdef.h> #include <asm/apicdef.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/system.h> #include <asm/system.h>
#include <asm/cpufeature.h>
#include <asm/msr.h>
#define ARCH_APICTIMER_STOPS_ON_C3 1 #define ARCH_APICTIMER_STOPS_ON_C3 1
...@@ -47,15 +49,13 @@ extern int disable_apic; ...@@ -47,15 +49,13 @@ extern int disable_apic;
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h> #include <asm/paravirt.h>
#else #else
#define apic_write native_apic_write
#define apic_read native_apic_read
#define setup_boot_clock setup_boot_APIC_clock #define setup_boot_clock setup_boot_APIC_clock
#define setup_secondary_clock setup_secondary_APIC_clock #define setup_secondary_clock setup_secondary_APIC_clock
#endif #endif
extern int is_vsmp_box(void); extern int is_vsmp_box(void);
static inline void native_apic_write(unsigned long reg, u32 v) static inline void native_apic_mem_write(u32 reg, u32 v)
{ {
volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
...@@ -64,15 +64,68 @@ static inline void native_apic_write(unsigned long reg, u32 v) ...@@ -64,15 +64,68 @@ static inline void native_apic_write(unsigned long reg, u32 v)
ASM_OUTPUT2("0" (v), "m" (*addr))); ASM_OUTPUT2("0" (v), "m" (*addr)));
} }
static inline u32 native_apic_read(unsigned long reg) static inline u32 native_apic_mem_read(u32 reg)
{ {
return *((volatile u32 *)(APIC_BASE + reg)); return *((volatile u32 *)(APIC_BASE + reg));
} }
extern void apic_wait_icr_idle(void); static inline void native_apic_msr_write(u32 reg, u32 v)
extern u32 safe_apic_wait_icr_idle(void); {
if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
reg == APIC_LVR)
return;
wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0);
}
static inline u32 native_apic_msr_read(u32 reg)
{
u32 low, high;
if (reg == APIC_DFR)
return -1;
rdmsr(APIC_BASE_MSR + (reg >> 4), low, high);
return low;
}
#ifndef CONFIG_X86_32
extern int x2apic, x2apic_preenabled;
extern void check_x2apic(void);
extern void enable_x2apic(void);
extern void enable_IR_x2apic(void);
extern void x2apic_icr_write(u32 low, u32 id);
#endif
struct apic_ops {
u32 (*read)(u32 reg);
void (*write)(u32 reg, u32 v);
u64 (*icr_read)(void);
void (*icr_write)(u32 low, u32 high);
void (*wait_icr_idle)(void);
u32 (*safe_wait_icr_idle)(void);
};
extern struct apic_ops *apic_ops;
#define apic_read (apic_ops->read)
#define apic_write (apic_ops->write)
#define apic_icr_read (apic_ops->icr_read)
#define apic_icr_write (apic_ops->icr_write)
#define apic_wait_icr_idle (apic_ops->wait_icr_idle)
#define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle)
extern int get_physical_broadcast(void); extern int get_physical_broadcast(void);
#ifdef CONFIG_X86_64
static inline void ack_x2APIC_irq(void)
{
/* Docs say use 0 for future compatibility */
native_apic_msr_write(APIC_EOI, 0);
}
#endif
static inline void ack_APIC_irq(void) static inline void ack_APIC_irq(void)
{ {
/* /*
......
...@@ -105,6 +105,7 @@ ...@@ -105,6 +105,7 @@
#define APIC_TMICT 0x380 #define APIC_TMICT 0x380
#define APIC_TMCCT 0x390 #define APIC_TMCCT 0x390
#define APIC_TDCR 0x3E0 #define APIC_TDCR 0x3E0
#define APIC_SELF_IPI 0x3F0
#define APIC_TDR_DIV_TMBASE (1 << 2) #define APIC_TDR_DIV_TMBASE (1 << 2)
#define APIC_TDR_DIV_1 0xB #define APIC_TDR_DIV_1 0xB
#define APIC_TDR_DIV_2 0x0 #define APIC_TDR_DIV_2 0x0
...@@ -128,6 +129,8 @@ ...@@ -128,6 +129,8 @@
#define APIC_EILVT3 0x530 #define APIC_EILVT3 0x530
#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) #define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
#define APIC_BASE_MSR 0x800
#define X2APIC_ENABLE (1UL << 10)
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
# define MAX_IO_APICS 64 # define MAX_IO_APICS 64
......
...@@ -91,6 +91,7 @@ ...@@ -91,6 +91,7 @@
#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ #define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ #define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ #define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */
#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */
/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */ #define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */
...@@ -189,6 +190,7 @@ extern const char * const x86_power_flags[32]; ...@@ -189,6 +190,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) #define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES)
#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT)
#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
# define cpu_has_invlpg 1 # define cpu_has_invlpg 1
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
struct genapic { struct genapic {
char *name; char *name;
int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
u32 int_delivery_mode; u32 int_delivery_mode;
u32 int_dest_mode; u32 int_dest_mode;
int (*apic_id_registered)(void); int (*apic_id_registered)(void);
...@@ -24,17 +25,24 @@ struct genapic { ...@@ -24,17 +25,24 @@ struct genapic {
void (*send_IPI_mask)(cpumask_t mask, int vector); void (*send_IPI_mask)(cpumask_t mask, int vector);
void (*send_IPI_allbutself)(int vector); void (*send_IPI_allbutself)(int vector);
void (*send_IPI_all)(int vector); void (*send_IPI_all)(int vector);
void (*send_IPI_self)(int vector);
/* */ /* */
unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
unsigned int (*phys_pkg_id)(int index_msb); unsigned int (*phys_pkg_id)(int index_msb);
unsigned int (*get_apic_id)(unsigned long x);
unsigned long (*set_apic_id)(unsigned int id);
unsigned long apic_id_mask;
}; };
extern struct genapic *genapic; extern struct genapic *genapic;
extern struct genapic apic_flat; extern struct genapic apic_flat;
extern struct genapic apic_physflat; extern struct genapic apic_physflat;
extern struct genapic apic_x2apic_cluster;
extern struct genapic apic_x2apic_phys;
extern int acpi_madt_oem_check(char *, char *); extern int acpi_madt_oem_check(char *, char *);
extern void apic_send_IPI_self(int vector);
enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
extern enum uv_system_type get_uv_system_type(void); extern enum uv_system_type get_uv_system_type(void);
extern int is_uv_system(void); extern int is_uv_system(void);
......
...@@ -73,7 +73,9 @@ extern void enable_IO_APIC(void); ...@@ -73,7 +73,9 @@ extern void enable_IO_APIC(void);
#endif #endif
/* IPI functions */ /* IPI functions */
#ifdef CONFIG_X86_32
extern void send_IPI_self(int vector); extern void send_IPI_self(int vector);
#endif
extern void send_IPI(int dest, int vector); extern void send_IPI(int dest, int vector);
/* Statistics */ /* Statistics */
......
...@@ -57,4 +57,7 @@ static inline void outb_pic(unsigned char value, unsigned int port) ...@@ -57,4 +57,7 @@ static inline void outb_pic(unsigned char value, unsigned int port)
extern struct irq_chip i8259A_chip; extern struct irq_chip i8259A_chip;
extern void mask_8259A(void);
extern void unmask_8259A(void);
#endif /* ASM_X86__I8259_H */ #endif /* ASM_X86__I8259_H */
...@@ -107,6 +107,20 @@ struct IO_APIC_route_entry { ...@@ -107,6 +107,20 @@ struct IO_APIC_route_entry {
} __attribute__ ((packed)); } __attribute__ ((packed));
struct IR_IO_APIC_route_entry {
__u64 vector : 8,
zero : 3,
index2 : 1,
delivery_status : 1,
polarity : 1,
irr : 1,
trigger : 1,
mask : 1,
reserved : 31,
format : 1,
index : 15;
} __attribute__ ((packed));
#ifdef CONFIG_X86_IO_APIC #ifdef CONFIG_X86_IO_APIC
/* /*
...@@ -183,6 +197,12 @@ extern int io_apic_set_pci_routing(int ioapic, int pin, int irq, ...@@ -183,6 +197,12 @@ extern int io_apic_set_pci_routing(int ioapic, int pin, int irq,
extern int (*ioapic_renumber_irq)(int ioapic, int irq); extern int (*ioapic_renumber_irq)(int ioapic, int irq);
extern void ioapic_init_mappings(void); extern void ioapic_init_mappings(void);
#ifdef CONFIG_X86_64
extern int save_mask_IO_APIC_setup(void);
extern void restore_IO_APIC_setup(void);
extern void reinit_intr_remapped_IO_APIC(int);
#endif
#else /* !CONFIG_X86_IO_APIC */ #else /* !CONFIG_X86_IO_APIC */
#define io_apic_assign_pci_irqs 0 #define io_apic_assign_pci_irqs 0
static const int timer_through_8259 = 0; static const int timer_through_8259 = 0;
......
...@@ -49,6 +49,12 @@ static inline int __prepare_ICR2(unsigned int mask) ...@@ -49,6 +49,12 @@ static inline int __prepare_ICR2(unsigned int mask)
return SET_APIC_DEST_FIELD(mask); return SET_APIC_DEST_FIELD(mask);
} }
static inline void __xapic_wait_icr_idle(void)
{
while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY)
cpu_relax();
}
static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
unsigned int dest) unsigned int dest)
{ {
...@@ -64,7 +70,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, ...@@ -64,7 +70,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
/* /*
* Wait for idle. * Wait for idle.
*/ */
apic_wait_icr_idle(); __xapic_wait_icr_idle();
/* /*
* No need to touch the target chip field * No need to touch the target chip field
...@@ -74,7 +80,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, ...@@ -74,7 +80,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
/* /*
* Send the IPI. The write to APIC_ICR fires this off. * Send the IPI. The write to APIC_ICR fires this off.
*/ */
apic_write(APIC_ICR, cfg); native_apic_mem_write(APIC_ICR, cfg);
} }
/* /*
...@@ -92,13 +98,13 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, ...@@ -92,13 +98,13 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
if (unlikely(vector == NMI_VECTOR)) if (unlikely(vector == NMI_VECTOR))
safe_apic_wait_icr_idle(); safe_apic_wait_icr_idle();
else else
apic_wait_icr_idle(); __xapic_wait_icr_idle();
/* /*
* prepare target chip field * prepare target chip field
*/ */
cfg = __prepare_ICR2(mask); cfg = __prepare_ICR2(mask);
apic_write(APIC_ICR2, cfg); native_apic_mem_write(APIC_ICR2, cfg);
/* /*
* program the ICR * program the ICR
...@@ -108,7 +114,7 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, ...@@ -108,7 +114,7 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
/* /*
* Send the IPI. The write to APIC_ICR fires this off. * Send the IPI. The write to APIC_ICR fires this off.
*/ */
apic_write(APIC_ICR, cfg); native_apic_mem_write(APIC_ICR, cfg);
} }
static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
......
#ifndef _ASM_IRQ_REMAPPING_H
#define _ASM_IRQ_REMAPPING_H
extern int x2apic;
#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
#endif
...@@ -30,6 +30,8 @@ static inline cpumask_t target_cpus(void) ...@@ -30,6 +30,8 @@ static inline cpumask_t target_cpus(void)
#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
#define phys_pkg_id (genapic->phys_pkg_id) #define phys_pkg_id (genapic->phys_pkg_id)
#define vector_allocation_domain (genapic->vector_allocation_domain) #define vector_allocation_domain (genapic->vector_allocation_domain)
#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID)))
#define send_IPI_self (genapic->send_IPI_self)
extern void setup_apic_routing(void); extern void setup_apic_routing(void);
#else #else
#define INT_DELIVERY_MODE dest_LowestPrio #define INT_DELIVERY_MODE dest_LowestPrio
...@@ -54,7 +56,7 @@ static inline void init_apic_ldr(void) ...@@ -54,7 +56,7 @@ static inline void init_apic_ldr(void)
static inline int apic_id_registered(void) static inline int apic_id_registered(void)
{ {
return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); return physid_isset(read_apic_id(), phys_cpu_present_map);
} }
static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
......
...@@ -4,9 +4,9 @@ ...@@ -4,9 +4,9 @@
#include <asm/apic.h> #include <asm/apic.h>
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
#define APIC_ID_MASK (0xFFu<<24) #define APIC_ID_MASK (genapic->apic_id_mask)
#define GET_APIC_ID(x) (((x)>>24)&0xFFu) #define GET_APIC_ID(x) (genapic->get_apic_id(x))
#define SET_APIC_ID(x) (((x)<<24)) #define SET_APIC_ID(x) (genapic->set_apic_id(x))
#else #else
#define APIC_ID_MASK (0xF<<24) #define APIC_ID_MASK (0xF<<24)
static inline unsigned get_apic_id(unsigned long x) static inline unsigned get_apic_id(unsigned long x)
......
...@@ -141,7 +141,7 @@ static inline void setup_portio_remap(void) ...@@ -141,7 +141,7 @@ static inline void setup_portio_remap(void)
extern unsigned int boot_cpu_physical_apicid; extern unsigned int boot_cpu_physical_apicid;
static inline int check_phys_apicid_present(int cpu_physical_apicid) static inline int check_phys_apicid_present(int cpu_physical_apicid)
{ {
boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); boot_cpu_physical_apicid = read_apic_id();
return (1); return (1);
} }
......
...@@ -48,4 +48,8 @@ ...@@ -48,4 +48,8 @@
#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ #define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
MSI_ADDR_DEST_ID_MASK) MSI_ADDR_DEST_ID_MASK)
#define MSI_ADDR_IR_EXT_INT (1 << 4)
#define MSI_ADDR_IR_SHV (1 << 3)
#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13)
#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5)
#endif /* ASM_X86__MSIDEF_H */ #endif /* ASM_X86__MSIDEF_H */
...@@ -200,12 +200,6 @@ struct pv_irq_ops { ...@@ -200,12 +200,6 @@ struct pv_irq_ops {
struct pv_apic_ops { struct pv_apic_ops {
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
/*
* Direct APIC operations, principally for VMI. Ideally
* these shouldn't be in this interface.
*/
void (*apic_write)(unsigned long reg, u32 v);
u32 (*apic_read)(unsigned long reg);
void (*setup_boot_clock)(void); void (*setup_boot_clock)(void);
void (*setup_secondary_clock)(void); void (*setup_secondary_clock)(void);
...@@ -898,19 +892,6 @@ static inline void slow_down_io(void) ...@@ -898,19 +892,6 @@ static inline void slow_down_io(void)
} }
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
/*
* Basic functions accessing APICs.
*/
static inline void apic_write(unsigned long reg, u32 v)
{
PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
}
static inline u32 apic_read(unsigned long reg)
{
return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
}
static inline void setup_boot_clock(void) static inline void setup_boot_clock(void)
{ {
PVOP_VCALL0(pv_apic_ops.setup_boot_clock); PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
......
...@@ -165,30 +165,33 @@ extern int safe_smp_processor_id(void); ...@@ -165,30 +165,33 @@ extern int safe_smp_processor_id(void);
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
#ifndef CONFIG_X86_64
static inline int logical_smp_processor_id(void) static inline int logical_smp_processor_id(void)
{ {
/* we don't want to mark this access volatile - bad code generation */ /* we don't want to mark this access volatile - bad code generation */
return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
} }
#ifndef CONFIG_X86_64 #include <mach_apicdef.h>
static inline unsigned int read_apic_id(void) static inline unsigned int read_apic_id(void)
{ {
return *(u32 *)(APIC_BASE + APIC_ID); unsigned int reg;
reg = *(u32 *)(APIC_BASE + APIC_ID);
return GET_APIC_ID(reg);
} }
#else
extern unsigned int read_apic_id(void);
#endif #endif
# ifdef APIC_DEFINITION # if defined(APIC_DEFINITION) || defined(CONFIG_X86_64)
extern int hard_smp_processor_id(void); extern int hard_smp_processor_id(void);
# else # else
# include <mach_apicdef.h> #include <mach_apicdef.h>
static inline int hard_smp_processor_id(void) static inline int hard_smp_processor_id(void)
{ {
/* we don't want to mark this access volatile - bad code generation */ /* we don't want to mark this access volatile - bad code generation */
return GET_APIC_ID(read_apic_id()); return read_apic_id();
} }
# endif /* APIC_DEFINITION */ # endif /* APIC_DEFINITION */
......
...@@ -25,9 +25,99 @@ ...@@ -25,9 +25,99 @@
#include <linux/types.h> #include <linux/types.h>
#include <linux/msi.h> #include <linux/msi.h>
#ifdef CONFIG_DMAR #if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP)
struct intel_iommu; struct intel_iommu;
struct dmar_drhd_unit {
struct list_head list; /* list of drhd units */
struct acpi_dmar_header *hdr; /* ACPI header */
u64 reg_base_addr; /* register base address*/
struct pci_dev **devices; /* target device array */
int devices_cnt; /* target device count */
u8 ignored:1; /* ignore drhd */
u8 include_all:1;
struct intel_iommu *iommu;
};
extern struct list_head dmar_drhd_units;
#define for_each_drhd_unit(drhd) \
list_for_each_entry(drhd, &dmar_drhd_units, list)
extern int dmar_table_init(void);
extern int early_dmar_detect(void);
extern int dmar_dev_scope_init(void);
/* Intel IOMMU detection */
extern void detect_intel_iommu(void);
extern int parse_ioapics_under_ir(void);
extern int alloc_iommu(struct dmar_drhd_unit *);
#else
static inline void detect_intel_iommu(void)
{
return;
}
static inline int dmar_table_init(void)
{
return -ENODEV;
}
#endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */
#ifdef CONFIG_INTR_REMAP
extern int intr_remapping_enabled;
extern int enable_intr_remapping(int);
struct irte {
union {
struct {
__u64 present : 1,
fpd : 1,
dst_mode : 1,
redir_hint : 1,
trigger_mode : 1,
dlvry_mode : 3,
avail : 4,
__reserved_1 : 4,
vector : 8,
__reserved_2 : 8,
dest_id : 32;
};
__u64 low;
};
union {
struct {
__u64 sid : 16,
sq : 2,
svt : 2,
__reserved_3 : 44;
};
__u64 high;
};
};
extern int get_irte(int irq, struct irte *entry);
extern int modify_irte(int irq, struct irte *irte_modified);
extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count);
extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index,
u16 sub_handle);
extern int map_irq_to_irte_handle(int irq, u16 *sub_handle);
extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index);
extern int flush_irte(int irq);
extern int free_irte(int irq);
extern int irq_remapped(int irq);
extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev);
extern struct intel_iommu *map_ioapic_to_ir(int apic);
#else
#define irq_remapped(irq) (0)
#define enable_intr_remapping(mode) (-1)
#define intr_remapping_enabled (0)
#endif
#ifdef CONFIG_DMAR
extern const char *dmar_get_fault_reason(u8 fault_reason); extern const char *dmar_get_fault_reason(u8 fault_reason);
/* Can't use the common MSI interrupt functions /* Can't use the common MSI interrupt functions
...@@ -40,47 +130,30 @@ extern void dmar_msi_write(int irq, struct msi_msg *msg); ...@@ -40,47 +130,30 @@ extern void dmar_msi_write(int irq, struct msi_msg *msg);
extern int dmar_set_interrupt(struct intel_iommu *iommu); extern int dmar_set_interrupt(struct intel_iommu *iommu);
extern int arch_setup_dmar_msi(unsigned int irq); extern int arch_setup_dmar_msi(unsigned int irq);
/* Intel IOMMU detection and initialization functions */ extern int iommu_detected, no_iommu;
extern void detect_intel_iommu(void);
extern int intel_iommu_init(void);
extern int dmar_table_init(void);
extern int early_dmar_detect(void);
extern struct list_head dmar_drhd_units;
extern struct list_head dmar_rmrr_units; extern struct list_head dmar_rmrr_units;
struct dmar_drhd_unit {
struct list_head list; /* list of drhd units */
u64 reg_base_addr; /* register base address*/
struct pci_dev **devices; /* target device array */
int devices_cnt; /* target device count */
u8 ignored:1; /* ignore drhd */
u8 include_all:1;
struct intel_iommu *iommu;
};
struct dmar_rmrr_unit { struct dmar_rmrr_unit {
struct list_head list; /* list of rmrr units */ struct list_head list; /* list of rmrr units */
struct acpi_dmar_header *hdr; /* ACPI header */
u64 base_address; /* reserved base address*/ u64 base_address; /* reserved base address*/
u64 end_address; /* reserved end address */ u64 end_address; /* reserved end address */
struct pci_dev **devices; /* target devices */ struct pci_dev **devices; /* target devices */
int devices_cnt; /* target device count */ int devices_cnt; /* target device count */
}; };
#define for_each_drhd_unit(drhd) \
list_for_each_entry(drhd, &dmar_drhd_units, list)
#define for_each_rmrr_units(rmrr) \ #define for_each_rmrr_units(rmrr) \
list_for_each_entry(rmrr, &dmar_rmrr_units, list) list_for_each_entry(rmrr, &dmar_rmrr_units, list)
/* Intel DMAR initialization functions */
extern int intel_iommu_init(void);
extern int dmar_disabled;
#else #else
static inline void detect_intel_iommu(void)
{
return;
}
static inline int intel_iommu_init(void) static inline int intel_iommu_init(void)
{ {
#ifdef CONFIG_INTR_REMAP
return dmar_dev_scope_init();
#else
return -ENODEV; return -ENODEV;
#endif
} }
#endif /* !CONFIG_DMAR */ #endif /* !CONFIG_DMAR */
#endif /* __DMAR_H__ */ #endif /* __DMAR_H__ */
...@@ -62,6 +62,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, ...@@ -62,6 +62,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq,
#define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */ #define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */
#define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */ #define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */
#define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */ #define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */
#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */
#ifdef CONFIG_IRQ_PER_CPU #ifdef CONFIG_IRQ_PER_CPU
# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
......
...@@ -89,6 +89,13 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask) ...@@ -89,6 +89,13 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
set_balance_irq_affinity(irq, cpumask); set_balance_irq_affinity(irq, cpumask);
#ifdef CONFIG_GENERIC_PENDING_IRQ #ifdef CONFIG_GENERIC_PENDING_IRQ
if (desc->status & IRQ_MOVE_PCNTXT) {
unsigned long flags;
spin_lock_irqsave(&desc->lock, flags);
desc->chip->set_affinity(irq, cpumask);
spin_unlock_irqrestore(&desc->lock, flags);
} else
set_pending_irq(irq, cpumask); set_pending_irq(irq, cpumask);
#else #else
desc->affinity = cpumask; desc->affinity = cpumask;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment