Commit 269b0123 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu: (89 commits)
  AMD IOMMU: remove now unnecessary #ifdefs
  AMD IOMMU: prealloc_protection_domains should be static
  kvm/iommu: fix compile warning
  AMD IOMMU: add statistics about total number of map requests
  AMD IOMMU: add statistics about allocated io memory
  AMD IOMMU: add stats counter for domain tlb flushes
  AMD IOMMU: add stats counter for single iommu domain tlb flushes
  AMD IOMMU: add stats counter for cross-page request
  AMD IOMMU: add stats counter for free_coherent requests
  AMD IOMMU: add stats counter for alloc_coherent requests
  AMD IOMMU: add stats counter for unmap_sg requests
  AMD IOMMU: add stats counter for map_sg requests
  AMD IOMMU: add stats counter for unmap_single requests
  AMD IOMMU: add stats counter for map_single requests
  AMD IOMMU: add stats counter for completion wait events
  AMD IOMMU: add init code for statistic collection
  AMD IOMMU: add necessary header defines for stats counting
  AMD IOMMU: add Kconfig entry for statistic collection code
  AMD IOMMU: use dev_name in iommu_enable function
  AMD IOMMU: use calc_devid in prealloc_protection_domains
  ...
parents f60a0a79 065a6d68
......@@ -687,3 +687,6 @@ config IRQ_PER_CPU
config IOMMU_HELPER
def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB)
config IOMMU_API
def_bool (DMAR)
......@@ -467,7 +467,7 @@ struct kvm_arch {
struct kvm_sal_data rdv_sal_data;
struct list_head assigned_dev_head;
struct dmar_domain *intel_iommu_domain;
struct iommu_domain *iommu_domain;
struct hlist_head irq_ack_notifier_list;
unsigned long irq_sources_bitmap;
......
......@@ -51,8 +51,8 @@ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
coalesced_mmio.o irq_comm.o)
ifeq ($(CONFIG_DMAR),y)
common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
ifeq ($(CONFIG_IOMMU_API),y)
common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
endif
kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
......
......@@ -31,6 +31,7 @@
#include <linux/bitops.h>
#include <linux/hrtimer.h>
#include <linux/uaccess.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <asm/pgtable.h>
......@@ -188,7 +189,7 @@ int kvm_dev_ioctl_check_extension(long ext)
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
case KVM_CAP_IOMMU:
r = intel_iommu_found();
r = iommu_found();
break;
default:
r = 0;
......
......@@ -586,6 +586,16 @@ config AMD_IOMMU
your BIOS for an option to enable it or if you have an IVRS ACPI
table.
config AMD_IOMMU_STATS
bool "Export AMD IOMMU statistics to debugfs"
depends on AMD_IOMMU
select DEBUG_FS
help
This option enables code in the AMD IOMMU driver to collect various
statistics about whats happening in the driver and exports that
information to userspace via debugfs.
If unsure, say N.
# need this always selected by IOMMU for the VIA workaround
config SWIOTLB
def_bool y if X86_64
......@@ -599,6 +609,9 @@ config SWIOTLB
config IOMMU_HELPER
def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
config IOMMU_API
def_bool (AMD_IOMMU || DMAR)
config MAXSMP
bool "Configure Maximum number of SMP Processors and NUMA Nodes"
depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
......
......@@ -190,16 +190,23 @@
/* FIXME: move this macro to <linux/pci.h> */
#define PCI_BUS(x) (((x) >> 8) & 0xff)
/* Protection domain flags */
#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */
#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops
domain for an IOMMU */
/*
* This structure contains generic data for IOMMU protection domains
* independent of their use.
*/
struct protection_domain {
spinlock_t lock; /* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
int mode; /* paging mode (0-6 levels) */
u64 *pt_root; /* page table root pointer */
void *priv; /* private data */
spinlock_t lock; /* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
int mode; /* paging mode (0-6 levels) */
u64 *pt_root; /* page table root pointer */
unsigned long flags; /* flags to find out type of domain */
unsigned dev_cnt; /* devices assigned to this domain */
void *priv; /* private data */
};
/*
......@@ -295,7 +302,7 @@ struct amd_iommu {
bool int_enabled;
/* if one, we need to send a completion wait command */
int need_sync;
bool need_sync;
/* default dma_ops domain for that IOMMU */
struct dma_ops_domain *default_dom;
......@@ -374,7 +381,7 @@ extern struct protection_domain **amd_iommu_pd_table;
extern unsigned long *amd_iommu_pd_alloc_bitmap;
/* will be 1 if device isolation is enabled */
extern int amd_iommu_isolate;
extern bool amd_iommu_isolate;
/*
* If true, the addresses will be flushed on unmap time, not when
......@@ -382,18 +389,6 @@ extern int amd_iommu_isolate;
*/
extern bool amd_iommu_unmap_flush;
/* takes a PCI device id and prints it out in a readable form */
static inline void print_devid(u16 devid, int nl)
{
int bus = devid >> 8;
int dev = devid >> 3 & 0x1f;
int fn = devid & 0x07;
printk("%02x:%02x.%x", bus, dev, fn);
if (nl)
printk("\n");
}
/* takes bus and device/function and returns the device id
* FIXME: should that be in generic PCI code? */
static inline u16 calc_devid(u8 bus, u8 devfn)
......@@ -401,4 +396,32 @@ static inline u16 calc_devid(u8 bus, u8 devfn)
return (((u16)bus) << 8) | devfn;
}
#ifdef CONFIG_AMD_IOMMU_STATS
struct __iommu_counter {
char *name;
struct dentry *dent;
u64 value;
};
#define DECLARE_STATS_COUNTER(nm) \
static struct __iommu_counter nm = { \
.name = #nm, \
}
#define INC_STATS_COUNTER(name) name.value += 1
#define ADD_STATS_COUNTER(name, x) name.value += (x)
#define SUB_STATS_COUNTER(name, x) name.value -= (x)
#else /* CONFIG_AMD_IOMMU_STATS */
#define DECLARE_STATS_COUNTER(name)
#define INC_STATS_COUNTER(name)
#define ADD_STATS_COUNTER(name, x)
#define SUB_STATS_COUNTER(name, x)
static inline void amd_iommu_stats_init(void) { }
#endif /* CONFIG_AMD_IOMMU_STATS */
#endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
......@@ -360,7 +360,7 @@ struct kvm_arch{
struct list_head active_mmu_pages;
struct list_head assigned_dev_head;
struct list_head oos_global_pages;
struct dmar_domain *intel_iommu_domain;
struct iommu_domain *iommu_domain;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
......
......@@ -20,8 +20,12 @@
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/bitops.h>
#include <linux/debugfs.h>
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
#ifdef CONFIG_IOMMU_API
#include <linux/iommu.h>
#endif
#include <asm/proto.h>
#include <asm/iommu.h>
#include <asm/gart.h>
......@@ -38,6 +42,10 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock);
static LIST_HEAD(iommu_pd_list);
static DEFINE_SPINLOCK(iommu_pd_list_lock);
#ifdef CONFIG_IOMMU_API
static struct iommu_ops amd_iommu_ops;
#endif
/*
* general struct to manage commands send to an IOMMU
*/
......@@ -47,6 +55,68 @@ struct iommu_cmd {
static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
struct unity_map_entry *e);
static struct dma_ops_domain *find_protection_domain(u16 devid);
#ifdef CONFIG_AMD_IOMMU_STATS
/*
* Initialization code for statistics collection
*/
DECLARE_STATS_COUNTER(compl_wait);
DECLARE_STATS_COUNTER(cnt_map_single);
DECLARE_STATS_COUNTER(cnt_unmap_single);
DECLARE_STATS_COUNTER(cnt_map_sg);
DECLARE_STATS_COUNTER(cnt_unmap_sg);
DECLARE_STATS_COUNTER(cnt_alloc_coherent);
DECLARE_STATS_COUNTER(cnt_free_coherent);
DECLARE_STATS_COUNTER(cross_page);
DECLARE_STATS_COUNTER(domain_flush_single);
DECLARE_STATS_COUNTER(domain_flush_all);
DECLARE_STATS_COUNTER(alloced_io_mem);
DECLARE_STATS_COUNTER(total_map_requests);
static struct dentry *stats_dir;
static struct dentry *de_isolate;
static struct dentry *de_fflush;
static void amd_iommu_stats_add(struct __iommu_counter *cnt)
{
if (stats_dir == NULL)
return;
cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
&cnt->value);
}
static void amd_iommu_stats_init(void)
{
stats_dir = debugfs_create_dir("amd-iommu", NULL);
if (stats_dir == NULL)
return;
de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,
(u32 *)&amd_iommu_isolate);
de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,
(u32 *)&amd_iommu_unmap_flush);
amd_iommu_stats_add(&compl_wait);
amd_iommu_stats_add(&cnt_map_single);
amd_iommu_stats_add(&cnt_unmap_single);
amd_iommu_stats_add(&cnt_map_sg);
amd_iommu_stats_add(&cnt_unmap_sg);
amd_iommu_stats_add(&cnt_alloc_coherent);
amd_iommu_stats_add(&cnt_free_coherent);
amd_iommu_stats_add(&cross_page);
amd_iommu_stats_add(&domain_flush_single);
amd_iommu_stats_add(&domain_flush_all);
amd_iommu_stats_add(&alloced_io_mem);
amd_iommu_stats_add(&total_map_requests);
}
#endif
/* returns !0 if the IOMMU is caching non-present entries in its TLB */
static int iommu_has_npcache(struct amd_iommu *iommu)
......@@ -189,12 +259,54 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
spin_lock_irqsave(&iommu->lock, flags);
ret = __iommu_queue_command(iommu, cmd);
if (!ret)
iommu->need_sync = 1;
iommu->need_sync = true;
spin_unlock_irqrestore(&iommu->lock, flags);
return ret;
}
/*
* This function waits until an IOMMU has completed a completion
* wait command
*/
static void __iommu_wait_for_completion(struct amd_iommu *iommu)
{
int ready = 0;
unsigned status = 0;
unsigned long i = 0;
INC_STATS_COUNTER(compl_wait);
while (!ready && (i < EXIT_LOOP_COUNT)) {
++i;
/* wait for the bit to become one */
status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
}
/* set bit back to zero */
status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
if (unlikely(i == EXIT_LOOP_COUNT))
panic("AMD IOMMU: Completion wait loop failed\n");
}
/*
* This function queues a completion wait command into the command
* buffer of an IOMMU
*/
static int __iommu_completion_wait(struct amd_iommu *iommu)
{
struct iommu_cmd cmd;
memset(&cmd, 0, sizeof(cmd));
cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
return __iommu_queue_command(iommu, &cmd);
}
/*
* This function is called whenever we need to ensure that the IOMMU has
* completed execution of all commands we sent. It sends a
......@@ -204,40 +316,22 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
*/
static int iommu_completion_wait(struct amd_iommu *iommu)
{
int ret = 0, ready = 0;
unsigned status = 0;
struct iommu_cmd cmd;
unsigned long flags, i = 0;
memset(&cmd, 0, sizeof(cmd));
cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
int ret = 0;
unsigned long flags;
spin_lock_irqsave(&iommu->lock, flags);
if (!iommu->need_sync)
goto out;
iommu->need_sync = 0;
ret = __iommu_completion_wait(iommu);
ret = __iommu_queue_command(iommu, &cmd);
iommu->need_sync = false;
if (ret)
goto out;
while (!ready && (i < EXIT_LOOP_COUNT)) {
++i;
/* wait for the bit to become one */
status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
}
/* set bit back to zero */
status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
if (unlikely(i == EXIT_LOOP_COUNT))
panic("AMD IOMMU: Completion wait loop failed\n");
__iommu_wait_for_completion(iommu);
out:
spin_unlock_irqrestore(&iommu->lock, flags);
......@@ -264,6 +358,21 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
return ret;
}
static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
u16 domid, int pde, int s)
{
memset(cmd, 0, sizeof(*cmd));
address &= PAGE_MASK;
CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
cmd->data[1] |= domid;
cmd->data[2] = lower_32_bits(address);
cmd->data[3] = upper_32_bits(address);
if (s) /* size bit - we flush more than one 4kb page */
cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
}
/*
* Generic command send function for invalidaing TLB entries
*/
......@@ -273,16 +382,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
struct iommu_cmd cmd;
int ret;
memset(&cmd, 0, sizeof(cmd));
address &= PAGE_MASK;
CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
cmd.data[1] |= domid;
cmd.data[2] = lower_32_bits(address);
cmd.data[3] = upper_32_bits(address);
if (s) /* size bit - we flush more than one 4kb page */
cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
__iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);
ret = iommu_queue_command(iommu, &cmd);
......@@ -321,9 +421,35 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
{
u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
INC_STATS_COUNTER(domain_flush_single);
iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
}
/*
* This function is used to flush the IO/TLB for a given protection domain
* on every IOMMU in the system
*/
static void iommu_flush_domain(u16 domid)
{
unsigned long flags;
struct amd_iommu *iommu;
struct iommu_cmd cmd;
INC_STATS_COUNTER(domain_flush_all);
__iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
domid, 1, 1);
list_for_each_entry(iommu, &amd_iommu_list, list) {
spin_lock_irqsave(&iommu->lock, flags);
__iommu_queue_command(iommu, &cmd);
__iommu_completion_wait(iommu);
__iommu_wait_for_completion(iommu);
spin_unlock_irqrestore(&iommu->lock, flags);
}
}
/****************************************************************************
*
* The functions below are used the create the page table mappings for
......@@ -338,10 +464,10 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
* supporting all features of AMD IOMMU page tables like level skipping
* and full 64 bit address spaces.
*/
static int iommu_map(struct protection_domain *dom,
unsigned long bus_addr,
unsigned long phys_addr,
int prot)
static int iommu_map_page(struct protection_domain *dom,
unsigned long bus_addr,
unsigned long phys_addr,
int prot)
{
u64 __pte, *pte, *page;
......@@ -388,6 +514,28 @@ static int iommu_map(struct protection_domain *dom,
return 0;
}
static void iommu_unmap_page(struct protection_domain *dom,
unsigned long bus_addr)
{
u64 *pte;
pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
if (!IOMMU_PTE_PRESENT(*pte))
return;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
if (!IOMMU_PTE_PRESENT(*pte))
return;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
*pte = 0;
}
/*
* This function checks if a specific unity mapping entry is needed for
* this specific IOMMU.
......@@ -440,7 +588,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
for (addr = e->address_start; addr < e->address_end;
addr += PAGE_SIZE) {
ret = iommu_map(&dma_dom->domain, addr, addr, e->prot);
ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot);
if (ret)
return ret;
/*
......@@ -571,6 +719,16 @@ static u16 domain_id_alloc(void)
return id;
}
static void domain_id_free(int id)
{
unsigned long flags;
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
if (id > 0 && id < MAX_DOMAIN_ID)
__clear_bit(id, amd_iommu_pd_alloc_bitmap);
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}
/*
* Used to reserve address ranges in the aperture (e.g. for exclusion
* ranges.
......@@ -587,12 +745,12 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
iommu_area_reserve(dom->bitmap, start_page, pages);
}
static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
static void free_pagetable(struct protection_domain *domain)
{
int i, j;
u64 *p1, *p2, *p3;
p1 = dma_dom->domain.pt_root;
p1 = domain->pt_root;
if (!p1)
return;
......@@ -613,6 +771,8 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
}
free_page((unsigned long)p1);
domain->pt_root = NULL;
}
/*
......@@ -624,7 +784,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
if (!dom)
return;
dma_ops_free_pagetable(dom);
free_pagetable(&dom->domain);
kfree(dom->pte_pages);
......@@ -663,6 +823,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
goto free_dma_dom;
dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
dma_dom->domain.flags = PD_DMA_OPS_MASK;
dma_dom->domain.priv = dma_dom;
if (!dma_dom->domain.pt_root)
goto free_dma_dom;
......@@ -724,6 +885,15 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
return NULL;
}
/*
* little helper function to check whether a given protection domain is a
* dma_ops domain
*/
static bool dma_ops_domain(struct protection_domain *domain)
{
return domain->flags & PD_DMA_OPS_MASK;
}
/*
* Find out the protection domain structure for a given PCI device. This
* will give us the pointer to the page table root for example.
......@@ -744,14 +914,15 @@ static struct protection_domain *domain_for_device(u16 devid)
* If a device is not yet associated with a domain, this function does
* assigns it visible for the hardware
*/
static void set_device_domain(struct amd_iommu *iommu,
struct protection_domain *domain,
u16 devid)
static void attach_device(struct amd_iommu *iommu,
struct protection_domain *domain,
u16 devid)
{
unsigned long flags;
u64 pte_root = virt_to_phys(domain->pt_root);
domain->dev_cnt += 1;
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
......@@ -767,6 +938,116 @@ static void set_device_domain(struct amd_iommu *iommu,
iommu_queue_inv_dev_entry(iommu, devid);
}
/*
* Removes a device from a protection domain (unlocked)
*/
static void __detach_device(struct protection_domain *domain, u16 devid)
{
/* lock domain */
spin_lock(&domain->lock);
/* remove domain from the lookup table */
amd_iommu_pd_table[devid] = NULL;
/* remove entry from the device table seen by the hardware */
amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
amd_iommu_dev_table[devid].data[1] = 0;
amd_iommu_dev_table[devid].data[2] = 0;
/* decrease reference counter */
domain->dev_cnt -= 1;
/* ready */
spin_unlock(&domain->lock);
}
/*
* Removes a device from a protection domain (with devtable_lock held)
*/
static void detach_device(struct protection_domain *domain, u16 devid)
{
unsigned long flags;
/* lock device table */
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
__detach_device(domain, devid);
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}
static int device_change_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
struct device *dev = data;
struct pci_dev *pdev = to_pci_dev(dev);
u16 devid = calc_devid(pdev->bus->number, pdev->devfn);
struct protection_domain *domain;
struct dma_ops_domain *dma_domain;
struct amd_iommu *iommu;
int order = amd_iommu_aperture_order;
unsigned long flags;
if (devid > amd_iommu_last_bdf)
goto out;
devid = amd_iommu_alias_table[devid];
iommu = amd_iommu_rlookup_table[devid];
if (iommu == NULL)
goto out;
domain = domain_for_device(devid);
if (domain && !dma_ops_domain(domain))
WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound "
"to a non-dma-ops domain\n", dev_name(dev));
switch (action) {
case BUS_NOTIFY_BOUND_DRIVER:
if (domain)
goto out;
dma_domain = find_protection_domain(devid);
if (!dma_domain)
dma_domain = iommu->default_dom;
attach_device(iommu, &dma_domain->domain, devid);
printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
"device %s\n", dma_domain->domain.id, dev_name(dev));
break;
case BUS_NOTIFY_UNBIND_DRIVER:
if (!domain)
goto out;
detach_device(domain, devid);
break;
case BUS_NOTIFY_ADD_DEVICE:
/* allocate a protection domain if a device is added */
dma_domain = find_protection_domain(devid);
if (dma_domain)
goto out;
dma_domain = dma_ops_domain_alloc(iommu, order);
if (!dma_domain)
goto out;
dma_domain->target_dev = devid;
spin_lock_irqsave(&iommu_pd_list_lock, flags);
list_add_tail(&dma_domain->list, &iommu_pd_list);
spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
break;
default:
goto out;
}
iommu_queue_inv_dev_entry(iommu, devid);
iommu_completion_wait(iommu);
out:
return 0;
}
struct notifier_block device_nb = {
.notifier_call = device_change_notifier,
};
/*****************************************************************************
*
* The next functions belong to the dma_ops mapping/unmapping code.
......@@ -802,7 +1083,6 @@ static struct dma_ops_domain *find_protection_domain(u16 devid)
list_for_each_entry(entry, &iommu_pd_list, list) {
if (entry->target_dev == devid) {
ret = entry;
list_del(&ret->list);
break;
}
}
......@@ -853,14 +1133,13 @@ static int get_device_resources(struct device *dev,
if (!dma_dom)
dma_dom = (*iommu)->default_dom;
*domain = &dma_dom->domain;
set_device_domain(*iommu, *domain, *bdf);
attach_device(*iommu, *domain, *bdf);
printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
"device ", (*domain)->id);
print_devid(_bdf, 1);
"device %s\n", (*domain)->id, dev_name(dev));
}
if (domain_for_device(_bdf) == NULL)
set_device_domain(*iommu, *domain, _bdf);
attach_device(*iommu, *domain, _bdf);
return 1;
}
......@@ -946,6 +1225,11 @@ static dma_addr_t __map_single(struct device *dev,
pages = iommu_num_pages(paddr, size, PAGE_SIZE);
paddr &= PAGE_MASK;
INC_STATS_COUNTER(total_map_requests);
if (pages > 1)
INC_STATS_COUNTER(cross_page);
if (align)
align_mask = (1UL << get_order(size)) - 1;
......@@ -962,6 +1246,8 @@ static dma_addr_t __map_single(struct device *dev,
}
address += offset;
ADD_STATS_COUNTER(alloced_io_mem, size);
if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
iommu_flush_tlb(iommu, dma_dom->domain.id);
dma_dom->need_flush = false;
......@@ -998,6 +1284,8 @@ static void __unmap_single(struct amd_iommu *iommu,
start += PAGE_SIZE;
}
SUB_STATS_COUNTER(alloced_io_mem, size);
dma_ops_free_addresses(dma_dom, dma_addr, pages);
if (amd_iommu_unmap_flush || dma_dom->need_flush) {
......@@ -1019,6 +1307,8 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
dma_addr_t addr;
u64 dma_mask;
INC_STATS_COUNTER(cnt_map_single);
if (!check_device(dev))
return bad_dma_address;
......@@ -1030,6 +1320,9 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
/* device not handled by any AMD IOMMU */
return (dma_addr_t)paddr;
if (!dma_ops_domain(domain))
return bad_dma_address;
spin_lock_irqsave(&domain->lock, flags);
addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
dma_mask);
......@@ -1055,11 +1348,16 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
struct protection_domain *domain;
u16 devid;
INC_STATS_COUNTER(cnt_unmap_single);
if (!check_device(dev) ||
!get_device_resources(dev, &iommu, &domain, &devid))
/* device not handled by any AMD IOMMU */
return;
if (!dma_ops_domain(domain))
return;
spin_lock_irqsave(&domain->lock, flags);
__unmap_single(iommu, domain->priv, dma_addr, size, dir);
......@@ -1104,6 +1402,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
int mapped_elems = 0;
u64 dma_mask;
INC_STATS_COUNTER(cnt_map_sg);
if (!check_device(dev))
return 0;
......@@ -1114,6 +1414,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
if (!iommu || !domain)
return map_sg_no_iommu(dev, sglist, nelems, dir);
if (!dma_ops_domain(domain))
return 0;
spin_lock_irqsave(&domain->lock, flags);
for_each_sg(sglist, s, nelems, i) {
......@@ -1163,10 +1466,15 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
u16 devid;
int i;
INC_STATS_COUNTER(cnt_unmap_sg);
if (!check_device(dev) ||
!get_device_resources(dev, &iommu, &domain, &devid))
return;
if (!dma_ops_domain(domain))
return;
spin_lock_irqsave(&domain->lock, flags);
for_each_sg(sglist, s, nelems, i) {
......@@ -1194,6 +1502,8 @@ static void *alloc_coherent(struct device *dev, size_t size,
phys_addr_t paddr;
u64 dma_mask = dev->coherent_dma_mask;
INC_STATS_COUNTER(cnt_alloc_coherent);
if (!check_device(dev))
return NULL;
......@@ -1212,6 +1522,9 @@ static void *alloc_coherent(struct device *dev, size_t size,
return virt_addr;
}
if (!dma_ops_domain(domain))
goto out_free;
if (!dma_mask)
dma_mask = *dev->dma_mask;
......@@ -1220,18 +1533,20 @@ static void *alloc_coherent(struct device *dev, size_t size,
*dma_addr = __map_single(dev, iommu, domain->priv, paddr,
size, DMA_BIDIRECTIONAL, true, dma_mask);
if (*dma_addr == bad_dma_address) {
free_pages((unsigned long)virt_addr, get_order(size));
virt_addr = NULL;
goto out;
}
if (*dma_addr == bad_dma_address)
goto out_free;
iommu_completion_wait(iommu);
out:
spin_unlock_irqrestore(&domain->lock, flags);
return virt_addr;
out_free:
free_pages((unsigned long)virt_addr, get_order(size));
return NULL;
}
/*
......@@ -1245,6 +1560,8 @@ static void free_coherent(struct device *dev, size_t size,
struct protection_domain *domain;
u16 devid;
INC_STATS_COUNTER(cnt_free_coherent);
if (!check_device(dev))
return;
......@@ -1253,6 +1570,9 @@ static void free_coherent(struct device *dev, size_t size,
if (!iommu || !domain)
goto free_mem;
if (!dma_ops_domain(domain))
goto free_mem;
spin_lock_irqsave(&domain->lock, flags);
__unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
......@@ -1305,7 +1625,7 @@ static void prealloc_protection_domains(void)
u16 devid;
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
devid = (dev->bus->number << 8) | dev->devfn;
devid = calc_devid(dev->bus->number, dev->devfn);
if (devid > amd_iommu_last_bdf)
continue;
devid = amd_iommu_alias_table[devid];
......@@ -1352,6 +1672,7 @@ int __init amd_iommu_init_dma_ops(void)
iommu->default_dom = dma_ops_domain_alloc(iommu, order);
if (iommu->default_dom == NULL)
return -ENOMEM;
iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
ret = iommu_init_unity_mappings(iommu);
if (ret)
goto free_domains;
......@@ -1375,6 +1696,12 @@ int __init amd_iommu_init_dma_ops(void)
/* Make the driver finally visible to the drivers */
dma_ops = &amd_iommu_dma_ops;
register_iommu(&amd_iommu_ops);
bus_register_notifier(&pci_bus_type, &device_nb);
amd_iommu_stats_init();
return 0;
free_domains:
......@@ -1386,3 +1713,224 @@ int __init amd_iommu_init_dma_ops(void)
return ret;
}
/*****************************************************************************
*
* The following functions belong to the exported interface of AMD IOMMU
*
* This interface allows access to lower level functions of the IOMMU
* like protection domain handling and assignement of devices to domains
* which is not possible with the dma_ops interface.
*
*****************************************************************************/
static void cleanup_domain(struct protection_domain *domain)
{
unsigned long flags;
u16 devid;
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
if (amd_iommu_pd_table[devid] == domain)
__detach_device(domain, devid);
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}
static int amd_iommu_domain_init(struct iommu_domain *dom)
{
struct protection_domain *domain;
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return -ENOMEM;
spin_lock_init(&domain->lock);
domain->mode = PAGE_MODE_3_LEVEL;
domain->id = domain_id_alloc();
if (!domain->id)
goto out_free;
domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
if (!domain->pt_root)
goto out_free;
dom->priv = domain;
return 0;
out_free:
kfree(domain);
return -ENOMEM;
}
static void amd_iommu_domain_destroy(struct iommu_domain *dom)
{
struct protection_domain *domain = dom->priv;
if (!domain)
return;
if (domain->dev_cnt > 0)
cleanup_domain(domain);
BUG_ON(domain->dev_cnt != 0);
free_pagetable(domain);
domain_id_free(domain->id);
kfree(domain);
dom->priv = NULL;
}
static void amd_iommu_detach_device(struct iommu_domain *dom,
struct device *dev)
{
struct protection_domain *domain = dom->priv;
struct amd_iommu *iommu;
struct pci_dev *pdev;
u16 devid;
if (dev->bus != &pci_bus_type)
return;
pdev = to_pci_dev(dev);
devid = calc_devid(pdev->bus->number, pdev->devfn);
if (devid > 0)
detach_device(domain, devid);
iommu = amd_iommu_rlookup_table[devid];
if (!iommu)
return;
iommu_queue_inv_dev_entry(iommu, devid);
iommu_completion_wait(iommu);
}
static int amd_iommu_attach_device(struct iommu_domain *dom,
struct device *dev)
{
struct protection_domain *domain = dom->priv;
struct protection_domain *old_domain;
struct amd_iommu *iommu;
struct pci_dev *pdev;
u16 devid;
if (dev->bus != &pci_bus_type)
return -EINVAL;
pdev = to_pci_dev(dev);
devid = calc_devid(pdev->bus->number, pdev->devfn);
if (devid >= amd_iommu_last_bdf ||
devid != amd_iommu_alias_table[devid])
return -EINVAL;
iommu = amd_iommu_rlookup_table[devid];
if (!iommu)
return -EINVAL;
old_domain = domain_for_device(devid);
if (old_domain)
return -EBUSY;
attach_device(iommu, domain, devid);
iommu_completion_wait(iommu);
return 0;
}
static int amd_iommu_map_range(struct iommu_domain *dom,
unsigned long iova, phys_addr_t paddr,
size_t size, int iommu_prot)
{
struct protection_domain *domain = dom->priv;
unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE);
int prot = 0;
int ret;
if (iommu_prot & IOMMU_READ)
prot |= IOMMU_PROT_IR;
if (iommu_prot & IOMMU_WRITE)
prot |= IOMMU_PROT_IW;
iova &= PAGE_MASK;
paddr &= PAGE_MASK;
for (i = 0; i < npages; ++i) {
ret = iommu_map_page(domain, iova, paddr, prot);
if (ret)
return ret;
iova += PAGE_SIZE;
paddr += PAGE_SIZE;
}
return 0;
}
static void amd_iommu_unmap_range(struct iommu_domain *dom,
unsigned long iova, size_t size)
{
struct protection_domain *domain = dom->priv;
unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE);
iova &= PAGE_MASK;
for (i = 0; i < npages; ++i) {
iommu_unmap_page(domain, iova);
iova += PAGE_SIZE;
}
iommu_flush_domain(domain->id);
}
static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
unsigned long iova)
{
struct protection_domain *domain = dom->priv;
unsigned long offset = iova & ~PAGE_MASK;
phys_addr_t paddr;
u64 *pte;
pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)];
if (!IOMMU_PTE_PRESENT(*pte))
return 0;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(iova)];
if (!IOMMU_PTE_PRESENT(*pte))
return 0;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L0_INDEX(iova)];
if (!IOMMU_PTE_PRESENT(*pte))
return 0;
paddr = *pte & IOMMU_PAGE_MASK;
paddr |= offset;
return paddr;
}
static struct iommu_ops amd_iommu_ops = {
.domain_init = amd_iommu_domain_init,
.domain_destroy = amd_iommu_domain_destroy,
.attach_dev = amd_iommu_attach_device,
.detach_dev = amd_iommu_detach_device,
.map = amd_iommu_map_range,
.unmap = amd_iommu_unmap_range,
.iova_to_phys = amd_iommu_iova_to_phys,
};
......@@ -122,7 +122,8 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have
LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
we find in ACPI */
unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
int amd_iommu_isolate = 1; /* if 1, device isolation is enabled */
bool amd_iommu_isolate = true; /* if true, device isolation is
enabled */
bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
......@@ -245,12 +246,8 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
/* Function to enable the hardware */
static void __init iommu_enable(struct amd_iommu *iommu)
{
printk(KERN_INFO "AMD IOMMU: Enabling IOMMU "
"at %02x:%02x.%x cap 0x%hx\n",
iommu->dev->bus->number,
PCI_SLOT(iommu->dev->devfn),
PCI_FUNC(iommu->dev->devfn),
iommu->cap_ptr);
printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
dev_name(&iommu->dev->dev), iommu->cap_ptr);
iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
}
......@@ -1218,9 +1215,9 @@ static int __init parse_amd_iommu_options(char *str)
{
for (; *str; ++str) {
if (strncmp(str, "isolate", 7) == 0)
amd_iommu_isolate = 1;
amd_iommu_isolate = true;
if (strncmp(str, "share", 5) == 0)
amd_iommu_isolate = 0;
amd_iommu_isolate = false;
if (strncmp(str, "fullflush", 9) == 0)
amd_iommu_unmap_flush = true;
}
......
......@@ -7,8 +7,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
ifeq ($(CONFIG_KVM_TRACE),y)
common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
endif
ifeq ($(CONFIG_DMAR),y)
common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
ifeq ($(CONFIG_IOMMU_API),y)
common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
endif
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
......
......@@ -34,6 +34,7 @@
#include <linux/module.h>
#include <linux/mman.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <asm/uaccess.h>
......@@ -989,7 +990,7 @@ int kvm_dev_ioctl_check_extension(long ext)
r = !tdp_enabled;
break;
case KVM_CAP_IOMMU:
r = intel_iommu_found();
r = iommu_found();
break;
default:
r = 0;
......
......@@ -11,6 +11,7 @@ obj-$(CONFIG_FW_LOADER) += firmware_class.o
obj-$(CONFIG_NUMA) += node.o
obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o
obj-$(CONFIG_SMP) += topology.o
obj-$(CONFIG_IOMMU_API) += iommu.o
ifeq ($(CONFIG_SYSFS),y)
obj-$(CONFIG_MODULES) += module.o
endif
......
/*
* Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <joerg.roedel@amd.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/iommu.h>
static struct iommu_ops *iommu_ops;
void register_iommu(struct iommu_ops *ops)
{
if (iommu_ops)
BUG();
iommu_ops = ops;
}
bool iommu_found()
{
return iommu_ops != NULL;
}
EXPORT_SYMBOL_GPL(iommu_found);
struct iommu_domain *iommu_domain_alloc(void)
{
struct iommu_domain *domain;
int ret;
domain = kmalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return NULL;
ret = iommu_ops->domain_init(domain);
if (ret)
goto out_free;
return domain;
out_free:
kfree(domain);
return NULL;
}
EXPORT_SYMBOL_GPL(iommu_domain_alloc);
void iommu_domain_free(struct iommu_domain *domain)
{
iommu_ops->domain_destroy(domain);
kfree(domain);
}
EXPORT_SYMBOL_GPL(iommu_domain_free);
int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
{
return iommu_ops->attach_dev(domain, dev);
}
EXPORT_SYMBOL_GPL(iommu_attach_device);
void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
{
iommu_ops->detach_dev(domain, dev);
}
EXPORT_SYMBOL_GPL(iommu_detach_device);
int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
return iommu_ops->map(domain, iova, paddr, size, prot);
}
EXPORT_SYMBOL_GPL(iommu_map_range);
void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
size_t size)
{
iommu_ops->unmap(domain, iova, size);
}
EXPORT_SYMBOL_GPL(iommu_unmap_range);
phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
unsigned long iova)
{
return iommu_ops->iova_to_phys(domain, iova);
}
EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
......@@ -191,26 +191,17 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
{
struct acpi_dmar_hardware_unit *drhd;
static int include_all;
int ret = 0;
drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
if (!dmaru->include_all)
ret = dmar_parse_dev_scope((void *)(drhd + 1),
if (dmaru->include_all)
return 0;
ret = dmar_parse_dev_scope((void *)(drhd + 1),
((void *)drhd) + drhd->header.length,
&dmaru->devices_cnt, &dmaru->devices,
drhd->segment);
else {
/* Only allow one INCLUDE_ALL */
if (include_all) {
printk(KERN_WARNING PREFIX "Only one INCLUDE_ALL "
"device scope is allowed\n");
ret = -EINVAL;
}
include_all = 1;
}
if (ret) {
list_del(&dmaru->list);
kfree(dmaru);
......@@ -384,12 +375,21 @@ int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
struct dmar_drhd_unit *
dmar_find_matched_drhd_unit(struct pci_dev *dev)
{
struct dmar_drhd_unit *drhd = NULL;
struct dmar_drhd_unit *dmaru = NULL;
struct acpi_dmar_hardware_unit *drhd;
list_for_each_entry(drhd, &dmar_drhd_units, list) {
if (drhd->include_all || dmar_pci_device_match(drhd->devices,
drhd->devices_cnt, dev))
return drhd;
list_for_each_entry(dmaru, &dmar_drhd_units, list) {
drhd = container_of(dmaru->hdr,
struct acpi_dmar_hardware_unit,
header);
if (dmaru->include_all &&
drhd->segment == pci_domain_nr(dev->bus))
return dmaru;
if (dmar_pci_device_match(dmaru->devices,
dmaru->devices_cnt, dev))
return dmaru;
}
return NULL;
......@@ -491,6 +491,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
int map_size;
u32 ver;
static int iommu_allocated = 0;
int agaw;
iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
if (!iommu)
......@@ -506,6 +507,15 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
agaw = iommu_calculate_agaw(iommu);
if (agaw < 0) {
printk(KERN_ERR
"Cannot get a valid agaw for iommu (seq_id = %d)\n",
iommu->seq_id);
goto error;
}
iommu->agaw = agaw;
/* the registers might be more than one page */
map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
cap_max_fault_reg_offset(iommu->cap));
......
......@@ -27,7 +27,6 @@
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/sysdev.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
......@@ -35,6 +34,7 @@
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
......@@ -54,6 +54,195 @@
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;
/*
* 0: Present
* 1-11: Reserved
* 12-63: Context Ptr (12 - (haw-1))
* 64-127: Reserved
*/
struct root_entry {
u64 val;
u64 rsvd1;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
static inline bool root_present(struct root_entry *root)
{
return (root->val & 1);
}
static inline void set_root_present(struct root_entry *root)
{
root->val |= 1;
}
static inline void set_root_value(struct root_entry *root, unsigned long value)
{
root->val |= value & VTD_PAGE_MASK;
}
static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
return (struct context_entry *)
(root_present(root)?phys_to_virt(
root->val & VTD_PAGE_MASK) :
NULL);
}
/*
* low 64 bits:
* 0: present
* 1: fault processing disable
* 2-3: translation type
* 12-63: address space root
* high 64 bits:
* 0-2: address width
* 3-6: aval
* 8-23: domain id
*/
struct context_entry {
u64 lo;
u64 hi;
};
static inline bool context_present(struct context_entry *context)
{
return (context->lo & 1);
}
static inline void context_set_present(struct context_entry *context)
{
context->lo |= 1;
}
static inline void context_set_fault_enable(struct context_entry *context)
{
context->lo &= (((u64)-1) << 2) | 1;
}
#define CONTEXT_TT_MULTI_LEVEL 0
static inline void context_set_translation_type(struct context_entry *context,
unsigned long value)
{
context->lo &= (((u64)-1) << 4) | 3;
context->lo |= (value & 3) << 2;
}
static inline void context_set_address_root(struct context_entry *context,
unsigned long value)
{
context->lo |= value & VTD_PAGE_MASK;
}
static inline void context_set_address_width(struct context_entry *context,
unsigned long value)
{
context->hi |= value & 7;
}
static inline void context_set_domain_id(struct context_entry *context,
unsigned long value)
{
context->hi |= (value & ((1 << 16) - 1)) << 8;
}
static inline void context_clear_entry(struct context_entry *context)
{
context->lo = 0;
context->hi = 0;
}
/*
* 0: readable
* 1: writable
* 2-6: reserved
* 7: super page
* 8-11: available
* 12-63: Host physcial address
*/
struct dma_pte {
u64 val;
};
static inline void dma_clear_pte(struct dma_pte *pte)
{
pte->val = 0;
}
static inline void dma_set_pte_readable(struct dma_pte *pte)
{
pte->val |= DMA_PTE_READ;
}
static inline void dma_set_pte_writable(struct dma_pte *pte)
{
pte->val |= DMA_PTE_WRITE;
}
static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
{
pte->val = (pte->val & ~3) | (prot & 3);
}
static inline u64 dma_pte_addr(struct dma_pte *pte)
{
return (pte->val & VTD_PAGE_MASK);
}
static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
{
pte->val |= (addr & VTD_PAGE_MASK);
}
static inline bool dma_pte_present(struct dma_pte *pte)
{
return (pte->val & 3) != 0;
}
/* devices under the same p2p bridge are owned in one domain */
#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
/* domain represents a virtual machine, more than one devices
* across iommus may be owned in one domain, e.g. kvm guest.
*/
#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
struct dmar_domain {
int id; /* domain id */
unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
struct list_head devices; /* all devices' list */
struct iova_domain iovad; /* iova's that belong to this domain */
struct dma_pte *pgd; /* virtual address */
spinlock_t mapping_lock; /* page table lock */
int gaw; /* max guest address width */
/* adjusted guest address width, 0 is level 2 30-bit */
int agaw;
int flags; /* flags to find out type of domain */
int iommu_coherency;/* indicate coherency of iommu access */
int iommu_count; /* reference count of iommu */
spinlock_t iommu_lock; /* protect iommu set in domain */
u64 max_addr; /* maximum mapped address */
};
/* PCI domain-device relationship */
struct device_domain_info {
struct list_head link; /* link to domain siblings */
struct list_head global; /* link to global list */
u8 bus; /* PCI bus numer */
u8 devfn; /* PCI devfn number */
struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
struct dmar_domain *domain; /* pointer to domain */
};
static void flush_unmaps_timeout(unsigned long data);
......@@ -88,6 +277,8 @@ static int intel_iommu_strict;
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
static struct iommu_ops intel_iommu_ops;
static int __init intel_iommu_setup(char *str)
{
if (!str)
......@@ -184,6 +375,87 @@ void free_iova_mem(struct iova *iova)
kmem_cache_free(iommu_iova_cache, iova);
}
static inline int width_to_agaw(int width);
/* calculate agaw for each iommu.
* "SAGAW" may be different across iommus, use a default agaw, and
* get a supported less agaw for iommus that don't support the default agaw.
*/
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
unsigned long sagaw;
int agaw = -1;
sagaw = cap_sagaw(iommu->cap);
for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
agaw >= 0; agaw--) {
if (test_bit(agaw, &sagaw))
break;
}
return agaw;
}
/* in native case, each domain is related to only one iommu */
static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
int iommu_id;
BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
return NULL;
return g_iommus[iommu_id];
}
/* "Coherency" capability may be different across iommus */
static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
int i;
domain->iommu_coherency = 1;
i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
for (; i < g_num_of_iommus; ) {
if (!ecap_coherent(g_iommus[i]->ecap)) {
domain->iommu_coherency = 0;
break;
}
i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
}
}
static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
{
struct dmar_drhd_unit *drhd = NULL;
int i;
for_each_drhd_unit(drhd) {
if (drhd->ignored)
continue;
for (i = 0; i < drhd->devices_cnt; i++)
if (drhd->devices[i]->bus->number == bus &&
drhd->devices[i]->devfn == devfn)
return drhd->iommu;
if (drhd->include_all)
return drhd->iommu;
}
return NULL;
}
static void domain_flush_cache(struct dmar_domain *domain,
void *addr, int size)
{
if (!domain->iommu_coherency)
clflush_cache_range(addr, size);
}
/* Gets context entry for a given bus and devfn */
static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
u8 bus, u8 devfn)
......@@ -226,7 +498,7 @@ static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
ret = 0;
goto out;
}
ret = context_present(context[devfn]);
ret = context_present(&context[devfn]);
out:
spin_unlock_irqrestore(&iommu->lock, flags);
return ret;
......@@ -242,7 +514,7 @@ static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
root = &iommu->root_entry[bus];
context = get_context_addr_from_root(root);
if (context) {
context_clear_entry(context[devfn]);
context_clear_entry(&context[devfn]);
__iommu_flush_cache(iommu, &context[devfn], \
sizeof(*context));
}
......@@ -339,7 +611,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
if (level == 1)
break;
if (!dma_pte_present(*pte)) {
if (!dma_pte_present(pte)) {
tmp_page = alloc_pgtable_page();
if (!tmp_page) {
......@@ -347,18 +619,17 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
flags);
return NULL;
}
__iommu_flush_cache(domain->iommu, tmp_page,
PAGE_SIZE);
dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
domain_flush_cache(domain, tmp_page, PAGE_SIZE);
dma_set_pte_addr(pte, virt_to_phys(tmp_page));
/*
* high level table always sets r/w, last level page
* table control read/write
*/
dma_set_pte_readable(*pte);
dma_set_pte_writable(*pte);
__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
dma_set_pte_readable(pte);
dma_set_pte_writable(pte);
domain_flush_cache(domain, pte, sizeof(*pte));
}
parent = phys_to_virt(dma_pte_addr(*pte));
parent = phys_to_virt(dma_pte_addr(pte));
level--;
}
......@@ -381,9 +652,9 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
if (level == total)
return pte;
if (!dma_pte_present(*pte))
if (!dma_pte_present(pte))
break;
parent = phys_to_virt(dma_pte_addr(*pte));
parent = phys_to_virt(dma_pte_addr(pte));
total--;
}
return NULL;
......@@ -398,8 +669,8 @@ static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
pte = dma_addr_level_pte(domain, addr, 1);
if (pte) {
dma_clear_pte(*pte);
__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
dma_clear_pte(pte);
domain_flush_cache(domain, pte, sizeof(*pte));
}
}
......@@ -445,10 +716,9 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
pte = dma_addr_level_pte(domain, tmp, level);
if (pte) {
free_pgtable_page(
phys_to_virt(dma_pte_addr(*pte)));
dma_clear_pte(*pte);
__iommu_flush_cache(domain->iommu,
pte, sizeof(*pte));
phys_to_virt(dma_pte_addr(pte)));
dma_clear_pte(pte);
domain_flush_cache(domain, pte, sizeof(*pte));
}
tmp += level_size(level);
}
......@@ -950,17 +1220,28 @@ static int iommu_init_domains(struct intel_iommu *iommu)
static void domain_exit(struct dmar_domain *domain);
static void vm_domain_exit(struct dmar_domain *domain);
void free_dmar_iommu(struct intel_iommu *iommu)
{
struct dmar_domain *domain;
int i;
unsigned long flags;
i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
for (; i < cap_ndoms(iommu->cap); ) {
domain = iommu->domains[i];
clear_bit(i, iommu->domain_ids);
domain_exit(domain);
spin_lock_irqsave(&domain->iommu_lock, flags);
if (--domain->iommu_count == 0) {
if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
vm_domain_exit(domain);
else
domain_exit(domain);
}
spin_unlock_irqrestore(&domain->iommu_lock, flags);
i = find_next_bit(iommu->domain_ids,
cap_ndoms(iommu->cap), i+1);
}
......@@ -978,6 +1259,17 @@ void free_dmar_iommu(struct intel_iommu *iommu)
kfree(iommu->domains);
kfree(iommu->domain_ids);
g_iommus[iommu->seq_id] = NULL;
/* if all iommus are freed, free g_iommus */
for (i = 0; i < g_num_of_iommus; i++) {
if (g_iommus[i])
break;
}
if (i == g_num_of_iommus)
kfree(g_iommus);
/* free context mapping */
free_context_table(iommu);
}
......@@ -1006,7 +1298,9 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
set_bit(num, iommu->domain_ids);
domain->id = num;
domain->iommu = iommu;
memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
set_bit(iommu->seq_id, &domain->iommu_bmp);
domain->flags = 0;
iommu->domains[num] = domain;
spin_unlock_irqrestore(&iommu->lock, flags);
......@@ -1016,10 +1310,13 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
static void iommu_free_domain(struct dmar_domain *domain)
{
unsigned long flags;
struct intel_iommu *iommu;
iommu = domain_get_iommu(domain);
spin_lock_irqsave(&domain->iommu->lock, flags);
clear_bit(domain->id, domain->iommu->domain_ids);
spin_unlock_irqrestore(&domain->iommu->lock, flags);
spin_lock_irqsave(&iommu->lock, flags);
clear_bit(domain->id, iommu->domain_ids);
spin_unlock_irqrestore(&iommu->lock, flags);
}
static struct iova_domain reserved_iova_list;
......@@ -1094,11 +1391,12 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
spin_lock_init(&domain->mapping_lock);
spin_lock_init(&domain->iommu_lock);
domain_reserve_special_ranges(domain);
/* calculate AGAW */
iommu = domain->iommu;
iommu = domain_get_iommu(domain);
if (guest_width > cap_mgaw(iommu->cap))
guest_width = cap_mgaw(iommu->cap);
domain->gaw = guest_width;
......@@ -1115,6 +1413,13 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
domain->agaw = agaw;
INIT_LIST_HEAD(&domain->devices);
if (ecap_coherent(iommu->ecap))
domain->iommu_coherency = 1;
else
domain->iommu_coherency = 0;
domain->iommu_count = 1;
/* always allocate the top pgd */
domain->pgd = (struct dma_pte *)alloc_pgtable_page();
if (!domain->pgd)
......@@ -1151,28 +1456,82 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
u8 bus, u8 devfn)
{
struct context_entry *context;
struct intel_iommu *iommu = domain->iommu;
unsigned long flags;
struct intel_iommu *iommu;
struct dma_pte *pgd;
unsigned long num;
unsigned long ndomains;
int id;
int agaw;
pr_debug("Set context mapping for %02x:%02x.%d\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
BUG_ON(!domain->pgd);
iommu = device_to_iommu(bus, devfn);
if (!iommu)
return -ENODEV;
context = device_to_context_entry(iommu, bus, devfn);
if (!context)
return -ENOMEM;
spin_lock_irqsave(&iommu->lock, flags);
if (context_present(*context)) {
if (context_present(context)) {
spin_unlock_irqrestore(&iommu->lock, flags);
return 0;
}
context_set_domain_id(*context, domain->id);
context_set_address_width(*context, domain->agaw);
context_set_address_root(*context, virt_to_phys(domain->pgd));
context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
context_set_fault_enable(*context);
context_set_present(*context);
__iommu_flush_cache(iommu, context, sizeof(*context));
id = domain->id;
pgd = domain->pgd;
if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
int found = 0;
/* find an available domain id for this device in iommu */
ndomains = cap_ndoms(iommu->cap);
num = find_first_bit(iommu->domain_ids, ndomains);
for (; num < ndomains; ) {
if (iommu->domains[num] == domain) {
id = num;
found = 1;
break;
}
num = find_next_bit(iommu->domain_ids,
cap_ndoms(iommu->cap), num+1);
}
if (found == 0) {
num = find_first_zero_bit(iommu->domain_ids, ndomains);
if (num >= ndomains) {
spin_unlock_irqrestore(&iommu->lock, flags);
printk(KERN_ERR "IOMMU: no free domain ids\n");
return -EFAULT;
}
set_bit(num, iommu->domain_ids);
iommu->domains[num] = domain;
id = num;
}
/* Skip top levels of page tables for
* iommu which has less agaw than default.
*/
for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
pgd = phys_to_virt(dma_pte_addr(pgd));
if (!dma_pte_present(pgd)) {
spin_unlock_irqrestore(&iommu->lock, flags);
return -ENOMEM;
}
}
}
context_set_domain_id(context, id);
context_set_address_width(context, iommu->agaw);
context_set_address_root(context, virt_to_phys(pgd));
context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
context_set_fault_enable(context);
context_set_present(context);
domain_flush_cache(domain, context, sizeof(*context));
/* it's a non-present to present mapping */
if (iommu->flush.flush_context(iommu, domain->id,
......@@ -1183,6 +1542,13 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
spin_unlock_irqrestore(&iommu->lock, flags);
spin_lock_irqsave(&domain->iommu_lock, flags);
if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
domain->iommu_count++;
domain_update_iommu_coherency(domain);
}
spin_unlock_irqrestore(&domain->iommu_lock, flags);
return 0;
}
......@@ -1218,13 +1584,17 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
tmp->bus->number, tmp->devfn);
}
static int domain_context_mapped(struct dmar_domain *domain,
struct pci_dev *pdev)
static int domain_context_mapped(struct pci_dev *pdev)
{
int ret;
struct pci_dev *tmp, *parent;
struct intel_iommu *iommu;
iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
if (!iommu)
return -ENODEV;
ret = device_context_mapped(domain->iommu,
ret = device_context_mapped(iommu,
pdev->bus->number, pdev->devfn);
if (!ret)
return ret;
......@@ -1235,17 +1605,17 @@ static int domain_context_mapped(struct dmar_domain *domain,
/* Secondary interface's bus number and devfn 0 */
parent = pdev->bus->self;
while (parent != tmp) {
ret = device_context_mapped(domain->iommu, parent->bus->number,
ret = device_context_mapped(iommu, parent->bus->number,
parent->devfn);
if (!ret)
return ret;
parent = parent->bus->self;
}
if (tmp->is_pcie)
return device_context_mapped(domain->iommu,
return device_context_mapped(iommu,
tmp->subordinate->number, 0);
else
return device_context_mapped(domain->iommu,
return device_context_mapped(iommu,
tmp->bus->number, tmp->devfn);
}
......@@ -1273,22 +1643,25 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
/* We don't need lock here, nobody else
* touches the iova range
*/
BUG_ON(dma_pte_addr(*pte));
dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT);
dma_set_pte_prot(*pte, prot);
__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
BUG_ON(dma_pte_addr(pte));
dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
dma_set_pte_prot(pte, prot);
domain_flush_cache(domain, pte, sizeof(*pte));
start_pfn++;
index++;
}
return 0;
}
static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
clear_context_table(domain->iommu, bus, devfn);
domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0,
if (!iommu)
return;
clear_context_table(iommu, bus, devfn);
iommu->flush.flush_context(iommu, 0, 0, 0,
DMA_CCMD_GLOBAL_INVL, 0);
domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0,
iommu->flush.flush_iotlb(iommu, 0, 0, 0,
DMA_TLB_GLOBAL_FLUSH, 0);
}
......@@ -1296,6 +1669,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
{
struct device_domain_info *info;
unsigned long flags;
struct intel_iommu *iommu;
spin_lock_irqsave(&device_domain_lock, flags);
while (!list_empty(&domain->devices)) {
......@@ -1307,7 +1681,8 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
info->dev->dev.archdata.iommu = NULL;
spin_unlock_irqrestore(&device_domain_lock, flags);
detach_domain_for_dev(info->domain, info->bus, info->devfn);
iommu = device_to_iommu(info->bus, info->devfn);
iommu_detach_dev(iommu, info->bus, info->devfn);
free_devinfo_mem(info);
spin_lock_irqsave(&device_domain_lock, flags);
......@@ -1400,7 +1775,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
info->dev = NULL;
info->domain = domain;
/* This domain is shared by devices under p2p bridge */
domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
/* pcie-to-pci bridge already has a domain, uses it */
found = NULL;
......@@ -1563,6 +1938,11 @@ static void __init iommu_prepare_gfx_mapping(void)
printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
}
}
#else /* !CONFIG_DMAR_GFX_WA */
static inline void iommu_prepare_gfx_mapping(void)
{
return;
}
#endif
#ifdef CONFIG_DMAR_FLOPPY_WA
......@@ -1590,7 +1970,7 @@ static inline void iommu_prepare_isa(void)
}
#endif /* !CONFIG_DMAR_FLPY_WA */
int __init init_dmars(void)
static int __init init_dmars(void)
{
struct dmar_drhd_unit *drhd;
struct dmar_rmrr_unit *rmrr;
......@@ -1613,9 +1993,18 @@ int __init init_dmars(void)
*/
}
g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
GFP_KERNEL);
if (!g_iommus) {
printk(KERN_ERR "Allocating global iommu array failed\n");
ret = -ENOMEM;
goto error;
}
deferred_flush = kzalloc(g_num_of_iommus *
sizeof(struct deferred_flush_tables), GFP_KERNEL);
if (!deferred_flush) {
kfree(g_iommus);
ret = -ENOMEM;
goto error;
}
......@@ -1625,6 +2014,7 @@ int __init init_dmars(void)
continue;
iommu = drhd->iommu;
g_iommus[iommu->seq_id] = iommu;
ret = iommu_init_domains(iommu);
if (ret)
......@@ -1737,6 +2127,7 @@ int __init init_dmars(void)
iommu = drhd->iommu;
free_iommu(iommu);
}
kfree(g_iommus);
return ret;
}
......@@ -1805,7 +2196,7 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
}
/* make sure context mapping is ok */
if (unlikely(!domain_context_mapped(domain, pdev))) {
if (unlikely(!domain_context_mapped(pdev))) {
ret = domain_context_mapping(domain, pdev);
if (ret) {
printk(KERN_ERR
......@@ -1827,6 +2218,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
struct iova *iova;
int prot = 0;
int ret;
struct intel_iommu *iommu;
BUG_ON(dir == DMA_NONE);
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
......@@ -1836,6 +2228,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
if (!domain)
return 0;
iommu = domain_get_iommu(domain);
size = aligned_size((u64)paddr, size);
iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
......@@ -1849,7 +2242,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
* mappings..
*/
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
!cap_zlr(domain->iommu->cap))
!cap_zlr(iommu->cap))
prot |= DMA_PTE_READ;
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
prot |= DMA_PTE_WRITE;
......@@ -1865,10 +2258,10 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
goto error;
/* it's a non-present to present mapping */
ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
ret = iommu_flush_iotlb_psi(iommu, domain->id,
start_paddr, size >> VTD_PAGE_SHIFT, 1);
if (ret)
iommu_flush_write_buffer(domain->iommu);
iommu_flush_write_buffer(iommu);
return start_paddr + ((u64)paddr & (~PAGE_MASK));
......@@ -1895,10 +2288,11 @@ static void flush_unmaps(void)
/* just flush them all */
for (i = 0; i < g_num_of_iommus; i++) {
if (deferred_flush[i].next) {
struct intel_iommu *iommu =
deferred_flush[i].domain[0]->iommu;
struct intel_iommu *iommu = g_iommus[i];
if (!iommu)
continue;
if (deferred_flush[i].next) {
iommu->flush.flush_iotlb(iommu, 0, 0, 0,
DMA_TLB_GLOBAL_FLUSH, 0);
for (j = 0; j < deferred_flush[i].next; j++) {
......@@ -1925,12 +2319,14 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
{
unsigned long flags;
int next, iommu_id;
struct intel_iommu *iommu;
spin_lock_irqsave(&async_umap_flush_lock, flags);
if (list_size == HIGH_WATER_MARK)
flush_unmaps();
iommu_id = dom->iommu->seq_id;
iommu = domain_get_iommu(dom);
iommu_id = iommu->seq_id;
next = deferred_flush[iommu_id].next;
deferred_flush[iommu_id].domain[next] = dom;
......@@ -1952,12 +2348,15 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
struct dmar_domain *domain;
unsigned long start_addr;
struct iova *iova;
struct intel_iommu *iommu;
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
return;
domain = find_domain(pdev);
BUG_ON(!domain);
iommu = domain_get_iommu(domain);
iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
if (!iova)
return;
......@@ -1973,9 +2372,9 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
/* free page tables */
dma_pte_free_pagetable(domain, start_addr, start_addr + size);
if (intel_iommu_strict) {
if (iommu_flush_iotlb_psi(domain->iommu,
if (iommu_flush_iotlb_psi(iommu,
domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
iommu_flush_write_buffer(domain->iommu);
iommu_flush_write_buffer(iommu);
/* free iova */
__free_iova(&domain->iovad, iova);
} else {
......@@ -2036,11 +2435,15 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
size_t size = 0;
void *addr;
struct scatterlist *sg;
struct intel_iommu *iommu;
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
return;
domain = find_domain(pdev);
BUG_ON(!domain);
iommu = domain_get_iommu(domain);
iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
if (!iova)
......@@ -2057,9 +2460,9 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
/* free page tables */
dma_pte_free_pagetable(domain, start_addr, start_addr + size);
if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
size >> VTD_PAGE_SHIFT, 0))
iommu_flush_write_buffer(domain->iommu);
iommu_flush_write_buffer(iommu);
/* free iova */
__free_iova(&domain->iovad, iova);
......@@ -2093,6 +2496,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
int ret;
struct scatterlist *sg;
unsigned long start_addr;
struct intel_iommu *iommu;
BUG_ON(dir == DMA_NONE);
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
......@@ -2102,6 +2506,8 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
if (!domain)
return 0;
iommu = domain_get_iommu(domain);
for_each_sg(sglist, sg, nelems, i) {
addr = SG_ENT_VIRT_ADDRESS(sg);
addr = (void *)virt_to_phys(addr);
......@@ -2119,7 +2525,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
* mappings..
*/
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
!cap_zlr(domain->iommu->cap))
!cap_zlr(iommu->cap))
prot |= DMA_PTE_READ;
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
prot |= DMA_PTE_WRITE;
......@@ -2151,9 +2557,9 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
}
/* it's a non-present to present mapping */
if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
if (iommu_flush_iotlb_psi(iommu, domain->id,
start_addr, offset >> VTD_PAGE_SHIFT, 1))
iommu_flush_write_buffer(domain->iommu);
iommu_flush_write_buffer(iommu);
return nelems;
}
......@@ -2325,10 +2731,220 @@ int __init intel_iommu_init(void)
init_timer(&unmap_timer);
force_iommu = 1;
dma_ops = &intel_dma_ops;
register_iommu(&intel_iommu_ops);
return 0;
}
static int vm_domain_add_dev_info(struct dmar_domain *domain,
struct pci_dev *pdev)
{
struct device_domain_info *info;
unsigned long flags;
info = alloc_devinfo_mem();
if (!info)
return -ENOMEM;
info->bus = pdev->bus->number;
info->devfn = pdev->devfn;
info->dev = pdev;
info->domain = domain;
spin_lock_irqsave(&device_domain_lock, flags);
list_add(&info->link, &domain->devices);
list_add(&info->global, &device_domain_list);
pdev->dev.archdata.iommu = info;
spin_unlock_irqrestore(&device_domain_lock, flags);
return 0;
}
static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
struct pci_dev *pdev)
{
struct device_domain_info *info;
struct intel_iommu *iommu;
unsigned long flags;
int found = 0;
struct list_head *entry, *tmp;
iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
if (!iommu)
return;
spin_lock_irqsave(&device_domain_lock, flags);
list_for_each_safe(entry, tmp, &domain->devices) {
info = list_entry(entry, struct device_domain_info, link);
if (info->bus == pdev->bus->number &&
info->devfn == pdev->devfn) {
list_del(&info->link);
list_del(&info->global);
if (info->dev)
info->dev->dev.archdata.iommu = NULL;
spin_unlock_irqrestore(&device_domain_lock, flags);
iommu_detach_dev(iommu, info->bus, info->devfn);
free_devinfo_mem(info);
spin_lock_irqsave(&device_domain_lock, flags);
if (found)
break;
else
continue;
}
/* if there is no other devices under the same iommu
* owned by this domain, clear this iommu in iommu_bmp
* update iommu count and coherency
*/
if (device_to_iommu(info->bus, info->devfn) == iommu)
found = 1;
}
if (found == 0) {
unsigned long tmp_flags;
spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
clear_bit(iommu->seq_id, &domain->iommu_bmp);
domain->iommu_count--;
domain_update_iommu_coherency(domain);
spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
}
spin_unlock_irqrestore(&device_domain_lock, flags);
}
static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
{
struct device_domain_info *info;
struct intel_iommu *iommu;
unsigned long flags1, flags2;
spin_lock_irqsave(&device_domain_lock, flags1);
while (!list_empty(&domain->devices)) {
info = list_entry(domain->devices.next,
struct device_domain_info, link);
list_del(&info->link);
list_del(&info->global);
if (info->dev)
info->dev->dev.archdata.iommu = NULL;
spin_unlock_irqrestore(&device_domain_lock, flags1);
iommu = device_to_iommu(info->bus, info->devfn);
iommu_detach_dev(iommu, info->bus, info->devfn);
/* clear this iommu in iommu_bmp, update iommu count
* and coherency
*/
spin_lock_irqsave(&domain->iommu_lock, flags2);
if (test_and_clear_bit(iommu->seq_id,
&domain->iommu_bmp)) {
domain->iommu_count--;
domain_update_iommu_coherency(domain);
}
spin_unlock_irqrestore(&domain->iommu_lock, flags2);
free_devinfo_mem(info);
spin_lock_irqsave(&device_domain_lock, flags1);
}
spin_unlock_irqrestore(&device_domain_lock, flags1);
}
/* domain id for virtual machine, it won't be set in context */
static unsigned long vm_domid;
static int vm_domain_min_agaw(struct dmar_domain *domain)
{
int i;
int min_agaw = domain->agaw;
i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
for (; i < g_num_of_iommus; ) {
if (min_agaw > g_iommus[i]->agaw)
min_agaw = g_iommus[i]->agaw;
i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
}
return min_agaw;
}
static struct dmar_domain *iommu_alloc_vm_domain(void)
{
struct dmar_domain *domain;
domain = alloc_domain_mem();
if (!domain)
return NULL;
domain->id = vm_domid++;
memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
return domain;
}
static int vm_domain_init(struct dmar_domain *domain, int guest_width)
{
int adjust_width;
init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
spin_lock_init(&domain->mapping_lock);
spin_lock_init(&domain->iommu_lock);
domain_reserve_special_ranges(domain);
/* calculate AGAW */
domain->gaw = guest_width;
adjust_width = guestwidth_to_adjustwidth(guest_width);
domain->agaw = width_to_agaw(adjust_width);
INIT_LIST_HEAD(&domain->devices);
domain->iommu_count = 0;
domain->iommu_coherency = 0;
domain->max_addr = 0;
/* always allocate the top pgd */
domain->pgd = (struct dma_pte *)alloc_pgtable_page();
if (!domain->pgd)
return -ENOMEM;
domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
return 0;
}
void intel_iommu_domain_exit(struct dmar_domain *domain)
static void iommu_free_vm_domain(struct dmar_domain *domain)
{
unsigned long flags;
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
unsigned long i;
unsigned long ndomains;
for_each_drhd_unit(drhd) {
if (drhd->ignored)
continue;
iommu = drhd->iommu;
ndomains = cap_ndoms(iommu->cap);
i = find_first_bit(iommu->domain_ids, ndomains);
for (; i < ndomains; ) {
if (iommu->domains[i] == domain) {
spin_lock_irqsave(&iommu->lock, flags);
clear_bit(i, iommu->domain_ids);
iommu->domains[i] = NULL;
spin_unlock_irqrestore(&iommu->lock, flags);
break;
}
i = find_next_bit(iommu->domain_ids, ndomains, i+1);
}
}
}
static void vm_domain_exit(struct dmar_domain *domain)
{
u64 end;
......@@ -2336,6 +2952,9 @@ void intel_iommu_domain_exit(struct dmar_domain *domain)
if (!domain)
return;
vm_domain_remove_all_dev_info(domain);
/* destroy iovas */
put_iova_domain(&domain->iovad);
end = DOMAIN_MAX_ADDR(domain->gaw);
end = end & (~VTD_PAGE_MASK);
......@@ -2345,94 +2964,167 @@ void intel_iommu_domain_exit(struct dmar_domain *domain)
/* free page tables */
dma_pte_free_pagetable(domain, 0, end);
iommu_free_domain(domain);
iommu_free_vm_domain(domain);
free_domain_mem(domain);
}
EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
static int intel_iommu_domain_init(struct iommu_domain *domain)
{
struct dmar_drhd_unit *drhd;
struct dmar_domain *domain;
struct intel_iommu *iommu;
drhd = dmar_find_matched_drhd_unit(pdev);
if (!drhd) {
printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
return NULL;
}
struct dmar_domain *dmar_domain;
iommu = drhd->iommu;
if (!iommu) {
printk(KERN_ERR
"intel_iommu_domain_alloc: iommu == NULL\n");
return NULL;
}
domain = iommu_alloc_domain(iommu);
if (!domain) {
dmar_domain = iommu_alloc_vm_domain();
if (!dmar_domain) {
printk(KERN_ERR
"intel_iommu_domain_alloc: domain == NULL\n");
return NULL;
"intel_iommu_domain_init: dmar_domain == NULL\n");
return -ENOMEM;
}
if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
printk(KERN_ERR
"intel_iommu_domain_alloc: domain_init() failed\n");
intel_iommu_domain_exit(domain);
return NULL;
"intel_iommu_domain_init() failed\n");
vm_domain_exit(dmar_domain);
return -ENOMEM;
}
return domain;
domain->priv = dmar_domain;
return 0;
}
EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
int intel_iommu_context_mapping(
struct dmar_domain *domain, struct pci_dev *pdev)
static void intel_iommu_domain_destroy(struct iommu_domain *domain)
{
int rc;
rc = domain_context_mapping(domain, pdev);
return rc;
struct dmar_domain *dmar_domain = domain->priv;
domain->priv = NULL;
vm_domain_exit(dmar_domain);
}
EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
int intel_iommu_page_mapping(
struct dmar_domain *domain, dma_addr_t iova,
u64 hpa, size_t size, int prot)
static int intel_iommu_attach_device(struct iommu_domain *domain,
struct device *dev)
{
int rc;
rc = domain_page_mapping(domain, iova, hpa, size, prot);
return rc;
struct dmar_domain *dmar_domain = domain->priv;
struct pci_dev *pdev = to_pci_dev(dev);
struct intel_iommu *iommu;
int addr_width;
u64 end;
int ret;
/* normally pdev is not mapped */
if (unlikely(domain_context_mapped(pdev))) {
struct dmar_domain *old_domain;
old_domain = find_domain(pdev);
if (old_domain) {
if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
vm_domain_remove_one_dev_info(old_domain, pdev);
else
domain_remove_dev_info(old_domain);
}
}
iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
if (!iommu)
return -ENODEV;
/* check if this iommu agaw is sufficient for max mapped address */
addr_width = agaw_to_width(iommu->agaw);
end = DOMAIN_MAX_ADDR(addr_width);
end = end & VTD_PAGE_MASK;
if (end < dmar_domain->max_addr) {
printk(KERN_ERR "%s: iommu agaw (%d) is not "
"sufficient for the mapped address (%llx)\n",
__func__, iommu->agaw, dmar_domain->max_addr);
return -EFAULT;
}
ret = domain_context_mapping(dmar_domain, pdev);
if (ret)
return ret;
ret = vm_domain_add_dev_info(dmar_domain, pdev);
return ret;
}
EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
static void intel_iommu_detach_device(struct iommu_domain *domain,
struct device *dev)
{
detach_domain_for_dev(domain, bus, devfn);
struct dmar_domain *dmar_domain = domain->priv;
struct pci_dev *pdev = to_pci_dev(dev);
vm_domain_remove_one_dev_info(dmar_domain, pdev);
}
EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
struct dmar_domain *
intel_iommu_find_domain(struct pci_dev *pdev)
static int intel_iommu_map_range(struct iommu_domain *domain,
unsigned long iova, phys_addr_t hpa,
size_t size, int iommu_prot)
{
return find_domain(pdev);
struct dmar_domain *dmar_domain = domain->priv;
u64 max_addr;
int addr_width;
int prot = 0;
int ret;
if (iommu_prot & IOMMU_READ)
prot |= DMA_PTE_READ;
if (iommu_prot & IOMMU_WRITE)
prot |= DMA_PTE_WRITE;
max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
if (dmar_domain->max_addr < max_addr) {
int min_agaw;
u64 end;
/* check if minimum agaw is sufficient for mapped address */
min_agaw = vm_domain_min_agaw(dmar_domain);
addr_width = agaw_to_width(min_agaw);
end = DOMAIN_MAX_ADDR(addr_width);
end = end & VTD_PAGE_MASK;
if (end < max_addr) {
printk(KERN_ERR "%s: iommu agaw (%d) is not "
"sufficient for the mapped address (%llx)\n",
__func__, min_agaw, max_addr);
return -EFAULT;
}
dmar_domain->max_addr = max_addr;
}
ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot);
return ret;
}
EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
int intel_iommu_found(void)
static void intel_iommu_unmap_range(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
return g_num_of_iommus;
struct dmar_domain *dmar_domain = domain->priv;
dma_addr_t base;
/* The address might not be aligned */
base = iova & VTD_PAGE_MASK;
size = VTD_PAGE_ALIGN(size);
dma_pte_clear_range(dmar_domain, base, base + size);
if (dmar_domain->max_addr == base + size)
dmar_domain->max_addr = base;
}
EXPORT_SYMBOL_GPL(intel_iommu_found);
u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
unsigned long iova)
{
struct dmar_domain *dmar_domain = domain->priv;
struct dma_pte *pte;
u64 pfn;
pfn = 0;
pte = addr_to_dma_pte(domain, iova);
u64 phys = 0;
pte = addr_to_dma_pte(dmar_domain, iova);
if (pte)
pfn = dma_pte_addr(*pte);
phys = dma_pte_addr(pte);
return pfn >> VTD_PAGE_SHIFT;
return phys;
}
EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
static struct iommu_ops intel_iommu_ops = {
.domain_init = intel_iommu_domain_init,
.domain_destroy = intel_iommu_domain_destroy,
.attach_dev = intel_iommu_attach_device,
.detach_dev = intel_iommu_detach_device,
.map = intel_iommu_map_range,
.unmap = intel_iommu_unmap_range,
.iova_to_phys = intel_iommu_iova_to_phys,
};
......@@ -9,148 +9,16 @@
#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT)
#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)
#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
/*
* 0: Present
* 1-11: Reserved
* 12-63: Context Ptr (12 - (haw-1))
* 64-127: Reserved
*/
struct root_entry {
u64 val;
u64 rsvd1;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
static inline bool root_present(struct root_entry *root)
{
return (root->val & 1);
}
static inline void set_root_present(struct root_entry *root)
{
root->val |= 1;
}
static inline void set_root_value(struct root_entry *root, unsigned long value)
{
root->val |= value & VTD_PAGE_MASK;
}
struct context_entry;
static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
return (struct context_entry *)
(root_present(root)?phys_to_virt(
root->val & VTD_PAGE_MASK) :
NULL);
}
/*
* low 64 bits:
* 0: present
* 1: fault processing disable
* 2-3: translation type
* 12-63: address space root
* high 64 bits:
* 0-2: address width
* 3-6: aval
* 8-23: domain id
*/
struct context_entry {
u64 lo;
u64 hi;
};
#define context_present(c) ((c).lo & 1)
#define context_fault_disable(c) (((c).lo >> 1) & 1)
#define context_translation_type(c) (((c).lo >> 2) & 3)
#define context_address_root(c) ((c).lo & VTD_PAGE_MASK)
#define context_address_width(c) ((c).hi & 7)
#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
#define context_set_present(c) do {(c).lo |= 1;} while (0)
#define context_set_fault_enable(c) \
do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
#define context_set_translation_type(c, val) \
do { \
(c).lo &= (((u64)-1) << 4) | 3; \
(c).lo |= ((val) & 3) << 2; \
} while (0)
#define CONTEXT_TT_MULTI_LEVEL 0
#define context_set_address_root(c, val) \
do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0)
#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
#define context_set_domain_id(c, val) \
do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
/*
* 0: readable
* 1: writable
* 2-6: reserved
* 7: super page
* 8-11: available
* 12-63: Host physcial address
*/
struct dma_pte {
u64 val;
};
#define dma_clear_pte(p) do {(p).val = 0;} while (0)
#define DMA_PTE_READ (1)
#define DMA_PTE_WRITE (2)
#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
#define dma_set_pte_prot(p, prot) \
do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
#define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK)
#define dma_set_pte_addr(p, addr) do {\
(p).val |= ((addr) & VTD_PAGE_MASK); } while (0)
#define dma_pte_present(p) (((p).val & 3) != 0)
struct intel_iommu;
struct dmar_domain;
struct root_entry;
struct dmar_domain {
int id; /* domain id */
struct intel_iommu *iommu; /* back pointer to owning iommu */
struct list_head devices; /* all devices' list */
struct iova_domain iovad; /* iova's that belong to this domain */
struct dma_pte *pgd; /* virtual address */
spinlock_t mapping_lock; /* page table lock */
int gaw; /* max guest address width */
/* adjusted guest address width, 0 is level 2 30-bit */
int agaw;
#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
int flags;
};
/* PCI domain-device relationship */
struct device_domain_info {
struct list_head link; /* link to domain siblings */
struct list_head global; /* link to global list */
u8 bus; /* PCI bus numer */
u8 devfn; /* PCI devfn number */
struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
struct dmar_domain *domain; /* pointer to domain */
};
extern int init_dmars(void);
extern void free_dmar_iommu(struct intel_iommu *iommu);
extern int iommu_calculate_agaw(struct intel_iommu *iommu);
extern int dmar_disabled;
#ifndef CONFIG_DMAR_GFX_WA
static inline void iommu_prepare_gfx_mapping(void)
{
return;
}
#endif /* !CONFIG_DMAR_GFX_WA */
#endif
......@@ -144,7 +144,6 @@ struct dmar_rmrr_unit {
list_for_each_entry(rmrr, &dmar_rmrr_units, list)
/* Intel DMAR initialization functions */
extern int intel_iommu_init(void);
extern int dmar_disabled;
#else
static inline int intel_iommu_init(void)
{
......
......@@ -23,8 +23,6 @@
#define _INTEL_IOMMU_H_
#include <linux/types.h>
#include <linux/msi.h>
#include <linux/sysdev.h>
#include <linux/iova.h>
#include <linux/io.h>
#include <linux/dma_remapping.h>
......@@ -289,10 +287,10 @@ struct intel_iommu {
void __iomem *reg; /* Pointer to hardware regs, virtual addr */
u64 cap;
u64 ecap;
int seg;
u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
spinlock_t register_lock; /* protect register handling */
int seq_id; /* sequence id of the iommu */
int agaw; /* agaw of this iommu */
#ifdef CONFIG_DMAR
unsigned long *domain_ids; /* bitmap of domains */
......@@ -302,8 +300,6 @@ struct intel_iommu {
unsigned int irq;
unsigned char name[7]; /* Device Name */
struct msi_msg saved_msg;
struct sys_device sysdev;
struct iommu_flush flush;
#endif
struct q_inval *qi; /* Queued invalidation info */
......@@ -334,25 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
void intel_iommu_domain_exit(struct dmar_domain *domain);
struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev);
int intel_iommu_context_mapping(struct dmar_domain *domain,
struct pci_dev *pdev);
int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
u64 hpa, size_t size, int prot);
void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn);
struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev);
u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
#ifdef CONFIG_DMAR
int intel_iommu_found(void);
#else /* CONFIG_DMAR */
static inline int intel_iommu_found(void)
{
return 0;
}
#endif /* CONFIG_DMAR */
extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t);
extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t);
extern dma_addr_t intel_map_single(struct device *, phys_addr_t, size_t, int);
......
/*
* Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <joerg.roedel@amd.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __LINUX_IOMMU_H
#define __LINUX_IOMMU_H
#define IOMMU_READ (1)
#define IOMMU_WRITE (2)
struct device;
struct iommu_domain {
void *priv;
};
struct iommu_ops {
int (*domain_init)(struct iommu_domain *domain);
void (*domain_destroy)(struct iommu_domain *domain);
int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
int (*map)(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot);
void (*unmap)(struct iommu_domain *domain, unsigned long iova,
size_t size);
phys_addr_t (*iova_to_phys)(struct iommu_domain *domain,
unsigned long iova);
};
#ifdef CONFIG_IOMMU_API
extern void register_iommu(struct iommu_ops *ops);
extern bool iommu_found(void);
extern struct iommu_domain *iommu_domain_alloc(void);
extern void iommu_domain_free(struct iommu_domain *domain);
extern int iommu_attach_device(struct iommu_domain *domain,
struct device *dev);
extern void iommu_detach_device(struct iommu_domain *domain,
struct device *dev);
extern int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot);
extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
size_t size);
extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
unsigned long iova);
#else /* CONFIG_IOMMU_API */
static inline void register_iommu(struct iommu_ops *ops)
{
}
static inline bool iommu_found(void)
{
return false;
}
static inline struct iommu_domain *iommu_domain_alloc(void)
{
return NULL;
}
static inline void iommu_domain_free(struct iommu_domain *domain)
{
}
static inline int iommu_attach_device(struct iommu_domain *domain,
struct device *dev)
{
return -ENODEV;
}
static inline void iommu_detach_device(struct iommu_domain *domain,
struct device *dev)
{
}
static inline int iommu_map_range(struct iommu_domain *domain,
unsigned long iova, phys_addr_t paddr,
size_t size, int prot)
{
return -ENODEV;
}
static inline void iommu_unmap_range(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
}
static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
unsigned long iova)
{
return 0;
}
#endif /* CONFIG_IOMMU_API */
#endif /* __LINUX_IOMMU_H */
......@@ -316,6 +316,7 @@ struct kvm_assigned_dev_kernel {
#define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9)
unsigned long irq_requested_type;
int irq_source_id;
int flags;
struct pci_dev *dev;
struct kvm *kvm;
};
......@@ -327,13 +328,16 @@ void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
int kvm_request_irq_source_id(struct kvm *kvm);
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
#ifdef CONFIG_DMAR
#ifdef CONFIG_IOMMU_API
int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
unsigned long npages);
int kvm_iommu_map_guest(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev);
int kvm_iommu_map_guest(struct kvm *kvm);
int kvm_iommu_unmap_guest(struct kvm *kvm);
#else /* CONFIG_DMAR */
int kvm_assign_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev);
int kvm_deassign_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev);
#else /* CONFIG_IOMMU_API */
static inline int kvm_iommu_map_pages(struct kvm *kvm,
gfn_t base_gfn,
unsigned long npages)
......@@ -341,9 +345,7 @@ static inline int kvm_iommu_map_pages(struct kvm *kvm,
return 0;
}
static inline int kvm_iommu_map_guest(struct kvm *kvm,
struct kvm_assigned_dev_kernel
*assigned_dev)
static inline int kvm_iommu_map_guest(struct kvm *kvm)
{
return -ENODEV;
}
......@@ -352,7 +354,19 @@ static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
{
return 0;
}
#endif /* CONFIG_DMAR */
static inline int kvm_assign_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
return 0;
}
static inline int kvm_deassign_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
return 0;
}
#endif /* CONFIG_IOMMU_API */
static inline void kvm_guest_enter(void)
{
......
......@@ -25,6 +25,7 @@
#include <linux/kvm_host.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
static int kvm_iommu_unmap_memslots(struct kvm *kvm);
......@@ -37,7 +38,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
gfn_t gfn = base_gfn;
pfn_t pfn;
int i, r = 0;
struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
struct iommu_domain *domain = kvm->arch.iommu_domain;
/* check if iommu exists and in use */
if (!domain)
......@@ -45,20 +46,17 @@ int kvm_iommu_map_pages(struct kvm *kvm,
for (i = 0; i < npages; i++) {
/* check if already mapped */
pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
gfn_to_gpa(gfn));
if (pfn)
if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
continue;
pfn = gfn_to_pfn(kvm, gfn);
r = intel_iommu_page_mapping(domain,
gfn_to_gpa(gfn),
pfn_to_hpa(pfn),
PAGE_SIZE,
DMA_PTE_READ |
DMA_PTE_WRITE);
r = iommu_map_range(domain,
gfn_to_gpa(gfn),
pfn_to_hpa(pfn),
PAGE_SIZE,
IOMMU_READ | IOMMU_WRITE);
if (r) {
printk(KERN_ERR "kvm_iommu_map_pages:"
printk(KERN_ERR "kvm_iommu_map_address:"
"iommu failed to map pfn=%lx\n", pfn);
goto unmap_pages;
}
......@@ -73,7 +71,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
static int kvm_iommu_map_memslots(struct kvm *kvm)
{
int i, r;
int i, r = 0;
down_read(&kvm->slots_lock);
for (i = 0; i < kvm->nmemslots; i++) {
......@@ -86,50 +84,79 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
return r;
}
int kvm_iommu_map_guest(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
int kvm_assign_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
struct pci_dev *pdev = NULL;
struct iommu_domain *domain = kvm->arch.iommu_domain;
int r;
if (!intel_iommu_found()) {
printk(KERN_ERR "%s: intel iommu not found\n", __func__);
/* check if iommu exists and in use */
if (!domain)
return 0;
pdev = assigned_dev->dev;
if (pdev == NULL)
return -ENODEV;
r = iommu_attach_device(domain, &pdev->dev);
if (r) {
printk(KERN_ERR "assign device %x:%x.%x failed",
pdev->bus->number,
PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn));
return r;
}
printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
assigned_dev->host_busnr,
PCI_SLOT(assigned_dev->host_devfn),
PCI_FUNC(assigned_dev->host_devfn));
printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
assigned_dev->host_busnr,
PCI_SLOT(assigned_dev->host_devfn),
PCI_FUNC(assigned_dev->host_devfn));
return 0;
}
int kvm_deassign_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
struct iommu_domain *domain = kvm->arch.iommu_domain;
struct pci_dev *pdev = NULL;
/* check if iommu exists and in use */
if (!domain)
return 0;
pdev = assigned_dev->dev;
if (pdev == NULL)
return -ENODEV;
if (pdev == NULL) {
if (kvm->arch.intel_iommu_domain) {
intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
kvm->arch.intel_iommu_domain = NULL;
}
iommu_detach_device(domain, &pdev->dev);
printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n",
assigned_dev->host_busnr,
PCI_SLOT(assigned_dev->host_devfn),
PCI_FUNC(assigned_dev->host_devfn));
return 0;
}
int kvm_iommu_map_guest(struct kvm *kvm)
{
int r;
if (!iommu_found()) {
printk(KERN_ERR "%s: iommu not found\n", __func__);
return -ENODEV;
}
kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
if (!kvm->arch.intel_iommu_domain)
return -ENODEV;
kvm->arch.iommu_domain = iommu_domain_alloc();
if (!kvm->arch.iommu_domain)
return -ENOMEM;
r = kvm_iommu_map_memslots(kvm);
if (r)
goto out_unmap;
intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
pdev->bus->number, pdev->devfn);
r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
pdev);
if (r) {
printk(KERN_ERR "Domain context map for %s failed",
pci_name(pdev));
goto out_unmap;
}
return 0;
out_unmap:
......@@ -138,19 +165,26 @@ int kvm_iommu_map_guest(struct kvm *kvm,
}
static void kvm_iommu_put_pages(struct kvm *kvm,
gfn_t base_gfn, unsigned long npages)
gfn_t base_gfn, unsigned long npages)
{
gfn_t gfn = base_gfn;
pfn_t pfn;
struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
int i;
struct iommu_domain *domain = kvm->arch.iommu_domain;
unsigned long i;
u64 phys;
/* check if iommu exists and in use */
if (!domain)
return;
for (i = 0; i < npages; i++) {
pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
gfn_to_gpa(gfn));
phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
pfn = phys >> PAGE_SHIFT;
kvm_release_pfn_clean(pfn);
gfn++;
}
iommu_unmap_range(domain, gfn_to_gpa(base_gfn), PAGE_SIZE * npages);
}
static int kvm_iommu_unmap_memslots(struct kvm *kvm)
......@@ -168,24 +202,13 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm)
int kvm_iommu_unmap_guest(struct kvm *kvm)
{
struct kvm_assigned_dev_kernel *entry;
struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
struct iommu_domain *domain = kvm->arch.iommu_domain;
/* check if iommu exists and in use */
if (!domain)
return 0;
list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
entry->host_busnr,
PCI_SLOT(entry->host_devfn),
PCI_FUNC(entry->host_devfn));
/* detach kvm dmar domain */
intel_iommu_detach_dev(domain, entry->host_busnr,
entry->host_devfn);
}
kvm_iommu_unmap_memslots(kvm);
intel_iommu_domain_exit(domain);
iommu_domain_free(domain);
return 0;
}
......@@ -496,6 +496,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
match->assigned_dev_id = assigned_dev->assigned_dev_id;
match->host_busnr = assigned_dev->busnr;
match->host_devfn = assigned_dev->devfn;
match->flags = assigned_dev->flags;
match->dev = dev;
match->irq_source_id = -1;
match->kvm = kvm;
......@@ -503,7 +504,12 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
list_add(&match->list, &kvm->arch.assigned_dev_head);
if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
r = kvm_iommu_map_guest(kvm, match);
if (!kvm->arch.iommu_domain) {
r = kvm_iommu_map_guest(kvm);
if (r)
goto out_list_del;
}
r = kvm_assign_device(kvm, match);
if (r)
goto out_list_del;
}
......@@ -525,6 +531,35 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
}
#endif
#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
struct kvm_assigned_pci_dev *assigned_dev)
{
int r = 0;
struct kvm_assigned_dev_kernel *match;
mutex_lock(&kvm->lock);
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
assigned_dev->assigned_dev_id);
if (!match) {
printk(KERN_INFO "%s: device hasn't been assigned before, "
"so cannot be deassigned\n", __func__);
r = -EINVAL;
goto out;
}
if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
kvm_deassign_device(kvm, match);
kvm_free_assigned_device(kvm, match);
out:
mutex_unlock(&kvm->lock);
return r;
}
#endif
static inline int valid_vcpu(int n)
{
return likely(n >= 0 && n < KVM_MAX_VCPUS);
......@@ -1857,6 +1892,19 @@ static long kvm_vm_ioctl(struct file *filp,
goto out;
break;
}
#endif
#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
case KVM_DEASSIGN_PCI_DEVICE: {
struct kvm_assigned_pci_dev assigned_dev;
r = -EFAULT;
if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
goto out;
r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
if (r)
goto out;
break;
}
#endif
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment