Commit 2cd83ba5 authored by Linus Torvalds

Merge tag 'iommu-v4.15-rc1' of git://github.com/awilliam/linux-vfio

Pull IOMMU updates from Alex Williamson:
 "As Joerg mentioned[1], he's out on paternity leave through the end of
  the year and I'm filling in for him in the interim:

   - Enforce MSI multiple IRQ alignment in AMD IOMMU (see the first sketch
     after this list)

   - VT-d PASID error handling fixes

   - Add r8a7795 IPMMU support

   - Manage runtime PM links on exynos at {add,remove}_device callbacks

   - Fix Mediatek driver name to avoid conflict

   - Add terminate support to qcom fault handler

   - 64-bit IOVA optimizations

   - Simplify IOVA domain destruction, better use of rcache, and skip
     anchor nodes on copy

   - Convert to IOMMU TLB sync API in io-pgtable-arm{-v7s}

   - Drop the command queue lock when waiting for CMD_SYNC completion on
     ARM SMMU implementations supporting MSI to cacheable memory (see the
     second sketch after this list)

   - ipmmu-vmsa cleanup inspired by missed IOTLB sync callbacks

   - Fix sleeping lock with preemption disabled for RT

   - Dual MMU support for TI DRA7xx DSPs

   - Optional flush option on IOVA allocation, avoiding overhead when the
     caller can try other options (see the third sketch after this list)

  [1] https://lkml.org/lkml/2017/10/22/72"
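
The AMD IOMMU item above reworks alloc_irq_index() (see the amd_iommu.c hunks
below) so that multi-MSI allocations start at an index aligned to the
power-of-two vector count. The following is a rough user-space sketch of that
scan, assuming a plain boolean table in place of the IRTE accessors;
find_aligned_run() and its helpers are illustrative stand-ins, not kernel APIs.

#include <stdbool.h>
#include <stdio.h>

#define TABLE_SIZE	32
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

/* Round v up to the next power of two (v >= 1). */
static int roundup_pow_of_two_int(int v)
{
	int p = 1;

	while (p < v)
		p <<= 1;
	return p;
}

/*
 * Find `count` consecutive free slots whose start index is aligned to
 * roundup_pow_of_two(count) when `align` is set, mirroring the reworked
 * scan in alloc_irq_index(). Returns the start index, or -1 if none.
 */
static int find_aligned_run(const bool *allocated, int count, bool align)
{
	int alignment = align ? roundup_pow_of_two_int(count) : 1;
	int index = 0, c = 0;

	while (index < TABLE_SIZE) {
		if (allocated[index]) {
			/* Run broken: restart at the next aligned slot. */
			c = 0;
			index = ALIGN_UP(index + 1, alignment);
			continue;
		}
		if (++c == count)
			return index - (count - 1);
		index++;
	}
	return -1;
}

int main(void)
{
	bool allocated[TABLE_SIZE] = { [0] = true, [5] = true };

	/* Four MSI vectors must start 4-aligned: first fit is 8..11. */
	printf("aligned:   %d\n", find_aligned_run(allocated, 4, true));
	/* Without the alignment requirement the run 1..4 already fits. */
	printf("unaligned: %d\n", find_aligned_run(allocated, 4, false));
	return 0;
}

In the driver itself the alignment is only requested for
X86_IRQ_ALLOC_TYPE_MSI allocations, as the irq_remapping_alloc() hunk shows.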
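
The CMD_SYNC item above gives each sync command a 32-bit sequence number that
the SMMU writes back via MSI to a cacheable sync_count field, so waiters can
poll that field outside the command queue lock (see __arm_smmu_sync_poll_msi()
in the arm-smmu-v3.c hunks below). A minimal user-space sketch of the wrap-safe
completion test used there, not the kernel code itself:

#include <stdint.h>
#include <stdio.h>

/*
 * True once the observed counter has reached or passed sync_idx.
 * Signed 32-bit subtraction stays correct across wraparound as long as
 * the two values are less than 2^31 apart, which the queue size
 * (at most 2^20 entries) guarantees.
 */
static int sync_complete(uint32_t observed, uint32_t sync_idx)
{
	return (int32_t)(observed - sync_idx) >= 0;
}

int main(void)
{
	printf("%d\n", sync_complete(105, 100));           /* 1: already passed */
	printf("%d\n", sync_complete(99, 100));            /* 0: not yet */
	printf("%d\n", sync_complete(5, UINT32_MAX - 3));  /* 1: counter wrapped */
	return 0;
}

In the driver this is the condition evaluated by smp_cond_load_acquire() on
smmu->sync_count, with the poll timeout folded into the same expression.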
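
The IOVA item above adds a flush_rcache argument to alloc_iova_fast(): callers
with a cheaper fallback (for example retrying with the full DMA mask after a
failed 32-bit attempt) pass false so a failure does not trigger the expensive
per-CPU rcache flush, and pass true only on the final attempt. A sketch of that
calling pattern, with try_alloc() as a hypothetical stand-in for the allocator:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for alloc_iova_fast(iovad, pages, limit_pfn, flush). */
static unsigned long try_alloc(unsigned long pages, unsigned long limit_pfn,
			       bool flush_rcache)
{
	printf("alloc %lu pages below pfn %#lx%s\n",
	       pages, limit_pfn, flush_rcache ? " (may flush rcache)" : "");
	/* Pretend only the final, flush-allowed attempt succeeds. */
	return flush_rcache ? limit_pfn - pages : 0;
}

int main(void)
{
	unsigned long pages = 4, pfn;
	const unsigned long pfn_32bit = 0xffffffffUL >> 12;    /* 32-bit mask, 4K pages */
	const unsigned long pfn_mask  = 0xffffffffffULL >> 12; /* e.g. a 40-bit DMA mask */

	/* First try below 4GiB without allowing the rcache flush... */
	pfn = try_alloc(pages, pfn_32bit, false);
	/* ...and only permit the flush-and-retry path on the last attempt. */
	if (!pfn)
		pfn = try_alloc(pages, pfn_mask, true);

	return pfn ? 0 : 1;
}

This matches the updated AMD and VT-d call sites in the diff, which pass false
for the DMA_BIT_MASK(32) attempt and true only for the full-mask retry.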

* tag 'iommu-v4.15-rc1' of git://github.com/awilliam/linux-vfio: (54 commits)
  iommu/iova: Use raw_cpu_ptr() instead of get_cpu_ptr() for ->fq
  iommu/mediatek: Fix driver name
  iommu/ipmmu-vmsa: Hook up r8a7795 DT matching code
  iommu/ipmmu-vmsa: Allow two bit SL0
  iommu/ipmmu-vmsa: Make IMBUSCTR setup optional
  iommu/ipmmu-vmsa: Write IMCTR twice
  iommu/ipmmu-vmsa: IPMMU device is 40-bit bus master
  iommu/ipmmu-vmsa: Make use of IOMMU_OF_DECLARE()
  iommu/ipmmu-vmsa: Enable multi context support
  iommu/ipmmu-vmsa: Add optional root device feature
  iommu/ipmmu-vmsa: Introduce features, break out alias
  iommu/ipmmu-vmsa: Unify ipmmu_ops
  iommu/ipmmu-vmsa: Clean up struct ipmmu_vmsa_iommu_priv
  iommu/ipmmu-vmsa: Simplify group allocation
  iommu/ipmmu-vmsa: Unify domain alloc/free
  iommu/ipmmu-vmsa: Fix return value check in ipmmu_find_group_dma()
  iommu/vt-d: Clear pasid table entry when memory unbound
  iommu/vt-d: Clear Page Request Overflow fault bit
  iommu/vt-d: Missing checks for pasid tables if allocation fails
  iommu/amd: Limit the IOVA page range to the specified addresses
  ...
parents 670ffccb 56f19441
...@@ -155,8 +155,7 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags) ...@@ -155,8 +155,7 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
order = __ffs(tegra->domain->pgsize_bitmap); order = __ffs(tegra->domain->pgsize_bitmap);
init_iova_domain(&tegra->carveout.domain, 1UL << order, init_iova_domain(&tegra->carveout.domain, 1UL << order,
carveout_start >> order, carveout_start >> order);
carveout_end >> order);
tegra->carveout.shift = iova_shift(&tegra->carveout.domain); tegra->carveout.shift = iova_shift(&tegra->carveout.domain);
tegra->carveout.limit = carveout_end >> tegra->carveout.shift; tegra->carveout.limit = carveout_end >> tegra->carveout.shift;
......
...@@ -198,8 +198,7 @@ static int host1x_probe(struct platform_device *pdev) ...@@ -198,8 +198,7 @@ static int host1x_probe(struct platform_device *pdev)
order = __ffs(host->domain->pgsize_bitmap); order = __ffs(host->domain->pgsize_bitmap);
init_iova_domain(&host->iova, 1UL << order, init_iova_domain(&host->iova, 1UL << order,
geometry->aperture_start >> order, geometry->aperture_start >> order);
geometry->aperture_end >> order);
host->iova_end = geometry->aperture_end; host->iova_end = geometry->aperture_end;
} }
......
...@@ -63,7 +63,6 @@ ...@@ -63,7 +63,6 @@
/* IO virtual address start page frame number */ /* IO virtual address start page frame number */
#define IOVA_START_PFN (1) #define IOVA_START_PFN (1)
#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
/* Reserved IOVA ranges */ /* Reserved IOVA ranges */
#define MSI_RANGE_START (0xfee00000) #define MSI_RANGE_START (0xfee00000)
...@@ -1547,10 +1546,11 @@ static unsigned long dma_ops_alloc_iova(struct device *dev, ...@@ -1547,10 +1546,11 @@ static unsigned long dma_ops_alloc_iova(struct device *dev,
if (dma_mask > DMA_BIT_MASK(32)) if (dma_mask > DMA_BIT_MASK(32))
pfn = alloc_iova_fast(&dma_dom->iovad, pages, pfn = alloc_iova_fast(&dma_dom->iovad, pages,
IOVA_PFN(DMA_BIT_MASK(32))); IOVA_PFN(DMA_BIT_MASK(32)), false);
if (!pfn) if (!pfn)
pfn = alloc_iova_fast(&dma_dom->iovad, pages, IOVA_PFN(dma_mask)); pfn = alloc_iova_fast(&dma_dom->iovad, pages,
IOVA_PFN(dma_mask), true);
return (pfn << PAGE_SHIFT); return (pfn << PAGE_SHIFT);
} }
...@@ -1788,8 +1788,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) ...@@ -1788,8 +1788,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
if (!dma_dom->domain.pt_root) if (!dma_dom->domain.pt_root)
goto free_dma_dom; goto free_dma_dom;
init_iova_domain(&dma_dom->iovad, PAGE_SIZE, init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN);
IOVA_START_PFN, DMA_32BIT_PFN);
if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL)) if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL))
goto free_dma_dom; goto free_dma_dom;
...@@ -2383,11 +2382,9 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, ...@@ -2383,11 +2382,9 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
size_t size, size_t size,
int dir) int dir)
{ {
dma_addr_t flush_addr;
dma_addr_t i, start; dma_addr_t i, start;
unsigned int pages; unsigned int pages;
flush_addr = dma_addr;
pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
dma_addr &= PAGE_MASK; dma_addr &= PAGE_MASK;
start = dma_addr; start = dma_addr;
...@@ -2696,8 +2693,7 @@ static int init_reserved_iova_ranges(void) ...@@ -2696,8 +2693,7 @@ static int init_reserved_iova_ranges(void)
struct pci_dev *pdev = NULL; struct pci_dev *pdev = NULL;
struct iova *val; struct iova *val;
init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, IOVA_START_PFN);
IOVA_START_PFN, DMA_32BIT_PFN);
lockdep_set_class(&reserved_iova_ranges.iova_rbtree_lock, lockdep_set_class(&reserved_iova_ranges.iova_rbtree_lock,
&reserved_rbtree_key); &reserved_rbtree_key);
...@@ -3155,7 +3151,7 @@ static void amd_iommu_apply_resv_region(struct device *dev, ...@@ -3155,7 +3151,7 @@ static void amd_iommu_apply_resv_region(struct device *dev,
unsigned long start, end; unsigned long start, end;
start = IOVA_PFN(region->start); start = IOVA_PFN(region->start);
end = IOVA_PFN(region->start + region->length); end = IOVA_PFN(region->start + region->length - 1);
WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL); WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL);
} }
...@@ -3663,11 +3659,11 @@ static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic) ...@@ -3663,11 +3659,11 @@ static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic)
return table; return table;
} }
static int alloc_irq_index(u16 devid, int count) static int alloc_irq_index(u16 devid, int count, bool align)
{ {
struct irq_remap_table *table; struct irq_remap_table *table;
int index, c, alignment = 1;
unsigned long flags; unsigned long flags;
int index, c;
struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
if (!iommu) if (!iommu)
...@@ -3677,16 +3673,21 @@ static int alloc_irq_index(u16 devid, int count) ...@@ -3677,16 +3673,21 @@ static int alloc_irq_index(u16 devid, int count)
if (!table) if (!table)
return -ENODEV; return -ENODEV;
if (align)
alignment = roundup_pow_of_two(count);
spin_lock_irqsave(&table->lock, flags); spin_lock_irqsave(&table->lock, flags);
/* Scan table for free entries */ /* Scan table for free entries */
for (c = 0, index = table->min_index; for (index = ALIGN(table->min_index, alignment), c = 0;
index < MAX_IRQS_PER_TABLE; index < MAX_IRQS_PER_TABLE;) {
++index) { if (!iommu->irte_ops->is_allocated(table, index)) {
if (!iommu->irte_ops->is_allocated(table, index))
c += 1; c += 1;
else } else {
c = 0; c = 0;
index = ALIGN(index + 1, alignment);
continue;
}
if (c == count) { if (c == count) {
for (; c != 0; --c) for (; c != 0; --c)
...@@ -3695,6 +3696,8 @@ static int alloc_irq_index(u16 devid, int count) ...@@ -3695,6 +3696,8 @@ static int alloc_irq_index(u16 devid, int count)
index -= count - 1; index -= count - 1;
goto out; goto out;
} }
index++;
} }
index = -ENOSPC; index = -ENOSPC;
...@@ -4099,7 +4102,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, ...@@ -4099,7 +4102,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
else else
ret = -ENOMEM; ret = -ENOMEM;
} else { } else {
index = alloc_irq_index(devid, nr_irqs); bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI);
index = alloc_irq_index(devid, nr_irqs, align);
} }
if (index < 0) { if (index < 0) {
pr_warn("Failed to allocate IRTE\n"); pr_warn("Failed to allocate IRTE\n");
......
...@@ -316,6 +316,7 @@ ...@@ -316,6 +316,7 @@
#define ARM64_TCR_TBI0_MASK 0x1UL #define ARM64_TCR_TBI0_MASK 0x1UL
#define CTXDESC_CD_0_AA64 (1UL << 41) #define CTXDESC_CD_0_AA64 (1UL << 41)
#define CTXDESC_CD_0_S (1UL << 44)
#define CTXDESC_CD_0_R (1UL << 45) #define CTXDESC_CD_0_R (1UL << 45)
#define CTXDESC_CD_0_A (1UL << 46) #define CTXDESC_CD_0_A (1UL << 46)
#define CTXDESC_CD_0_ASET_SHIFT 47 #define CTXDESC_CD_0_ASET_SHIFT 47
...@@ -377,7 +378,16 @@ ...@@ -377,7 +378,16 @@
#define CMDQ_SYNC_0_CS_SHIFT 12 #define CMDQ_SYNC_0_CS_SHIFT 12
#define CMDQ_SYNC_0_CS_NONE (0UL << CMDQ_SYNC_0_CS_SHIFT) #define CMDQ_SYNC_0_CS_NONE (0UL << CMDQ_SYNC_0_CS_SHIFT)
#define CMDQ_SYNC_0_CS_IRQ (1UL << CMDQ_SYNC_0_CS_SHIFT)
#define CMDQ_SYNC_0_CS_SEV (2UL << CMDQ_SYNC_0_CS_SHIFT) #define CMDQ_SYNC_0_CS_SEV (2UL << CMDQ_SYNC_0_CS_SHIFT)
#define CMDQ_SYNC_0_MSH_SHIFT 22
#define CMDQ_SYNC_0_MSH_ISH (3UL << CMDQ_SYNC_0_MSH_SHIFT)
#define CMDQ_SYNC_0_MSIATTR_SHIFT 24
#define CMDQ_SYNC_0_MSIATTR_OIWB (0xfUL << CMDQ_SYNC_0_MSIATTR_SHIFT)
#define CMDQ_SYNC_0_MSIDATA_SHIFT 32
#define CMDQ_SYNC_0_MSIDATA_MASK 0xffffffffUL
#define CMDQ_SYNC_1_MSIADDR_SHIFT 0
#define CMDQ_SYNC_1_MSIADDR_MASK 0xffffffffffffcUL
/* Event queue */ /* Event queue */
#define EVTQ_ENT_DWORDS 4 #define EVTQ_ENT_DWORDS 4
...@@ -408,20 +418,12 @@ ...@@ -408,20 +418,12 @@
/* High-level queue structures */ /* High-level queue structures */
#define ARM_SMMU_POLL_TIMEOUT_US 100 #define ARM_SMMU_POLL_TIMEOUT_US 100
#define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US 1000000 /* 1s! */ #define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US 1000000 /* 1s! */
#define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT 10
#define MSI_IOVA_BASE 0x8000000 #define MSI_IOVA_BASE 0x8000000
#define MSI_IOVA_LENGTH 0x100000 #define MSI_IOVA_LENGTH 0x100000
/* Until ACPICA headers cover IORT rev. C */
#ifndef ACPI_IORT_SMMU_HISILICON_HI161X
#define ACPI_IORT_SMMU_HISILICON_HI161X 0x1
#endif
#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX
#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX 0x2
#endif
static bool disable_bypass; static bool disable_bypass;
module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO); module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass, MODULE_PARM_DESC(disable_bypass,
...@@ -504,6 +506,10 @@ struct arm_smmu_cmdq_ent { ...@@ -504,6 +506,10 @@ struct arm_smmu_cmdq_ent {
} pri; } pri;
#define CMDQ_OP_CMD_SYNC 0x46 #define CMDQ_OP_CMD_SYNC 0x46
struct {
u32 msidata;
u64 msiaddr;
} sync;
}; };
}; };
...@@ -604,6 +610,7 @@ struct arm_smmu_device { ...@@ -604,6 +610,7 @@ struct arm_smmu_device {
#define ARM_SMMU_FEAT_TRANS_S2 (1 << 10) #define ARM_SMMU_FEAT_TRANS_S2 (1 << 10)
#define ARM_SMMU_FEAT_STALLS (1 << 11) #define ARM_SMMU_FEAT_STALLS (1 << 11)
#define ARM_SMMU_FEAT_HYP (1 << 12) #define ARM_SMMU_FEAT_HYP (1 << 12)
#define ARM_SMMU_FEAT_STALL_FORCE (1 << 13)
u32 features; u32 features;
#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
...@@ -616,6 +623,7 @@ struct arm_smmu_device { ...@@ -616,6 +623,7 @@ struct arm_smmu_device {
int gerr_irq; int gerr_irq;
int combined_irq; int combined_irq;
atomic_t sync_nr;
unsigned long ias; /* IPA */ unsigned long ias; /* IPA */
unsigned long oas; /* PA */ unsigned long oas; /* PA */
...@@ -634,6 +642,8 @@ struct arm_smmu_device { ...@@ -634,6 +642,8 @@ struct arm_smmu_device {
struct arm_smmu_strtab_cfg strtab_cfg; struct arm_smmu_strtab_cfg strtab_cfg;
u32 sync_count;
/* IOMMU core code handle */ /* IOMMU core code handle */
struct iommu_device iommu; struct iommu_device iommu;
}; };
...@@ -757,26 +767,29 @@ static void queue_inc_prod(struct arm_smmu_queue *q) ...@@ -757,26 +767,29 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
* Wait for the SMMU to consume items. If drain is true, wait until the queue * Wait for the SMMU to consume items. If drain is true, wait until the queue
* is empty. Otherwise, wait until there is at least one free slot. * is empty. Otherwise, wait until there is at least one free slot.
*/ */
static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe) static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
{ {
ktime_t timeout; ktime_t timeout;
unsigned int delay = 1; unsigned int delay = 1, spin_cnt = 0;
/* Wait longer if it's queue drain */ /* Wait longer if it's a CMD_SYNC */
timeout = ktime_add_us(ktime_get(), drain ? timeout = ktime_add_us(ktime_get(), sync ?
ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US : ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
ARM_SMMU_POLL_TIMEOUT_US); ARM_SMMU_POLL_TIMEOUT_US);
while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) { while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
if (ktime_compare(ktime_get(), timeout) > 0) if (ktime_compare(ktime_get(), timeout) > 0)
return -ETIMEDOUT; return -ETIMEDOUT;
if (wfe) { if (wfe) {
wfe(); wfe();
} else { } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
cpu_relax(); cpu_relax();
continue;
} else {
udelay(delay); udelay(delay);
delay *= 2; delay *= 2;
spin_cnt = 0;
} }
} }
...@@ -878,7 +891,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) ...@@ -878,7 +891,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
} }
break; break;
case CMDQ_OP_CMD_SYNC: case CMDQ_OP_CMD_SYNC:
if (ent->sync.msiaddr)
cmd[0] |= CMDQ_SYNC_0_CS_IRQ;
else
cmd[0] |= CMDQ_SYNC_0_CS_SEV; cmd[0] |= CMDQ_SYNC_0_CS_SEV;
cmd[0] |= CMDQ_SYNC_0_MSH_ISH | CMDQ_SYNC_0_MSIATTR_OIWB;
cmd[0] |= (u64)ent->sync.msidata << CMDQ_SYNC_0_MSIDATA_SHIFT;
cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
break; break;
default: default:
return -ENOENT; return -ENOENT;
...@@ -936,13 +955,22 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) ...@@ -936,13 +955,22 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
queue_write(Q_ENT(q, cons), cmd, q->ent_dwords); queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
} }
static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
{
struct arm_smmu_queue *q = &smmu->cmdq.q;
bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
while (queue_insert_raw(q, cmd) == -ENOSPC) {
if (queue_poll_cons(q, false, wfe))
dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
}
}
static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_ent *ent) struct arm_smmu_cmdq_ent *ent)
{ {
u64 cmd[CMDQ_ENT_DWORDS]; u64 cmd[CMDQ_ENT_DWORDS];
unsigned long flags; unsigned long flags;
bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
struct arm_smmu_queue *q = &smmu->cmdq.q;
if (arm_smmu_cmdq_build_cmd(cmd, ent)) { if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n", dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
...@@ -951,14 +979,76 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, ...@@ -951,14 +979,76 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
} }
spin_lock_irqsave(&smmu->cmdq.lock, flags); spin_lock_irqsave(&smmu->cmdq.lock, flags);
while (queue_insert_raw(q, cmd) == -ENOSPC) { arm_smmu_cmdq_insert_cmd(smmu, cmd);
if (queue_poll_cons(q, false, wfe)) spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
dev_err_ratelimited(smmu->dev, "CMDQ timeout\n"); }
}
if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, true, wfe)) /*
dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n"); * The difference between val and sync_idx is bounded by the maximum size of
* a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
*/
static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
{
ktime_t timeout;
u32 val;
timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
val = smp_cond_load_acquire(&smmu->sync_count,
(int)(VAL - sync_idx) >= 0 ||
!ktime_before(ktime_get(), timeout));
return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
}
static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
{
u64 cmd[CMDQ_ENT_DWORDS];
unsigned long flags;
struct arm_smmu_cmdq_ent ent = {
.opcode = CMDQ_OP_CMD_SYNC,
.sync = {
.msidata = atomic_inc_return_relaxed(&smmu->sync_nr),
.msiaddr = virt_to_phys(&smmu->sync_count),
},
};
arm_smmu_cmdq_build_cmd(cmd, &ent);
spin_lock_irqsave(&smmu->cmdq.lock, flags);
arm_smmu_cmdq_insert_cmd(smmu, cmd);
spin_unlock_irqrestore(&smmu->cmdq.lock, flags); spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
}
static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
{
u64 cmd[CMDQ_ENT_DWORDS];
unsigned long flags;
bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
int ret;
arm_smmu_cmdq_build_cmd(cmd, &ent);
spin_lock_irqsave(&smmu->cmdq.lock, flags);
arm_smmu_cmdq_insert_cmd(smmu, cmd);
ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
return ret;
}
static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
{
int ret;
bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
(smmu->features & ARM_SMMU_FEAT_COHERENCY);
ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu)
: __arm_smmu_cmdq_issue_sync(smmu);
if (ret)
dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
} }
/* Context descriptor manipulation functions */ /* Context descriptor manipulation functions */
...@@ -996,6 +1086,11 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu, ...@@ -996,6 +1086,11 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE | CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE |
CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT | CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT |
CTXDESC_CD_0_V; CTXDESC_CD_0_V;
/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
val |= CTXDESC_CD_0_S;
cfg->cdptr[0] = cpu_to_le64(val); cfg->cdptr[0] = cpu_to_le64(val);
val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT; val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT;
...@@ -1029,8 +1124,7 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid) ...@@ -1029,8 +1124,7 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
}; };
arm_smmu_cmdq_issue_cmd(smmu, &cmd); arm_smmu_cmdq_issue_cmd(smmu, &cmd);
cmd.opcode = CMDQ_OP_CMD_SYNC; arm_smmu_cmdq_issue_sync(smmu);
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
} }
static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
...@@ -1094,7 +1188,11 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, ...@@ -1094,7 +1188,11 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING
<< STRTAB_STE_1_SHCFG_SHIFT); << STRTAB_STE_1_SHCFG_SHIFT);
dst[2] = 0; /* Nuke the VMID */ dst[2] = 0; /* Nuke the VMID */
if (ste_live) /*
* The SMMU can perform negative caching, so we must sync
* the STE regardless of whether the old value was live.
*/
if (smmu)
arm_smmu_sync_ste_for_sid(smmu, sid); arm_smmu_sync_ste_for_sid(smmu, sid);
return; return;
} }
...@@ -1112,7 +1210,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, ...@@ -1112,7 +1210,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
#endif #endif
STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT); STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
if (smmu->features & ARM_SMMU_FEAT_STALLS) if (smmu->features & ARM_SMMU_FEAT_STALLS &&
!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
...@@ -1275,12 +1374,6 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev) ...@@ -1275,12 +1374,6 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
return IRQ_HANDLED; return IRQ_HANDLED;
} }
static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
{
/* We don't actually use CMD_SYNC interrupts for anything */
return IRQ_HANDLED;
}
static int arm_smmu_device_disable(struct arm_smmu_device *smmu); static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev) static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
...@@ -1313,10 +1406,8 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev) ...@@ -1313,10 +1406,8 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
if (active & GERROR_MSI_EVTQ_ABT_ERR) if (active & GERROR_MSI_EVTQ_ABT_ERR)
dev_warn(smmu->dev, "EVTQ MSI write aborted\n"); dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
if (active & GERROR_MSI_CMDQ_ABT_ERR) { if (active & GERROR_MSI_CMDQ_ABT_ERR)
dev_warn(smmu->dev, "CMDQ MSI write aborted\n"); dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
arm_smmu_cmdq_sync_handler(irq, smmu->dev);
}
if (active & GERROR_PRIQ_ABT_ERR) if (active & GERROR_PRIQ_ABT_ERR)
dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n"); dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
...@@ -1345,17 +1436,13 @@ static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev) ...@@ -1345,17 +1436,13 @@ static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev) static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
{ {
arm_smmu_gerror_handler(irq, dev); arm_smmu_gerror_handler(irq, dev);
arm_smmu_cmdq_sync_handler(irq, dev);
return IRQ_WAKE_THREAD; return IRQ_WAKE_THREAD;
} }
/* IO_PGTABLE API */ /* IO_PGTABLE API */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
{ {
struct arm_smmu_cmdq_ent cmd; arm_smmu_cmdq_issue_sync(smmu);
cmd.opcode = CMDQ_OP_CMD_SYNC;
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
} }
static void arm_smmu_tlb_sync(void *cookie) static void arm_smmu_tlb_sync(void *cookie)
...@@ -1743,6 +1830,14 @@ arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) ...@@ -1743,6 +1830,14 @@ arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
return ops->unmap(ops, iova, size); return ops->unmap(ops, iova, size);
} }
static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
{
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
if (smmu)
__arm_smmu_tlb_sync(smmu);
}
static phys_addr_t static phys_addr_t
arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
{ {
...@@ -1963,6 +2058,8 @@ static struct iommu_ops arm_smmu_ops = { ...@@ -1963,6 +2058,8 @@ static struct iommu_ops arm_smmu_ops = {
.map = arm_smmu_map, .map = arm_smmu_map,
.unmap = arm_smmu_unmap, .unmap = arm_smmu_unmap,
.map_sg = default_iommu_map_sg, .map_sg = default_iommu_map_sg,
.flush_iotlb_all = arm_smmu_iotlb_sync,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys, .iova_to_phys = arm_smmu_iova_to_phys,
.add_device = arm_smmu_add_device, .add_device = arm_smmu_add_device,
.remove_device = arm_smmu_remove_device, .remove_device = arm_smmu_remove_device,
...@@ -2147,6 +2244,7 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu) ...@@ -2147,6 +2244,7 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
{ {
int ret; int ret;
atomic_set(&smmu->sync_nr, 0);
ret = arm_smmu_init_queues(smmu); ret = arm_smmu_init_queues(smmu);
if (ret) if (ret)
return ret; return ret;
...@@ -2265,15 +2363,6 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu) ...@@ -2265,15 +2363,6 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
dev_warn(smmu->dev, "failed to enable evtq irq\n"); dev_warn(smmu->dev, "failed to enable evtq irq\n");
} }
irq = smmu->cmdq.q.irq;
if (irq) {
ret = devm_request_irq(smmu->dev, irq,
arm_smmu_cmdq_sync_handler, 0,
"arm-smmu-v3-cmdq-sync", smmu);
if (ret < 0)
dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n");
}
irq = smmu->gerr_irq; irq = smmu->gerr_irq;
if (irq) { if (irq) {
ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler, ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
...@@ -2399,8 +2488,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) ...@@ -2399,8 +2488,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
/* Invalidate any cached configuration */ /* Invalidate any cached configuration */
cmd.opcode = CMDQ_OP_CFGI_ALL; cmd.opcode = CMDQ_OP_CFGI_ALL;
arm_smmu_cmdq_issue_cmd(smmu, &cmd); arm_smmu_cmdq_issue_cmd(smmu, &cmd);
cmd.opcode = CMDQ_OP_CMD_SYNC; arm_smmu_cmdq_issue_sync(smmu);
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
/* Invalidate any stale TLB entries */ /* Invalidate any stale TLB entries */
if (smmu->features & ARM_SMMU_FEAT_HYP) { if (smmu->features & ARM_SMMU_FEAT_HYP) {
...@@ -2410,8 +2498,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) ...@@ -2410,8 +2498,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL; cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
arm_smmu_cmdq_issue_cmd(smmu, &cmd); arm_smmu_cmdq_issue_cmd(smmu, &cmd);
cmd.opcode = CMDQ_OP_CMD_SYNC; arm_smmu_cmdq_issue_sync(smmu);
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
/* Event queue */ /* Event queue */
writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE); writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
...@@ -2532,13 +2619,14 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) ...@@ -2532,13 +2619,14 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
* register, but warn on mismatch. * register, but warn on mismatch.
*/ */
if (!!(reg & IDR0_COHACC) != coherent) if (!!(reg & IDR0_COHACC) != coherent)
dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n", dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
coherent ? "true" : "false"); coherent ? "true" : "false");
switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) { switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) {
case IDR0_STALL_MODEL_STALL:
/* Fallthrough */
case IDR0_STALL_MODEL_FORCE: case IDR0_STALL_MODEL_FORCE:
smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
/* Fallthrough */
case IDR0_STALL_MODEL_STALL:
smmu->features |= ARM_SMMU_FEAT_STALLS; smmu->features |= ARM_SMMU_FEAT_STALLS;
} }
...@@ -2665,7 +2753,7 @@ static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu) ...@@ -2665,7 +2753,7 @@ static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX: case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY; smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
break; break;
case ACPI_IORT_SMMU_HISILICON_HI161X: case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH; smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
break; break;
} }
...@@ -2783,10 +2871,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev) ...@@ -2783,10 +2871,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
if (irq > 0) if (irq > 0)
smmu->priq.q.irq = irq; smmu->priq.q.irq = irq;
irq = platform_get_irq_byname(pdev, "cmdq-sync");
if (irq > 0)
smmu->cmdq.q.irq = irq;
irq = platform_get_irq_byname(pdev, "gerror"); irq = platform_get_irq_byname(pdev, "gerror");
if (irq > 0) if (irq > 0)
smmu->gerr_irq = irq; smmu->gerr_irq = irq;
......
...@@ -59,6 +59,7 @@ ...@@ -59,6 +59,7 @@
#define ARM_MMU500_ACTLR_CPRE (1 << 1) #define ARM_MMU500_ACTLR_CPRE (1 << 1)
#define ARM_MMU500_ACR_CACHE_LOCK (1 << 26) #define ARM_MMU500_ACR_CACHE_LOCK (1 << 26)
#define ARM_MMU500_ACR_S2CRB_TLBEN (1 << 10)
#define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8) #define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8)
#define TLB_LOOP_TIMEOUT 1000000 /* 1s! */ #define TLB_LOOP_TIMEOUT 1000000 /* 1s! */
...@@ -119,14 +120,6 @@ enum arm_smmu_implementation { ...@@ -119,14 +120,6 @@ enum arm_smmu_implementation {
CAVIUM_SMMUV2, CAVIUM_SMMUV2,
}; };
/* Until ACPICA headers cover IORT rev. C */
#ifndef ACPI_IORT_SMMU_CORELINK_MMU401
#define ACPI_IORT_SMMU_CORELINK_MMU401 0x4
#endif
#ifndef ACPI_IORT_SMMU_CAVIUM_THUNDERX
#define ACPI_IORT_SMMU_CAVIUM_THUNDERX 0x5
#endif
struct arm_smmu_s2cr { struct arm_smmu_s2cr {
struct iommu_group *group; struct iommu_group *group;
int count; int count;
...@@ -250,6 +243,7 @@ enum arm_smmu_domain_stage { ...@@ -250,6 +243,7 @@ enum arm_smmu_domain_stage {
struct arm_smmu_domain { struct arm_smmu_domain {
struct arm_smmu_device *smmu; struct arm_smmu_device *smmu;
struct io_pgtable_ops *pgtbl_ops; struct io_pgtable_ops *pgtbl_ops;
const struct iommu_gather_ops *tlb_ops;
struct arm_smmu_cfg cfg; struct arm_smmu_cfg cfg;
enum arm_smmu_domain_stage stage; enum arm_smmu_domain_stage stage;
struct mutex init_mutex; /* Protects smmu pointer */ struct mutex init_mutex; /* Protects smmu pointer */
...@@ -735,7 +729,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, ...@@ -735,7 +729,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
enum io_pgtable_fmt fmt; enum io_pgtable_fmt fmt;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_cfg *cfg = &smmu_domain->cfg; struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
const struct iommu_gather_ops *tlb_ops;
mutex_lock(&smmu_domain->init_mutex); mutex_lock(&smmu_domain->init_mutex);
if (smmu_domain->smmu) if (smmu_domain->smmu)
...@@ -813,7 +806,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, ...@@ -813,7 +806,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
ias = min(ias, 32UL); ias = min(ias, 32UL);
oas = min(oas, 32UL); oas = min(oas, 32UL);
} }
tlb_ops = &arm_smmu_s1_tlb_ops; smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
break; break;
case ARM_SMMU_DOMAIN_NESTED: case ARM_SMMU_DOMAIN_NESTED:
/* /*
...@@ -833,9 +826,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, ...@@ -833,9 +826,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
oas = min(oas, 40UL); oas = min(oas, 40UL);
} }
if (smmu->version == ARM_SMMU_V2) if (smmu->version == ARM_SMMU_V2)
tlb_ops = &arm_smmu_s2_tlb_ops_v2; smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
else else
tlb_ops = &arm_smmu_s2_tlb_ops_v1; smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
break; break;
default: default:
ret = -EINVAL; ret = -EINVAL;
...@@ -863,7 +856,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, ...@@ -863,7 +856,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
.pgsize_bitmap = smmu->pgsize_bitmap, .pgsize_bitmap = smmu->pgsize_bitmap,
.ias = ias, .ias = ias,
.oas = oas, .oas = oas,
.tlb = tlb_ops, .tlb = smmu_domain->tlb_ops,
.iommu_dev = smmu->dev, .iommu_dev = smmu->dev,
}; };
...@@ -1259,6 +1252,14 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, ...@@ -1259,6 +1252,14 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
return ops->unmap(ops, iova, size); return ops->unmap(ops, iova, size);
} }
static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
if (smmu_domain->tlb_ops)
smmu_domain->tlb_ops->tlb_sync(smmu_domain);
}
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
dma_addr_t iova) dma_addr_t iova)
{ {
...@@ -1562,6 +1563,8 @@ static struct iommu_ops arm_smmu_ops = { ...@@ -1562,6 +1563,8 @@ static struct iommu_ops arm_smmu_ops = {
.map = arm_smmu_map, .map = arm_smmu_map,
.unmap = arm_smmu_unmap, .unmap = arm_smmu_unmap,
.map_sg = default_iommu_map_sg, .map_sg = default_iommu_map_sg,
.flush_iotlb_all = arm_smmu_iotlb_sync,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys, .iova_to_phys = arm_smmu_iova_to_phys,
.add_device = arm_smmu_add_device, .add_device = arm_smmu_add_device,
.remove_device = arm_smmu_remove_device, .remove_device = arm_smmu_remove_device,
...@@ -1606,7 +1609,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) ...@@ -1606,7 +1609,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
* Allow unmatched Stream IDs to allocate bypass * Allow unmatched Stream IDs to allocate bypass
* TLB entries for reduced latency. * TLB entries for reduced latency.
*/ */
reg |= ARM_MMU500_ACR_SMTNMB_TLBEN; reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR); writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
} }
......
...@@ -292,18 +292,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, ...@@ -292,18 +292,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
/* ...then finally give it a kicking to make sure it fits */ /* ...then finally give it a kicking to make sure it fits */
base_pfn = max_t(unsigned long, base_pfn, base_pfn = max_t(unsigned long, base_pfn,
domain->geometry.aperture_start >> order); domain->geometry.aperture_start >> order);
end_pfn = min_t(unsigned long, end_pfn,
domain->geometry.aperture_end >> order);
} }
/*
* PCI devices may have larger DMA masks, but still prefer allocating
* within a 32-bit mask to avoid DAC addressing. Such limitations don't
* apply to the typical platform device, so for those we may as well
* leave the cache limit at the top of their range to save an rb_last()
* traversal on every allocation.
*/
if (dev && dev_is_pci(dev))
end_pfn &= DMA_BIT_MASK(32) >> order;
/* start_pfn is always nonzero for an already-initialised domain */ /* start_pfn is always nonzero for an already-initialised domain */
if (iovad->start_pfn) { if (iovad->start_pfn) {
...@@ -312,16 +301,11 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, ...@@ -312,16 +301,11 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
pr_warn("Incompatible range for DMA domain\n"); pr_warn("Incompatible range for DMA domain\n");
return -EFAULT; return -EFAULT;
} }
/*
* If we have devices with different DMA masks, move the free
* area cache limit down for the benefit of the smaller one.
*/
iovad->dma_32bit_pfn = min(end_pfn + 1, iovad->dma_32bit_pfn);
return 0; return 0;
} }
init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn); init_iova_domain(iovad, 1UL << order, base_pfn);
if (!dev) if (!dev)
return 0; return 0;
...@@ -386,10 +370,12 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, ...@@ -386,10 +370,12 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
/* Try to get PCI devices a SAC address */ /* Try to get PCI devices a SAC address */
if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev)) if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift); iova = alloc_iova_fast(iovad, iova_len,
DMA_BIT_MASK(32) >> shift, false);
if (!iova) if (!iova)
iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift); iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift,
true);
return (dma_addr_t)iova << shift; return (dma_addr_t)iova << shift;
} }
......
...@@ -801,13 +801,16 @@ int __init dmar_dev_scope_init(void) ...@@ -801,13 +801,16 @@ int __init dmar_dev_scope_init(void)
dmar_free_pci_notify_info(info); dmar_free_pci_notify_info(info);
} }
} }
bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
} }
return dmar_dev_scope_status; return dmar_dev_scope_status;
} }
void dmar_register_bus_notifier(void)
{
bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
}
int __init dmar_table_init(void) int __init dmar_table_init(void)
{ {
...@@ -1676,7 +1679,8 @@ irqreturn_t dmar_fault(int irq, void *dev_id) ...@@ -1676,7 +1679,8 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
raw_spin_lock_irqsave(&iommu->register_lock, flag); raw_spin_lock_irqsave(&iommu->register_lock, flag);
} }
writel(DMA_FSTS_PFO | DMA_FSTS_PPF, iommu->reg + DMAR_FSTS_REG); writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
iommu->reg + DMAR_FSTS_REG);
unlock_exit: unlock_exit:
raw_spin_unlock_irqrestore(&iommu->register_lock, flag); raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
......
...@@ -263,6 +263,7 @@ struct exynos_iommu_domain { ...@@ -263,6 +263,7 @@ struct exynos_iommu_domain {
struct sysmmu_drvdata { struct sysmmu_drvdata {
struct device *sysmmu; /* SYSMMU controller device */ struct device *sysmmu; /* SYSMMU controller device */
struct device *master; /* master device (owner) */ struct device *master; /* master device (owner) */
struct device_link *link; /* runtime PM link to master */
void __iomem *sfrbase; /* our registers */ void __iomem *sfrbase; /* our registers */
struct clk *clk; /* SYSMMU's clock */ struct clk *clk; /* SYSMMU's clock */
struct clk *aclk; /* SYSMMU's aclk clock */ struct clk *aclk; /* SYSMMU's aclk clock */
...@@ -1250,6 +1251,8 @@ static struct iommu_group *get_device_iommu_group(struct device *dev) ...@@ -1250,6 +1251,8 @@ static struct iommu_group *get_device_iommu_group(struct device *dev)
static int exynos_iommu_add_device(struct device *dev) static int exynos_iommu_add_device(struct device *dev)
{ {
struct exynos_iommu_owner *owner = dev->archdata.iommu;
struct sysmmu_drvdata *data;
struct iommu_group *group; struct iommu_group *group;
if (!has_sysmmu(dev)) if (!has_sysmmu(dev))
...@@ -1260,6 +1263,15 @@ static int exynos_iommu_add_device(struct device *dev) ...@@ -1260,6 +1263,15 @@ static int exynos_iommu_add_device(struct device *dev)
if (IS_ERR(group)) if (IS_ERR(group))
return PTR_ERR(group); return PTR_ERR(group);
list_for_each_entry(data, &owner->controllers, owner_node) {
/*
* SYSMMU will be runtime activated via device link
* (dependency) to its master device, so there are no
* direct calls to pm_runtime_get/put in this driver.
*/
data->link = device_link_add(dev, data->sysmmu,
DL_FLAG_PM_RUNTIME);
}
iommu_group_put(group); iommu_group_put(group);
return 0; return 0;
...@@ -1268,6 +1280,7 @@ static int exynos_iommu_add_device(struct device *dev) ...@@ -1268,6 +1280,7 @@ static int exynos_iommu_add_device(struct device *dev)
static void exynos_iommu_remove_device(struct device *dev) static void exynos_iommu_remove_device(struct device *dev)
{ {
struct exynos_iommu_owner *owner = dev->archdata.iommu; struct exynos_iommu_owner *owner = dev->archdata.iommu;
struct sysmmu_drvdata *data;
if (!has_sysmmu(dev)) if (!has_sysmmu(dev))
return; return;
...@@ -1283,6 +1296,9 @@ static void exynos_iommu_remove_device(struct device *dev) ...@@ -1283,6 +1296,9 @@ static void exynos_iommu_remove_device(struct device *dev)
} }
} }
iommu_group_remove_device(dev); iommu_group_remove_device(dev);
list_for_each_entry(data, &owner->controllers, owner_node)
device_link_del(data->link);
} }
static int exynos_iommu_of_xlate(struct device *dev, static int exynos_iommu_of_xlate(struct device *dev,
...@@ -1316,13 +1332,6 @@ static int exynos_iommu_of_xlate(struct device *dev, ...@@ -1316,13 +1332,6 @@ static int exynos_iommu_of_xlate(struct device *dev,
list_add_tail(&data->owner_node, &owner->controllers); list_add_tail(&data->owner_node, &owner->controllers);
data->master = dev; data->master = dev;
/*
* SYSMMU will be runtime activated via device link (dependency) to its
* master device, so there are no direct calls to pm_runtime_get/put
* in this driver.
*/
device_link_add(dev, data->sysmmu, DL_FLAG_PM_RUNTIME);
return 0; return 0;
} }
......
...@@ -82,8 +82,6 @@ ...@@ -82,8 +82,6 @@
#define IOVA_START_PFN (1) #define IOVA_START_PFN (1)
#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
/* page table handling */ /* page table handling */
#define LEVEL_STRIDE (9) #define LEVEL_STRIDE (9)
...@@ -1878,8 +1876,7 @@ static int dmar_init_reserved_ranges(void) ...@@ -1878,8 +1876,7 @@ static int dmar_init_reserved_ranges(void)
struct iova *iova; struct iova *iova;
int i; int i;
init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN, init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
DMA_32BIT_PFN);
lockdep_set_class(&reserved_iova_list.iova_rbtree_lock, lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
&reserved_rbtree_key); &reserved_rbtree_key);
...@@ -1938,8 +1935,7 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, ...@@ -1938,8 +1935,7 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
unsigned long sagaw; unsigned long sagaw;
int err; int err;
init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
DMA_32BIT_PFN);
err = init_iova_flush_queue(&domain->iovad, err = init_iova_flush_queue(&domain->iovad,
iommu_flush_iova, iova_entry_free); iommu_flush_iova, iova_entry_free);
...@@ -2058,7 +2054,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, ...@@ -2058,7 +2054,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
if (context_copied(context)) { if (context_copied(context)) {
u16 did_old = context_domain_id(context); u16 did_old = context_domain_id(context);
if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) { if (did_old < cap_ndoms(iommu->cap)) {
iommu->flush.flush_context(iommu, did_old, iommu->flush.flush_context(iommu, did_old,
(((u16)bus) << 8) | devfn, (((u16)bus) << 8) | devfn,
DMA_CCMD_MASK_NOBIT, DMA_CCMD_MASK_NOBIT,
...@@ -3473,11 +3469,12 @@ static unsigned long intel_alloc_iova(struct device *dev, ...@@ -3473,11 +3469,12 @@ static unsigned long intel_alloc_iova(struct device *dev,
* from higher range * from higher range
*/ */
iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
IOVA_PFN(DMA_BIT_MASK(32))); IOVA_PFN(DMA_BIT_MASK(32)), false);
if (iova_pfn) if (iova_pfn)
return iova_pfn; return iova_pfn;
} }
iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, IOVA_PFN(dma_mask)); iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
IOVA_PFN(dma_mask), true);
if (unlikely(!iova_pfn)) { if (unlikely(!iova_pfn)) {
pr_err("Allocating %ld-page iova for %s failed", pr_err("Allocating %ld-page iova for %s failed",
nrpages, dev_name(dev)); nrpages, dev_name(dev));
...@@ -4752,6 +4749,16 @@ int __init intel_iommu_init(void) ...@@ -4752,6 +4749,16 @@ int __init intel_iommu_init(void)
goto out_free_dmar; goto out_free_dmar;
} }
up_write(&dmar_global_lock);
/*
* The bus notifier takes the dmar_global_lock, so lockdep will
* complain later when we register it under the lock.
*/
dmar_register_bus_notifier();
down_write(&dmar_global_lock);
if (no_iommu || dmar_disabled) { if (no_iommu || dmar_disabled) {
/* /*
* We exit the function here to ensure IOMMU's remapping and * We exit the function here to ensure IOMMU's remapping and
...@@ -4897,8 +4904,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) ...@@ -4897,8 +4904,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
{ {
int adjust_width; int adjust_width;
init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
DMA_32BIT_PFN);
domain_reserve_special_ranges(domain); domain_reserve_special_ranges(domain);
/* calculate AGAW */ /* calculate AGAW */
......
...@@ -292,7 +292,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ ...@@ -292,7 +292,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
int pasid_max; int pasid_max;
int ret; int ret;
if (WARN_ON(!iommu)) if (WARN_ON(!iommu || !iommu->pasid_table))
return -EINVAL; return -EINVAL;
if (dev_is_pci(dev)) { if (dev_is_pci(dev)) {
...@@ -458,6 +458,8 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) ...@@ -458,6 +458,8 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
kfree_rcu(sdev, rcu); kfree_rcu(sdev, rcu);
if (list_empty(&svm->devs)) { if (list_empty(&svm->devs)) {
svm->iommu->pasid_table[svm->pasid].val = 0;
wmb();
idr_remove(&svm->iommu->pasid_idr, svm->pasid); idr_remove(&svm->iommu->pasid_idr, svm->pasid);
if (svm->mm) if (svm->mm)
......
...@@ -660,16 +660,11 @@ static int arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova, ...@@ -660,16 +660,11 @@ static int arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
size_t size) size_t size)
{ {
struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops); struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
size_t unmapped;
if (WARN_ON(upper_32_bits(iova))) if (WARN_ON(upper_32_bits(iova)))
return 0; return 0;
unmapped = __arm_v7s_unmap(data, iova, size, 1, data->pgd); return __arm_v7s_unmap(data, iova, size, 1, data->pgd);
if (unmapped)
io_pgtable_tlb_sync(&data->iop);
return unmapped;
} }
static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops, static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
......
...@@ -609,7 +609,6 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, ...@@ -609,7 +609,6 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
size_t size) size_t size)
{ {
size_t unmapped;
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
arm_lpae_iopte *ptep = data->pgd; arm_lpae_iopte *ptep = data->pgd;
int lvl = ARM_LPAE_START_LVL(data); int lvl = ARM_LPAE_START_LVL(data);
...@@ -617,11 +616,7 @@ static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, ...@@ -617,11 +616,7 @@ static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias))) if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
return 0; return 0;
unmapped = __arm_lpae_unmap(data, iova, size, lvl, ptep); return __arm_lpae_unmap(data, iova, size, lvl, ptep);
if (unmapped)
io_pgtable_tlb_sync(&data->iop);
return unmapped;
} }
static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
......
...@@ -24,6 +24,9 @@ ...@@ -24,6 +24,9 @@
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/cpu.h> #include <linux/cpu.h>
/* The anchor node sits above the top of the usable address space */
#define IOVA_ANCHOR ~0UL
static bool iova_rcache_insert(struct iova_domain *iovad, static bool iova_rcache_insert(struct iova_domain *iovad,
unsigned long pfn, unsigned long pfn,
unsigned long size); unsigned long size);
...@@ -37,7 +40,7 @@ static void fq_flush_timeout(unsigned long data); ...@@ -37,7 +40,7 @@ static void fq_flush_timeout(unsigned long data);
void void
init_iova_domain(struct iova_domain *iovad, unsigned long granule, init_iova_domain(struct iova_domain *iovad, unsigned long granule,
unsigned long start_pfn, unsigned long pfn_32bit) unsigned long start_pfn)
{ {
/* /*
* IOVA granularity will normally be equal to the smallest * IOVA granularity will normally be equal to the smallest
...@@ -48,12 +51,16 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, ...@@ -48,12 +51,16 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
spin_lock_init(&iovad->iova_rbtree_lock); spin_lock_init(&iovad->iova_rbtree_lock);
iovad->rbroot = RB_ROOT; iovad->rbroot = RB_ROOT;
iovad->cached32_node = NULL; iovad->cached_node = &iovad->anchor.node;
iovad->cached32_node = &iovad->anchor.node;
iovad->granule = granule; iovad->granule = granule;
iovad->start_pfn = start_pfn; iovad->start_pfn = start_pfn;
iovad->dma_32bit_pfn = pfn_32bit + 1; iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
iovad->flush_cb = NULL; iovad->flush_cb = NULL;
iovad->fq = NULL; iovad->fq = NULL;
iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
init_iova_rcaches(iovad); init_iova_rcaches(iovad);
} }
EXPORT_SYMBOL_GPL(init_iova_domain); EXPORT_SYMBOL_GPL(init_iova_domain);
...@@ -108,50 +115,36 @@ int init_iova_flush_queue(struct iova_domain *iovad, ...@@ -108,50 +115,36 @@ int init_iova_flush_queue(struct iova_domain *iovad,
EXPORT_SYMBOL_GPL(init_iova_flush_queue); EXPORT_SYMBOL_GPL(init_iova_flush_queue);
static struct rb_node * static struct rb_node *
__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) __get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
{ {
if ((*limit_pfn > iovad->dma_32bit_pfn) || if (limit_pfn <= iovad->dma_32bit_pfn)
(iovad->cached32_node == NULL)) return iovad->cached32_node;
return rb_last(&iovad->rbroot);
else { return iovad->cached_node;
struct rb_node *prev_node = rb_prev(iovad->cached32_node);
struct iova *curr_iova =
rb_entry(iovad->cached32_node, struct iova, node);
*limit_pfn = curr_iova->pfn_lo;
return prev_node;
}
} }
static void static void
__cached_rbnode_insert_update(struct iova_domain *iovad, __cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
unsigned long limit_pfn, struct iova *new)
{ {
if (limit_pfn != iovad->dma_32bit_pfn) if (new->pfn_hi < iovad->dma_32bit_pfn)
return;
iovad->cached32_node = &new->node; iovad->cached32_node = &new->node;
else
iovad->cached_node = &new->node;
} }
static void static void
__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
{ {
struct iova *cached_iova; struct iova *cached_iova;
struct rb_node *curr;
if (!iovad->cached32_node)
return;
curr = iovad->cached32_node;
cached_iova = rb_entry(curr, struct iova, node);
if (free->pfn_lo >= cached_iova->pfn_lo) { cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
struct rb_node *node = rb_next(&free->node); if (free->pfn_hi < iovad->dma_32bit_pfn &&
struct iova *iova = rb_entry(node, struct iova, node); free->pfn_lo >= cached_iova->pfn_lo)
iovad->cached32_node = rb_next(&free->node);
/* only cache if it's below 32bit pfn */ cached_iova = rb_entry(iovad->cached_node, struct iova, node);
if (node && iova->pfn_lo < iovad->dma_32bit_pfn) if (free->pfn_lo >= cached_iova->pfn_lo)
iovad->cached32_node = node; iovad->cached_node = rb_next(&free->node);
else
iovad->cached32_node = NULL;
}
} }
/* Insert the iova into domain rbtree by holding writer lock */ /* Insert the iova into domain rbtree by holding writer lock */
...@@ -182,63 +175,43 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova, ...@@ -182,63 +175,43 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova,
rb_insert_color(&iova->node, root); rb_insert_color(&iova->node, root);
} }
/*
* Computes the padding size required, to make the start address
* naturally aligned on the power-of-two order of its size
*/
static unsigned int
iova_get_pad_size(unsigned int size, unsigned int limit_pfn)
{
return (limit_pfn - size) & (__roundup_pow_of_two(size) - 1);
}
static int __alloc_and_insert_iova_range(struct iova_domain *iovad, static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
unsigned long size, unsigned long limit_pfn, unsigned long size, unsigned long limit_pfn,
struct iova *new, bool size_aligned) struct iova *new, bool size_aligned)
{ {
struct rb_node *prev, *curr = NULL; struct rb_node *curr, *prev;
struct iova *curr_iova;
unsigned long flags; unsigned long flags;
unsigned long saved_pfn; unsigned long new_pfn;
unsigned int pad_size = 0; unsigned long align_mask = ~0UL;
if (size_aligned)
align_mask <<= fls_long(size - 1);
/* Walk the tree backwards */ /* Walk the tree backwards */
spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
saved_pfn = limit_pfn; curr = __get_cached_rbnode(iovad, limit_pfn);
curr = __get_cached_rbnode(iovad, &limit_pfn); curr_iova = rb_entry(curr, struct iova, node);
prev = curr; do {
while (curr) { limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
struct iova *curr_iova = rb_entry(curr, struct iova, node); new_pfn = (limit_pfn - size) & align_mask;
if (limit_pfn <= curr_iova->pfn_lo) {
goto move_left;
} else if (limit_pfn > curr_iova->pfn_hi) {
if (size_aligned)
pad_size = iova_get_pad_size(size, limit_pfn);
if ((curr_iova->pfn_hi + size + pad_size) < limit_pfn)
break; /* found a free slot */
}
limit_pfn = curr_iova->pfn_lo;
move_left:
prev = curr; prev = curr;
curr = rb_prev(curr); curr = rb_prev(curr);
} curr_iova = rb_entry(curr, struct iova, node);
} while (curr && new_pfn <= curr_iova->pfn_hi);
if (!curr) { if (limit_pfn < size || new_pfn < iovad->start_pfn) {
if (size_aligned)
pad_size = iova_get_pad_size(size, limit_pfn);
if ((iovad->start_pfn + size + pad_size) > limit_pfn) {
spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
return -ENOMEM; return -ENOMEM;
} }
}
/* pfn_lo will point to size aligned address if size_aligned is set */ /* pfn_lo will point to size aligned address if size_aligned is set */
new->pfn_lo = limit_pfn - (size + pad_size); new->pfn_lo = new_pfn;
new->pfn_hi = new->pfn_lo + size - 1; new->pfn_hi = new->pfn_lo + size - 1;
/* If we have 'prev', it's a valid place to start the insertion. */ /* If we have 'prev', it's a valid place to start the insertion. */
iova_insert_rbtree(&iovad->rbroot, new, prev); iova_insert_rbtree(&iovad->rbroot, new, prev);
__cached_rbnode_insert_update(iovad, saved_pfn, new); __cached_rbnode_insert_update(iovad, new);
spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
...@@ -258,6 +231,7 @@ EXPORT_SYMBOL(alloc_iova_mem); ...@@ -258,6 +231,7 @@ EXPORT_SYMBOL(alloc_iova_mem);
void free_iova_mem(struct iova *iova) void free_iova_mem(struct iova *iova)
{ {
if (iova->pfn_lo != IOVA_ANCHOR)
kmem_cache_free(iova_cache, iova); kmem_cache_free(iova_cache, iova);
} }
EXPORT_SYMBOL(free_iova_mem); EXPORT_SYMBOL(free_iova_mem);
...@@ -342,15 +316,12 @@ private_find_iova(struct iova_domain *iovad, unsigned long pfn) ...@@ -342,15 +316,12 @@ private_find_iova(struct iova_domain *iovad, unsigned long pfn)
while (node) { while (node) {
struct iova *iova = rb_entry(node, struct iova, node); struct iova *iova = rb_entry(node, struct iova, node);
/* If pfn falls within iova's range, return iova */
if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) {
return iova;
}
if (pfn < iova->pfn_lo) if (pfn < iova->pfn_lo)
node = node->rb_left; node = node->rb_left;
else if (pfn > iova->pfn_lo) else if (pfn > iova->pfn_hi)
node = node->rb_right; node = node->rb_right;
else
return iova; /* pfn falls within iova's range */
} }
return NULL; return NULL;
...@@ -424,18 +395,19 @@ EXPORT_SYMBOL_GPL(free_iova); ...@@ -424,18 +395,19 @@ EXPORT_SYMBOL_GPL(free_iova);
* @iovad: - iova domain in question * @iovad: - iova domain in question
* @size: - size of page frames to allocate * @size: - size of page frames to allocate
* @limit_pfn: - max limit address * @limit_pfn: - max limit address
* @flush_rcache: - set to flush rcache on regular allocation failure
* This function tries to satisfy an iova allocation from the rcache, * This function tries to satisfy an iova allocation from the rcache,
* and falls back to regular allocation on failure. * and falls back to regular allocation on failure. If regular allocation
* fails too and the flush_rcache flag is set then the rcache will be flushed.
*/ */
unsigned long unsigned long
alloc_iova_fast(struct iova_domain *iovad, unsigned long size, alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
unsigned long limit_pfn) unsigned long limit_pfn, bool flush_rcache)
{ {
bool flushed_rcache = false;
unsigned long iova_pfn; unsigned long iova_pfn;
struct iova *new_iova; struct iova *new_iova;
iova_pfn = iova_rcache_get(iovad, size, limit_pfn); iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
if (iova_pfn) if (iova_pfn)
return iova_pfn; return iova_pfn;
...@@ -444,11 +416,11 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long size, ...@@ -444,11 +416,11 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
if (!new_iova) { if (!new_iova) {
unsigned int cpu; unsigned int cpu;
if (flushed_rcache) if (!flush_rcache)
return 0; return 0;
/* Try replenishing IOVAs by flushing rcache. */ /* Try replenishing IOVAs by flushing rcache. */
flushed_rcache = true; flush_rcache = false;
for_each_online_cpu(cpu) for_each_online_cpu(cpu)
free_cpu_cached_iovas(cpu, iovad); free_cpu_cached_iovas(cpu, iovad);
goto retry; goto retry;
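The new flush_rcache parameter turns the old "flush once, then retry" bookkeeping inside out: the caller decides whether a failed fast allocation is worth the cost of draining the per-CPU caches. A simplified userspace model of that control flow, with try_alloc() and flush_all_caches() as invented stand-ins:

#include <stdbool.h>
#include <stdio.h>

static int attempts;

static unsigned long try_alloc(void)
{
	/* Fail the first attempt to force the flush-and-retry path. */
	return ++attempts > 1 ? 0x1000UL : 0;
}

static void flush_all_caches(void)
{
	printf("flushing cached ranges\n");
}

static unsigned long alloc_fast(bool flush_on_failure)
{
	unsigned long pfn;

retry:
	pfn = try_alloc();
	if (!pfn) {
		if (!flush_on_failure)
			return 0;	/* caller prefers to fail fast */
		flush_on_failure = false;	/* flush and retry only once */
		flush_all_caches();
		goto retry;
	}
	return pfn;
}

int main(void)
{
	printf("got %#lx\n", alloc_fast(true));
	return 0;
}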
...@@ -570,7 +542,7 @@ void queue_iova(struct iova_domain *iovad, ...@@ -570,7 +542,7 @@ void queue_iova(struct iova_domain *iovad,
unsigned long pfn, unsigned long pages, unsigned long pfn, unsigned long pages,
unsigned long data) unsigned long data)
{ {
struct iova_fq *fq = get_cpu_ptr(iovad->fq); struct iova_fq *fq = raw_cpu_ptr(iovad->fq);
unsigned long flags; unsigned long flags;
unsigned idx; unsigned idx;
...@@ -600,8 +572,6 @@ void queue_iova(struct iova_domain *iovad, ...@@ -600,8 +572,6 @@ void queue_iova(struct iova_domain *iovad,
if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0) if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0)
mod_timer(&iovad->fq_timer, mod_timer(&iovad->fq_timer,
jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
put_cpu_ptr(iovad->fq);
} }
EXPORT_SYMBOL_GPL(queue_iova); EXPORT_SYMBOL_GPL(queue_iova);
...@@ -612,21 +582,12 @@ EXPORT_SYMBOL_GPL(queue_iova); ...@@ -612,21 +582,12 @@ EXPORT_SYMBOL_GPL(queue_iova);
*/ */
void put_iova_domain(struct iova_domain *iovad) void put_iova_domain(struct iova_domain *iovad)
{ {
struct rb_node *node; struct iova *iova, *tmp;
unsigned long flags;
free_iova_flush_queue(iovad); free_iova_flush_queue(iovad);
free_iova_rcaches(iovad); free_iova_rcaches(iovad);
spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
node = rb_first(&iovad->rbroot);
while (node) {
struct iova *iova = rb_entry(node, struct iova, node);
rb_erase(node, &iovad->rbroot);
free_iova_mem(iova); free_iova_mem(iova);
node = rb_first(&iovad->rbroot);
}
spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
} }
EXPORT_SYMBOL_GPL(put_iova_domain); EXPORT_SYMBOL_GPL(put_iova_domain);
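The rbtree_postorder_for_each_entry_safe() conversion above works because a postorder walk visits children before their parent, so each node can be freed as it is passed. A tiny standalone illustration of that property on a hand-built binary tree; the node type is invented for the sketch.

#include <stdio.h>
#include <stdlib.h>

struct node {
	int val;
	struct node *left, *right;
};

static void postorder_free(struct node *n)
{
	if (!n)
		return;
	postorder_free(n->left);
	postorder_free(n->right);
	printf("freeing %d\n", n->val);	/* children already gone */
	free(n);
}

static struct node *make(int val, struct node *l, struct node *r)
{
	struct node *n = malloc(sizeof(*n));

	if (!n)
		exit(1);
	n->val = val;
	n->left = l;
	n->right = r;
	return n;
}

int main(void)
{
	postorder_free(make(2, make(1, NULL, NULL), make(3, NULL, NULL)));
	return 0;
}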
...@@ -695,6 +656,10 @@ reserve_iova(struct iova_domain *iovad, ...@@ -695,6 +656,10 @@ reserve_iova(struct iova_domain *iovad,
struct iova *iova; struct iova *iova;
unsigned int overlap = 0; unsigned int overlap = 0;
/* Don't allow nonsensical pfns */
if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
return NULL;
spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
if (__is_range_overlap(node, pfn_lo, pfn_hi)) { if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
...@@ -738,6 +703,9 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) ...@@ -738,6 +703,9 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
struct iova *iova = rb_entry(node, struct iova, node); struct iova *iova = rb_entry(node, struct iova, node);
struct iova *new_iova; struct iova *new_iova;
if (iova->pfn_lo == IOVA_ANCHOR)
continue;
new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi); new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
if (!new_iova) if (!new_iova)
printk(KERN_ERR "Reserve iova range %lx@%lx failed\n", printk(KERN_ERR "Reserve iova range %lx@%lx failed\n",
...@@ -855,12 +823,21 @@ static bool iova_magazine_empty(struct iova_magazine *mag) ...@@ -855,12 +823,21 @@ static bool iova_magazine_empty(struct iova_magazine *mag)
static unsigned long iova_magazine_pop(struct iova_magazine *mag, static unsigned long iova_magazine_pop(struct iova_magazine *mag,
unsigned long limit_pfn) unsigned long limit_pfn)
{ {
int i;
unsigned long pfn;
BUG_ON(iova_magazine_empty(mag)); BUG_ON(iova_magazine_empty(mag));
if (mag->pfns[mag->size - 1] >= limit_pfn) /* Only fall back to the rbtree if we have no suitable pfns at all */
for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
if (i == 0)
return 0; return 0;
return mag->pfns[--mag->size]; /* Swap it to pop it */
pfn = mag->pfns[i];
mag->pfns[i] = mag->pfns[--mag->size];
return pfn;
} }
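The reworked pop above no longer insists that the topmost cached entry satisfy the limit; it scans for any suitable entry and swaps it with the last one so the array stays dense. A userspace sketch of that swap-to-pop scheme, with a plain array standing in for the magazine and made-up pfn values:

#include <stdio.h>

static unsigned long pfns[] = { 0x100, 0x80, 0x200, 0x40 };
static unsigned int size = 4;

static unsigned long magazine_pop(unsigned long limit_pfn)
{
	unsigned long pfn;
	unsigned int i;

	if (!size)
		return 0;

	/* Only give up if no cached entry at all satisfies the limit. */
	for (i = size - 1; pfns[i] > limit_pfn; i--)
		if (i == 0)
			return 0;

	/* Swap the chosen entry with the last one to pop it. */
	pfn = pfns[i];
	pfns[i] = pfns[--size];
	return pfn;
}

int main(void)
{
	printf("popped %#lx, %u entries left\n", magazine_pop(0x90), size);
	return 0;
}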
static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn) static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
...@@ -1011,27 +988,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad, ...@@ -1011,27 +988,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad,
if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE) if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
return 0; return 0;
return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn); return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
}
/*
* Free a cpu's rcache.
*/
static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad,
struct iova_rcache *rcache)
{
struct iova_cpu_rcache *cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
unsigned long flags;
spin_lock_irqsave(&cpu_rcache->lock, flags);
iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
iova_magazine_free(cpu_rcache->loaded);
iova_magazine_free_pfns(cpu_rcache->prev, iovad);
iova_magazine_free(cpu_rcache->prev);
spin_unlock_irqrestore(&cpu_rcache->lock, flags);
} }
/* /*
...@@ -1040,22 +997,21 @@ static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad, ...@@ -1040,22 +997,21 @@ static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad,
static void free_iova_rcaches(struct iova_domain *iovad) static void free_iova_rcaches(struct iova_domain *iovad)
{ {
struct iova_rcache *rcache; struct iova_rcache *rcache;
unsigned long flags; struct iova_cpu_rcache *cpu_rcache;
unsigned int cpu; unsigned int cpu;
int i, j; int i, j;
for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
rcache = &iovad->rcaches[i]; rcache = &iovad->rcaches[i];
for_each_possible_cpu(cpu) for_each_possible_cpu(cpu) {
free_cpu_iova_rcache(cpu, iovad, rcache); cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
spin_lock_irqsave(&rcache->lock, flags); iova_magazine_free(cpu_rcache->loaded);
iova_magazine_free(cpu_rcache->prev);
}
free_percpu(rcache->cpu_rcaches); free_percpu(rcache->cpu_rcaches);
for (j = 0; j < rcache->depot_size; ++j) { for (j = 0; j < rcache->depot_size; ++j)
iova_magazine_free_pfns(rcache->depot[j], iovad);
iova_magazine_free(rcache->depot[j]); iova_magazine_free(rcache->depot[j]);
} }
spin_unlock_irqrestore(&rcache->lock, flags);
}
} }
/* /*
......
...@@ -19,30 +19,49 @@ ...@@ -19,30 +19,49 @@
#include <linux/iommu.h> #include <linux/iommu.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/of.h> #include <linux/of.h>
#include <linux/of_device.h>
#include <linux/of_iommu.h>
#include <linux/of_platform.h> #include <linux/of_platform.h>
#include <linux/platform_device.h> #include <linux/platform_device.h>
#include <linux/sizes.h> #include <linux/sizes.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/sys_soc.h>
#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) #if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
#include <asm/dma-iommu.h> #include <asm/dma-iommu.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#else
#define arm_iommu_create_mapping(...) NULL
#define arm_iommu_attach_device(...) -ENODEV
#define arm_iommu_release_mapping(...) do {} while (0)
#define arm_iommu_detach_device(...) do {} while (0)
#endif #endif
#include "io-pgtable.h" #include "io-pgtable.h"
#define IPMMU_CTX_MAX 1 #define IPMMU_CTX_MAX 8
struct ipmmu_features {
bool use_ns_alias_offset;
bool has_cache_leaf_nodes;
unsigned int number_of_contexts;
bool setup_imbuscr;
bool twobit_imttbcr_sl0;
};
struct ipmmu_vmsa_device { struct ipmmu_vmsa_device {
struct device *dev; struct device *dev;
void __iomem *base; void __iomem *base;
struct iommu_device iommu; struct iommu_device iommu;
struct ipmmu_vmsa_device *root;
const struct ipmmu_features *features;
unsigned int num_utlbs; unsigned int num_utlbs;
unsigned int num_ctx;
spinlock_t lock; /* Protects ctx and domains[] */ spinlock_t lock; /* Protects ctx and domains[] */
DECLARE_BITMAP(ctx, IPMMU_CTX_MAX); DECLARE_BITMAP(ctx, IPMMU_CTX_MAX);
struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX]; struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX];
struct iommu_group *group;
struct dma_iommu_mapping *mapping; struct dma_iommu_mapping *mapping;
}; };
...@@ -57,18 +76,12 @@ struct ipmmu_vmsa_domain { ...@@ -57,18 +76,12 @@ struct ipmmu_vmsa_domain {
spinlock_t lock; /* Protects mappings */ spinlock_t lock; /* Protects mappings */
}; };
struct ipmmu_vmsa_iommu_priv {
struct ipmmu_vmsa_device *mmu;
struct device *dev;
struct list_head list;
};
static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom) static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom)
{ {
return container_of(dom, struct ipmmu_vmsa_domain, io_domain); return container_of(dom, struct ipmmu_vmsa_domain, io_domain);
} }
static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev) static struct ipmmu_vmsa_device *to_ipmmu(struct device *dev)
{ {
return dev->iommu_fwspec ? dev->iommu_fwspec->iommu_priv : NULL; return dev->iommu_fwspec ? dev->iommu_fwspec->iommu_priv : NULL;
} }
...@@ -133,6 +146,10 @@ static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev) ...@@ -133,6 +146,10 @@ static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev)
#define IMTTBCR_TSZ0_MASK (7 << 0) #define IMTTBCR_TSZ0_MASK (7 << 0)
#define IMTTBCR_TSZ0_SHIFT O #define IMTTBCR_TSZ0_SHIFT O
#define IMTTBCR_SL0_TWOBIT_LVL_3 (0 << 6)
#define IMTTBCR_SL0_TWOBIT_LVL_2 (1 << 6)
#define IMTTBCR_SL0_TWOBIT_LVL_1 (2 << 6)
#define IMBUSCR 0x000c #define IMBUSCR 0x000c
#define IMBUSCR_DVM (1 << 2) #define IMBUSCR_DVM (1 << 2)
#define IMBUSCR_BUSSEL_SYS (0 << 0) #define IMBUSCR_BUSSEL_SYS (0 << 0)
...@@ -193,6 +210,36 @@ static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev) ...@@ -193,6 +210,36 @@ static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev)
#define IMUASID_ASID0_MASK (0xff << 0) #define IMUASID_ASID0_MASK (0xff << 0)
#define IMUASID_ASID0_SHIFT 0 #define IMUASID_ASID0_SHIFT 0
/* -----------------------------------------------------------------------------
* Root device handling
*/
static struct platform_driver ipmmu_driver;
static bool ipmmu_is_root(struct ipmmu_vmsa_device *mmu)
{
return mmu->root == mmu;
}
static int __ipmmu_check_device(struct device *dev, void *data)
{
struct ipmmu_vmsa_device *mmu = dev_get_drvdata(dev);
struct ipmmu_vmsa_device **rootp = data;
if (ipmmu_is_root(mmu))
*rootp = mmu;
return 0;
}
static struct ipmmu_vmsa_device *ipmmu_find_root(void)
{
struct ipmmu_vmsa_device *root = NULL;
return driver_for_each_device(&ipmmu_driver.driver, NULL, &root,
__ipmmu_check_device) == 0 ? root : NULL;
}
/* ----------------------------------------------------------------------------- /* -----------------------------------------------------------------------------
* Read/Write Access * Read/Write Access
*/ */
...@@ -208,15 +255,29 @@ static void ipmmu_write(struct ipmmu_vmsa_device *mmu, unsigned int offset, ...@@ -208,15 +255,29 @@ static void ipmmu_write(struct ipmmu_vmsa_device *mmu, unsigned int offset,
iowrite32(data, mmu->base + offset); iowrite32(data, mmu->base + offset);
} }
static u32 ipmmu_ctx_read(struct ipmmu_vmsa_domain *domain, unsigned int reg) static u32 ipmmu_ctx_read_root(struct ipmmu_vmsa_domain *domain,
unsigned int reg)
{ {
return ipmmu_read(domain->mmu, domain->context_id * IM_CTX_SIZE + reg); return ipmmu_read(domain->mmu->root,
domain->context_id * IM_CTX_SIZE + reg);
} }
static void ipmmu_ctx_write(struct ipmmu_vmsa_domain *domain, unsigned int reg, static void ipmmu_ctx_write_root(struct ipmmu_vmsa_domain *domain,
u32 data) unsigned int reg, u32 data)
{ {
ipmmu_write(domain->mmu, domain->context_id * IM_CTX_SIZE + reg, data); ipmmu_write(domain->mmu->root,
domain->context_id * IM_CTX_SIZE + reg, data);
}
static void ipmmu_ctx_write_all(struct ipmmu_vmsa_domain *domain,
unsigned int reg, u32 data)
{
if (domain->mmu != domain->mmu->root)
ipmmu_write(domain->mmu,
domain->context_id * IM_CTX_SIZE + reg, data);
ipmmu_write(domain->mmu->root,
domain->context_id * IM_CTX_SIZE + reg, data);
} }
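The split into _root and _all accessors above reflects the new root/leaf topology: shared context state lives in the root instance, while control writes such as IMCTR must also reach the cache (leaf) instance that issued them. A minimal userspace model of that mirroring; the types and register array are invented, and real register access is replaced by printf.

#include <stdio.h>

struct fake_mmu {
	const char *name;
	struct fake_mmu *root;
	unsigned int regs[8];
};

static void ctx_write_all(struct fake_mmu *mmu, unsigned int reg,
			  unsigned int data)
{
	if (mmu != mmu->root) {
		mmu->regs[reg] = data;		/* leaf copy */
		printf("write %s reg %u = %#x\n", mmu->name, reg, data);
	}
	mmu->root->regs[reg] = data;		/* root always written */
	printf("write %s reg %u = %#x\n", mmu->root->name, reg, data);
}

int main(void)
{
	struct fake_mmu root = { .name = "root" };
	struct fake_mmu leaf = { .name = "leaf", .root = &root };

	root.root = &root;
	ctx_write_all(&leaf, 0, 0x1);
	return 0;
}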
/* ----------------------------------------------------------------------------- /* -----------------------------------------------------------------------------
...@@ -228,7 +289,7 @@ static void ipmmu_tlb_sync(struct ipmmu_vmsa_domain *domain) ...@@ -228,7 +289,7 @@ static void ipmmu_tlb_sync(struct ipmmu_vmsa_domain *domain)
{ {
unsigned int count = 0; unsigned int count = 0;
while (ipmmu_ctx_read(domain, IMCTR) & IMCTR_FLUSH) { while (ipmmu_ctx_read_root(domain, IMCTR) & IMCTR_FLUSH) {
cpu_relax(); cpu_relax();
if (++count == TLB_LOOP_TIMEOUT) { if (++count == TLB_LOOP_TIMEOUT) {
dev_err_ratelimited(domain->mmu->dev, dev_err_ratelimited(domain->mmu->dev,
...@@ -243,9 +304,9 @@ static void ipmmu_tlb_invalidate(struct ipmmu_vmsa_domain *domain) ...@@ -243,9 +304,9 @@ static void ipmmu_tlb_invalidate(struct ipmmu_vmsa_domain *domain)
{ {
u32 reg; u32 reg;
reg = ipmmu_ctx_read(domain, IMCTR); reg = ipmmu_ctx_read_root(domain, IMCTR);
reg |= IMCTR_FLUSH; reg |= IMCTR_FLUSH;
ipmmu_ctx_write(domain, IMCTR, reg); ipmmu_ctx_write_all(domain, IMCTR, reg);
ipmmu_tlb_sync(domain); ipmmu_tlb_sync(domain);
} }
...@@ -313,11 +374,12 @@ static int ipmmu_domain_allocate_context(struct ipmmu_vmsa_device *mmu, ...@@ -313,11 +374,12 @@ static int ipmmu_domain_allocate_context(struct ipmmu_vmsa_device *mmu,
spin_lock_irqsave(&mmu->lock, flags); spin_lock_irqsave(&mmu->lock, flags);
ret = find_first_zero_bit(mmu->ctx, IPMMU_CTX_MAX); ret = find_first_zero_bit(mmu->ctx, mmu->num_ctx);
if (ret != IPMMU_CTX_MAX) { if (ret != mmu->num_ctx) {
mmu->domains[ret] = domain; mmu->domains[ret] = domain;
set_bit(ret, mmu->ctx); set_bit(ret, mmu->ctx);
} } else
ret = -EBUSY;
spin_unlock_irqrestore(&mmu->lock, flags); spin_unlock_irqrestore(&mmu->lock, flags);
...@@ -340,6 +402,7 @@ static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu, ...@@ -340,6 +402,7 @@ static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu,
static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
{ {
u64 ttbr; u64 ttbr;
u32 tmp;
int ret; int ret;
/* /*
...@@ -364,51 +427,59 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) ...@@ -364,51 +427,59 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
* TODO: Add support for coherent walk through CCI with DVM and remove * TODO: Add support for coherent walk through CCI with DVM and remove
* cache handling. For now, delegate it to the io-pgtable code. * cache handling. For now, delegate it to the io-pgtable code.
*/ */
domain->cfg.iommu_dev = domain->mmu->dev; domain->cfg.iommu_dev = domain->mmu->root->dev;
/* /*
* Find an unused context. * Find an unused context.
*/ */
ret = ipmmu_domain_allocate_context(domain->mmu, domain); ret = ipmmu_domain_allocate_context(domain->mmu->root, domain);
if (ret == IPMMU_CTX_MAX) if (ret < 0)
return -EBUSY; return ret;
domain->context_id = ret; domain->context_id = ret;
domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg, domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg,
domain); domain);
if (!domain->iop) { if (!domain->iop) {
ipmmu_domain_free_context(domain->mmu, domain->context_id); ipmmu_domain_free_context(domain->mmu->root,
domain->context_id);
return -EINVAL; return -EINVAL;
} }
/* TTBR0 */ /* TTBR0 */
ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0];
ipmmu_ctx_write(domain, IMTTLBR0, ttbr); ipmmu_ctx_write_root(domain, IMTTLBR0, ttbr);
ipmmu_ctx_write(domain, IMTTUBR0, ttbr >> 32); ipmmu_ctx_write_root(domain, IMTTUBR0, ttbr >> 32);
/* /*
* TTBCR * TTBCR
* We use long descriptors with inner-shareable WBWA tables and allocate * We use long descriptors with inner-shareable WBWA tables and allocate
* the whole 32-bit VA space to TTBR0. * the whole 32-bit VA space to TTBR0.
*/ */
ipmmu_ctx_write(domain, IMTTBCR, IMTTBCR_EAE | if (domain->mmu->features->twobit_imttbcr_sl0)
tmp = IMTTBCR_SL0_TWOBIT_LVL_1;
else
tmp = IMTTBCR_SL0_LVL_1;
ipmmu_ctx_write_root(domain, IMTTBCR, IMTTBCR_EAE |
IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA | IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA |
IMTTBCR_IRGN0_WB_WA | IMTTBCR_SL0_LVL_1); IMTTBCR_IRGN0_WB_WA | tmp);
/* MAIR0 */ /* MAIR0 */
ipmmu_ctx_write(domain, IMMAIR0, domain->cfg.arm_lpae_s1_cfg.mair[0]); ipmmu_ctx_write_root(domain, IMMAIR0,
domain->cfg.arm_lpae_s1_cfg.mair[0]);
/* IMBUSCR */ /* IMBUSCR */
ipmmu_ctx_write(domain, IMBUSCR, if (domain->mmu->features->setup_imbuscr)
ipmmu_ctx_read(domain, IMBUSCR) & ipmmu_ctx_write_root(domain, IMBUSCR,
ipmmu_ctx_read_root(domain, IMBUSCR) &
~(IMBUSCR_DVM | IMBUSCR_BUSSEL_MASK)); ~(IMBUSCR_DVM | IMBUSCR_BUSSEL_MASK));
/* /*
* IMSTR * IMSTR
* Clear all interrupt flags. * Clear all interrupt flags.
*/ */
ipmmu_ctx_write(domain, IMSTR, ipmmu_ctx_read(domain, IMSTR)); ipmmu_ctx_write_root(domain, IMSTR, ipmmu_ctx_read_root(domain, IMSTR));
/* /*
* IMCTR * IMCTR
...@@ -417,7 +488,8 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) ...@@ -417,7 +488,8 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
* software management as we have no use for it. Flush the TLB as * software management as we have no use for it. Flush the TLB as
* required when modifying the context registers. * required when modifying the context registers.
*/ */
ipmmu_ctx_write(domain, IMCTR, IMCTR_INTEN | IMCTR_FLUSH | IMCTR_MMUEN); ipmmu_ctx_write_all(domain, IMCTR,
IMCTR_INTEN | IMCTR_FLUSH | IMCTR_MMUEN);
return 0; return 0;
} }
...@@ -430,9 +502,9 @@ static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain) ...@@ -430,9 +502,9 @@ static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain)
* *
* TODO: Is TLB flush really needed ? * TODO: Is TLB flush really needed ?
*/ */
ipmmu_ctx_write(domain, IMCTR, IMCTR_FLUSH); ipmmu_ctx_write_all(domain, IMCTR, IMCTR_FLUSH);
ipmmu_tlb_sync(domain); ipmmu_tlb_sync(domain);
ipmmu_domain_free_context(domain->mmu, domain->context_id); ipmmu_domain_free_context(domain->mmu->root, domain->context_id);
} }
/* ----------------------------------------------------------------------------- /* -----------------------------------------------------------------------------
...@@ -446,11 +518,11 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain) ...@@ -446,11 +518,11 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain)
u32 status; u32 status;
u32 iova; u32 iova;
status = ipmmu_ctx_read(domain, IMSTR); status = ipmmu_ctx_read_root(domain, IMSTR);
if (!(status & err_mask)) if (!(status & err_mask))
return IRQ_NONE; return IRQ_NONE;
iova = ipmmu_ctx_read(domain, IMEAR); iova = ipmmu_ctx_read_root(domain, IMEAR);
/* /*
* Clear the error status flags. Unlike traditional interrupt flag * Clear the error status flags. Unlike traditional interrupt flag
...@@ -458,7 +530,7 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain) ...@@ -458,7 +530,7 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain)
* seems to require 0. The error address register must be read before, * seems to require 0. The error address register must be read before,
* otherwise its value will be 0. * otherwise its value will be 0.
*/ */
ipmmu_ctx_write(domain, IMSTR, 0); ipmmu_ctx_write_root(domain, IMSTR, 0);
/* Log fatal errors. */ /* Log fatal errors. */
if (status & IMSTR_MHIT) if (status & IMSTR_MHIT)
...@@ -499,7 +571,7 @@ static irqreturn_t ipmmu_irq(int irq, void *dev) ...@@ -499,7 +571,7 @@ static irqreturn_t ipmmu_irq(int irq, void *dev)
/* /*
* Check interrupts for all active contexts. * Check interrupts for all active contexts.
*/ */
for (i = 0; i < IPMMU_CTX_MAX; i++) { for (i = 0; i < mmu->num_ctx; i++) {
if (!mmu->domains[i]) if (!mmu->domains[i])
continue; continue;
if (ipmmu_domain_irq(mmu->domains[i]) == IRQ_HANDLED) if (ipmmu_domain_irq(mmu->domains[i]) == IRQ_HANDLED)
...@@ -528,6 +600,27 @@ static struct iommu_domain *__ipmmu_domain_alloc(unsigned type) ...@@ -528,6 +600,27 @@ static struct iommu_domain *__ipmmu_domain_alloc(unsigned type)
return &domain->io_domain; return &domain->io_domain;
} }
static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
{
struct iommu_domain *io_domain = NULL;
switch (type) {
case IOMMU_DOMAIN_UNMANAGED:
io_domain = __ipmmu_domain_alloc(type);
break;
case IOMMU_DOMAIN_DMA:
io_domain = __ipmmu_domain_alloc(type);
if (io_domain && iommu_get_dma_cookie(io_domain)) {
kfree(io_domain);
io_domain = NULL;
}
break;
}
return io_domain;
}
static void ipmmu_domain_free(struct iommu_domain *io_domain) static void ipmmu_domain_free(struct iommu_domain *io_domain)
{ {
struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
...@@ -536,6 +629,7 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain) ...@@ -536,6 +629,7 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain)
* Free the domain resources. We assume that all devices have already * Free the domain resources. We assume that all devices have already
* been detached. * been detached.
*/ */
iommu_put_dma_cookie(io_domain);
ipmmu_domain_destroy_context(domain); ipmmu_domain_destroy_context(domain);
free_io_pgtable_ops(domain->iop); free_io_pgtable_ops(domain->iop);
kfree(domain); kfree(domain);
...@@ -544,15 +638,14 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain) ...@@ -544,15 +638,14 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain)
static int ipmmu_attach_device(struct iommu_domain *io_domain, static int ipmmu_attach_device(struct iommu_domain *io_domain,
struct device *dev) struct device *dev)
{ {
struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
struct iommu_fwspec *fwspec = dev->iommu_fwspec; struct iommu_fwspec *fwspec = dev->iommu_fwspec;
struct ipmmu_vmsa_device *mmu = priv->mmu; struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
unsigned long flags; unsigned long flags;
unsigned int i; unsigned int i;
int ret = 0; int ret = 0;
if (!priv || !priv->mmu) { if (!mmu) {
dev_err(dev, "Cannot attach to IPMMU\n"); dev_err(dev, "Cannot attach to IPMMU\n");
return -ENXIO; return -ENXIO;
} }
...@@ -563,6 +656,13 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, ...@@ -563,6 +656,13 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
/* The domain hasn't been used yet, initialize it. */ /* The domain hasn't been used yet, initialize it. */
domain->mmu = mmu; domain->mmu = mmu;
ret = ipmmu_domain_init_context(domain); ret = ipmmu_domain_init_context(domain);
if (ret < 0) {
dev_err(dev, "Unable to initialize IPMMU context\n");
domain->mmu = NULL;
} else {
dev_info(dev, "Using IPMMU context %u\n",
domain->context_id);
}
} else if (domain->mmu != mmu) { } else if (domain->mmu != mmu) {
/* /*
* Something is wrong, we can't attach two devices using * Something is wrong, we can't attach two devices using
...@@ -619,6 +719,14 @@ static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova, ...@@ -619,6 +719,14 @@ static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova,
return domain->iop->unmap(domain->iop, iova, size); return domain->iop->unmap(domain->iop, iova, size);
} }
static void ipmmu_iotlb_sync(struct iommu_domain *io_domain)
{
struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
if (domain->mmu)
ipmmu_tlb_flush_all(domain);
}
static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain,
dma_addr_t iova) dma_addr_t iova)
{ {
...@@ -633,62 +741,53 @@ static int ipmmu_init_platform_device(struct device *dev, ...@@ -633,62 +741,53 @@ static int ipmmu_init_platform_device(struct device *dev,
struct of_phandle_args *args) struct of_phandle_args *args)
{ {
struct platform_device *ipmmu_pdev; struct platform_device *ipmmu_pdev;
struct ipmmu_vmsa_iommu_priv *priv;
ipmmu_pdev = of_find_device_by_node(args->np); ipmmu_pdev = of_find_device_by_node(args->np);
if (!ipmmu_pdev) if (!ipmmu_pdev)
return -ENODEV; return -ENODEV;
priv = kzalloc(sizeof(*priv), GFP_KERNEL); dev->iommu_fwspec->iommu_priv = platform_get_drvdata(ipmmu_pdev);
if (!priv)
return -ENOMEM;
priv->mmu = platform_get_drvdata(ipmmu_pdev);
priv->dev = dev;
dev->iommu_fwspec->iommu_priv = priv;
return 0; return 0;
} }
static bool ipmmu_slave_whitelist(struct device *dev)
{
/* By default, do not allow use of IPMMU */
return false;
}
static const struct soc_device_attribute soc_r8a7795[] = {
{ .soc_id = "r8a7795", },
{ /* sentinel */ }
};
static int ipmmu_of_xlate(struct device *dev, static int ipmmu_of_xlate(struct device *dev,
struct of_phandle_args *spec) struct of_phandle_args *spec)
{ {
/* For R-Car Gen3 use a white list to opt-in slave devices */
if (soc_device_match(soc_r8a7795) && !ipmmu_slave_whitelist(dev))
return -ENODEV;
iommu_fwspec_add_ids(dev, spec->args, 1); iommu_fwspec_add_ids(dev, spec->args, 1);
/* Initialize once - xlate() will call multiple times */ /* Initialize once - xlate() will call multiple times */
if (to_priv(dev)) if (to_ipmmu(dev))
return 0; return 0;
return ipmmu_init_platform_device(dev, spec); return ipmmu_init_platform_device(dev, spec);
} }
#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) static int ipmmu_init_arm_mapping(struct device *dev)
static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
{
if (type != IOMMU_DOMAIN_UNMANAGED)
return NULL;
return __ipmmu_domain_alloc(type);
}
static int ipmmu_add_device(struct device *dev)
{ {
struct ipmmu_vmsa_device *mmu = NULL; struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
struct iommu_group *group; struct iommu_group *group;
int ret; int ret;
/*
* Only let through devices that have been verified in xlate()
*/
if (!to_priv(dev))
return -ENODEV;
/* Create a device group and add the device to it. */ /* Create a device group and add the device to it. */
group = iommu_group_alloc(); group = iommu_group_alloc();
if (IS_ERR(group)) { if (IS_ERR(group)) {
dev_err(dev, "Failed to allocate IOMMU group\n"); dev_err(dev, "Failed to allocate IOMMU group\n");
ret = PTR_ERR(group); return PTR_ERR(group);
goto error;
} }
ret = iommu_group_add_device(group, dev); ret = iommu_group_add_device(group, dev);
...@@ -696,8 +795,7 @@ static int ipmmu_add_device(struct device *dev) ...@@ -696,8 +795,7 @@ static int ipmmu_add_device(struct device *dev)
if (ret < 0) { if (ret < 0) {
dev_err(dev, "Failed to add device to IPMMU group\n"); dev_err(dev, "Failed to add device to IPMMU group\n");
group = NULL; return ret;
goto error;
} }
/* /*
...@@ -709,7 +807,6 @@ static int ipmmu_add_device(struct device *dev) ...@@ -709,7 +807,6 @@ static int ipmmu_add_device(struct device *dev)
* - Make the mapping size configurable ? We currently use a 2GB mapping * - Make the mapping size configurable ? We currently use a 2GB mapping
* at a 1GB offset to ensure that NULL VAs will fault. * at a 1GB offset to ensure that NULL VAs will fault.
*/ */
mmu = to_priv(dev)->mmu;
if (!mmu->mapping) { if (!mmu->mapping) {
struct dma_iommu_mapping *mapping; struct dma_iommu_mapping *mapping;
...@@ -734,159 +831,73 @@ static int ipmmu_add_device(struct device *dev) ...@@ -734,159 +831,73 @@ static int ipmmu_add_device(struct device *dev)
return 0; return 0;
error: error:
if (mmu)
arm_iommu_release_mapping(mmu->mapping);
if (!IS_ERR_OR_NULL(group))
iommu_group_remove_device(dev); iommu_group_remove_device(dev);
if (mmu->mapping)
arm_iommu_release_mapping(mmu->mapping);
return ret; return ret;
} }
static void ipmmu_remove_device(struct device *dev) static int ipmmu_add_device(struct device *dev)
{
arm_iommu_detach_device(dev);
iommu_group_remove_device(dev);
}
static const struct iommu_ops ipmmu_ops = {
.domain_alloc = ipmmu_domain_alloc,
.domain_free = ipmmu_domain_free,
.attach_dev = ipmmu_attach_device,
.detach_dev = ipmmu_detach_device,
.map = ipmmu_map,
.unmap = ipmmu_unmap,
.map_sg = default_iommu_map_sg,
.iova_to_phys = ipmmu_iova_to_phys,
.add_device = ipmmu_add_device,
.remove_device = ipmmu_remove_device,
.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
.of_xlate = ipmmu_of_xlate,
};
#endif /* !CONFIG_ARM && CONFIG_IOMMU_DMA */
#ifdef CONFIG_IOMMU_DMA
static DEFINE_SPINLOCK(ipmmu_slave_devices_lock);
static LIST_HEAD(ipmmu_slave_devices);
static struct iommu_domain *ipmmu_domain_alloc_dma(unsigned type)
{
struct iommu_domain *io_domain = NULL;
switch (type) {
case IOMMU_DOMAIN_UNMANAGED:
io_domain = __ipmmu_domain_alloc(type);
break;
case IOMMU_DOMAIN_DMA:
io_domain = __ipmmu_domain_alloc(type);
if (io_domain)
iommu_get_dma_cookie(io_domain);
break;
}
return io_domain;
}
static void ipmmu_domain_free_dma(struct iommu_domain *io_domain)
{
switch (io_domain->type) {
case IOMMU_DOMAIN_DMA:
iommu_put_dma_cookie(io_domain);
/* fall-through */
default:
ipmmu_domain_free(io_domain);
break;
}
}
static int ipmmu_add_device_dma(struct device *dev)
{ {
struct iommu_group *group; struct iommu_group *group;
/* /*
* Only let through devices that have been verified in xlate() * Only let through devices that have been verified in xlate()
*/ */
if (!to_priv(dev)) if (!to_ipmmu(dev))
return -ENODEV; return -ENODEV;
if (IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA))
return ipmmu_init_arm_mapping(dev);
group = iommu_group_get_for_dev(dev); group = iommu_group_get_for_dev(dev);
if (IS_ERR(group)) if (IS_ERR(group))
return PTR_ERR(group); return PTR_ERR(group);
spin_lock(&ipmmu_slave_devices_lock); iommu_group_put(group);
list_add(&to_priv(dev)->list, &ipmmu_slave_devices);
spin_unlock(&ipmmu_slave_devices_lock);
return 0; return 0;
} }
static void ipmmu_remove_device_dma(struct device *dev) static void ipmmu_remove_device(struct device *dev)
{ {
struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); arm_iommu_detach_device(dev);
spin_lock(&ipmmu_slave_devices_lock);
list_del(&priv->list);
spin_unlock(&ipmmu_slave_devices_lock);
iommu_group_remove_device(dev); iommu_group_remove_device(dev);
} }
static struct device *ipmmu_find_sibling_device(struct device *dev) static struct iommu_group *ipmmu_find_group(struct device *dev)
{
struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
struct ipmmu_vmsa_iommu_priv *sibling_priv = NULL;
bool found = false;
spin_lock(&ipmmu_slave_devices_lock);
list_for_each_entry(sibling_priv, &ipmmu_slave_devices, list) {
if (priv == sibling_priv)
continue;
if (sibling_priv->mmu == priv->mmu) {
found = true;
break;
}
}
spin_unlock(&ipmmu_slave_devices_lock);
return found ? sibling_priv->dev : NULL;
}
static struct iommu_group *ipmmu_find_group_dma(struct device *dev)
{ {
struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
struct iommu_group *group; struct iommu_group *group;
struct device *sibling;
sibling = ipmmu_find_sibling_device(dev); if (mmu->group)
if (sibling) return iommu_group_ref_get(mmu->group);
group = iommu_group_get(sibling);
if (!sibling || IS_ERR(group)) group = iommu_group_alloc();
group = generic_device_group(dev); if (!IS_ERR(group))
mmu->group = group;
return group; return group;
} }
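The group lookup above implements a lazy per-IPMMU singleton: the first caller allocates the group, later callers only take another reference. A simplified standalone sketch of that pattern, with an invented fake_group type and a plain integer refcount instead of the iommu_group API:

#include <stdio.h>
#include <stdlib.h>

struct fake_group { int refs; };

static struct fake_group *cached_group;

static struct fake_group *get_group(void)
{
	if (cached_group) {
		cached_group->refs++;		/* reuse the cached group */
		return cached_group;
	}

	cached_group = calloc(1, sizeof(*cached_group));
	if (cached_group)
		cached_group->refs = 1;
	return cached_group;
}

int main(void)
{
	struct fake_group *g;

	get_group();
	g = get_group();
	printf("refs after two calls: %d\n", g ? g->refs : 0);
	return 0;
}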
static const struct iommu_ops ipmmu_ops = { static const struct iommu_ops ipmmu_ops = {
.domain_alloc = ipmmu_domain_alloc_dma, .domain_alloc = ipmmu_domain_alloc,
.domain_free = ipmmu_domain_free_dma, .domain_free = ipmmu_domain_free,
.attach_dev = ipmmu_attach_device, .attach_dev = ipmmu_attach_device,
.detach_dev = ipmmu_detach_device, .detach_dev = ipmmu_detach_device,
.map = ipmmu_map, .map = ipmmu_map,
.unmap = ipmmu_unmap, .unmap = ipmmu_unmap,
.flush_iotlb_all = ipmmu_iotlb_sync,
.iotlb_sync = ipmmu_iotlb_sync,
.map_sg = default_iommu_map_sg, .map_sg = default_iommu_map_sg,
.iova_to_phys = ipmmu_iova_to_phys, .iova_to_phys = ipmmu_iova_to_phys,
.add_device = ipmmu_add_device_dma, .add_device = ipmmu_add_device,
.remove_device = ipmmu_remove_device_dma, .remove_device = ipmmu_remove_device,
.device_group = ipmmu_find_group_dma, .device_group = ipmmu_find_group,
.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
.of_xlate = ipmmu_of_xlate, .of_xlate = ipmmu_of_xlate,
}; };
#endif /* CONFIG_IOMMU_DMA */
/* ----------------------------------------------------------------------------- /* -----------------------------------------------------------------------------
* Probe/remove and init * Probe/remove and init
*/ */
...@@ -896,10 +907,40 @@ static void ipmmu_device_reset(struct ipmmu_vmsa_device *mmu) ...@@ -896,10 +907,40 @@ static void ipmmu_device_reset(struct ipmmu_vmsa_device *mmu)
unsigned int i; unsigned int i;
/* Disable all contexts. */ /* Disable all contexts. */
for (i = 0; i < 4; ++i) for (i = 0; i < mmu->num_ctx; ++i)
ipmmu_write(mmu, i * IM_CTX_SIZE + IMCTR, 0); ipmmu_write(mmu, i * IM_CTX_SIZE + IMCTR, 0);
} }
static const struct ipmmu_features ipmmu_features_default = {
.use_ns_alias_offset = true,
.has_cache_leaf_nodes = false,
.number_of_contexts = 1, /* software only tested with one context */
.setup_imbuscr = true,
.twobit_imttbcr_sl0 = false,
};
static const struct ipmmu_features ipmmu_features_r8a7795 = {
.use_ns_alias_offset = false,
.has_cache_leaf_nodes = true,
.number_of_contexts = 8,
.setup_imbuscr = false,
.twobit_imttbcr_sl0 = true,
};
static const struct of_device_id ipmmu_of_ids[] = {
{
.compatible = "renesas,ipmmu-vmsa",
.data = &ipmmu_features_default,
}, {
.compatible = "renesas,ipmmu-r8a7795",
.data = &ipmmu_features_r8a7795,
}, {
/* Terminator */
},
};
MODULE_DEVICE_TABLE(of, ipmmu_of_ids);
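The match table above is what lets one driver serve both generations: each compatible string carries a pointer to its feature block, which probe later retrieves with of_device_get_match_data(). A standalone sketch of that lookup shape, using strcmp over a sentinel-terminated table; the feature fields shown are a reduced, illustrative subset.

#include <stdio.h>
#include <string.h>

struct features { int num_ctx; };

struct match { const char *compatible; const struct features *data; };

static const struct features feat_default = { .num_ctx = 1 };
static const struct features feat_r8a7795 = { .num_ctx = 8 };

static const struct match matches[] = {
	{ "renesas,ipmmu-vmsa",    &feat_default },
	{ "renesas,ipmmu-r8a7795", &feat_r8a7795 },
	{ NULL, NULL },				/* terminator */
};

static const struct features *lookup(const char *compat)
{
	const struct match *m;

	for (m = matches; m->compatible; m++)
		if (!strcmp(m->compatible, compat))
			return m->data;
	return NULL;
}

int main(void)
{
	const struct features *f = lookup("renesas,ipmmu-r8a7795");

	printf("contexts: %d\n", f ? f->num_ctx : -1);
	return 0;
}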
static int ipmmu_probe(struct platform_device *pdev) static int ipmmu_probe(struct platform_device *pdev)
{ {
struct ipmmu_vmsa_device *mmu; struct ipmmu_vmsa_device *mmu;
...@@ -917,6 +958,8 @@ static int ipmmu_probe(struct platform_device *pdev) ...@@ -917,6 +958,8 @@ static int ipmmu_probe(struct platform_device *pdev)
mmu->num_utlbs = 32; mmu->num_utlbs = 32;
spin_lock_init(&mmu->lock); spin_lock_init(&mmu->lock);
bitmap_zero(mmu->ctx, IPMMU_CTX_MAX); bitmap_zero(mmu->ctx, IPMMU_CTX_MAX);
mmu->features = of_device_get_match_data(&pdev->dev);
dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40));
/* Map I/O memory and request IRQ. */ /* Map I/O memory and request IRQ. */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0); res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
...@@ -936,9 +979,32 @@ static int ipmmu_probe(struct platform_device *pdev) ...@@ -936,9 +979,32 @@ static int ipmmu_probe(struct platform_device *pdev)
* Offset the registers base unconditionally to point to the non-secure * Offset the registers base unconditionally to point to the non-secure
* alias space for now. * alias space for now.
*/ */
if (mmu->features->use_ns_alias_offset)
mmu->base += IM_NS_ALIAS_OFFSET; mmu->base += IM_NS_ALIAS_OFFSET;
mmu->num_ctx = min_t(unsigned int, IPMMU_CTX_MAX,
mmu->features->number_of_contexts);
irq = platform_get_irq(pdev, 0); irq = platform_get_irq(pdev, 0);
/*
* Determine if this IPMMU instance is a root device by checking for
* the lack of has_cache_leaf_nodes flag or renesas,ipmmu-main property.
*/
if (!mmu->features->has_cache_leaf_nodes ||
!of_find_property(pdev->dev.of_node, "renesas,ipmmu-main", NULL))
mmu->root = mmu;
else
mmu->root = ipmmu_find_root();
/*
* Wait until the root device has been registered for sure.
*/
if (!mmu->root)
return -EPROBE_DEFER;
/* Root devices have mandatory IRQs */
if (ipmmu_is_root(mmu)) {
if (irq < 0) { if (irq < 0) {
dev_err(&pdev->dev, "no IRQ found\n"); dev_err(&pdev->dev, "no IRQ found\n");
return irq; return irq;
...@@ -952,19 +1018,33 @@ static int ipmmu_probe(struct platform_device *pdev) ...@@ -952,19 +1018,33 @@ static int ipmmu_probe(struct platform_device *pdev)
} }
ipmmu_device_reset(mmu); ipmmu_device_reset(mmu);
}
/*
* Register the IPMMU to the IOMMU subsystem in the following cases:
* - R-Car Gen2 IPMMU (all devices registered)
* - R-Car Gen3 IPMMU (leaf devices only - skip root IPMMU-MM device)
*/
if (!mmu->features->has_cache_leaf_nodes || !ipmmu_is_root(mmu)) {
ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL,
dev_name(&pdev->dev)); dev_name(&pdev->dev));
if (ret) if (ret)
return ret; return ret;
iommu_device_set_ops(&mmu->iommu, &ipmmu_ops); iommu_device_set_ops(&mmu->iommu, &ipmmu_ops);
iommu_device_set_fwnode(&mmu->iommu, &pdev->dev.of_node->fwnode); iommu_device_set_fwnode(&mmu->iommu,
&pdev->dev.of_node->fwnode);
ret = iommu_device_register(&mmu->iommu); ret = iommu_device_register(&mmu->iommu);
if (ret) if (ret)
return ret; return ret;
#if defined(CONFIG_IOMMU_DMA)
if (!iommu_present(&platform_bus_type))
bus_set_iommu(&platform_bus_type, &ipmmu_ops);
#endif
}
/* /*
* We can't create the ARM mapping here as it requires the bus to have * We can't create the ARM mapping here as it requires the bus to have
* an IOMMU, which only happens when bus_set_iommu() is called in * an IOMMU, which only happens when bus_set_iommu() is called in
...@@ -983,20 +1063,13 @@ static int ipmmu_remove(struct platform_device *pdev) ...@@ -983,20 +1063,13 @@ static int ipmmu_remove(struct platform_device *pdev)
iommu_device_sysfs_remove(&mmu->iommu); iommu_device_sysfs_remove(&mmu->iommu);
iommu_device_unregister(&mmu->iommu); iommu_device_unregister(&mmu->iommu);
#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
arm_iommu_release_mapping(mmu->mapping); arm_iommu_release_mapping(mmu->mapping);
#endif
ipmmu_device_reset(mmu); ipmmu_device_reset(mmu);
return 0; return 0;
} }
static const struct of_device_id ipmmu_of_ids[] = {
{ .compatible = "renesas,ipmmu-vmsa", },
{ }
};
static struct platform_driver ipmmu_driver = { static struct platform_driver ipmmu_driver = {
.driver = { .driver = {
.name = "ipmmu-vmsa", .name = "ipmmu-vmsa",
...@@ -1008,15 +1081,22 @@ static struct platform_driver ipmmu_driver = { ...@@ -1008,15 +1081,22 @@ static struct platform_driver ipmmu_driver = {
static int __init ipmmu_init(void) static int __init ipmmu_init(void)
{ {
static bool setup_done;
int ret; int ret;
if (setup_done)
return 0;
ret = platform_driver_register(&ipmmu_driver); ret = platform_driver_register(&ipmmu_driver);
if (ret < 0) if (ret < 0)
return ret; return ret;
#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
if (!iommu_present(&platform_bus_type)) if (!iommu_present(&platform_bus_type))
bus_set_iommu(&platform_bus_type, &ipmmu_ops); bus_set_iommu(&platform_bus_type, &ipmmu_ops);
#endif
setup_done = true;
return 0; return 0;
} }
...@@ -1028,6 +1108,19 @@ static void __exit ipmmu_exit(void) ...@@ -1028,6 +1108,19 @@ static void __exit ipmmu_exit(void)
subsys_initcall(ipmmu_init); subsys_initcall(ipmmu_init);
module_exit(ipmmu_exit); module_exit(ipmmu_exit);
#ifdef CONFIG_IOMMU_DMA
static int __init ipmmu_vmsa_iommu_of_setup(struct device_node *np)
{
ipmmu_init();
return 0;
}
IOMMU_OF_DECLARE(ipmmu_vmsa_iommu_of, "renesas,ipmmu-vmsa",
ipmmu_vmsa_iommu_of_setup);
IOMMU_OF_DECLARE(ipmmu_r8a7795_iommu_of, "renesas,ipmmu-r8a7795",
ipmmu_vmsa_iommu_of_setup);
#endif
MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU"); MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU");
MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>"); MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>");
MODULE_LICENSE("GPL v2"); MODULE_LICENSE("GPL v2");
...@@ -392,6 +392,11 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain, ...@@ -392,6 +392,11 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain,
return unmapsz; return unmapsz;
} }
static void mtk_iommu_iotlb_sync(struct iommu_domain *domain)
{
mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
}
static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
dma_addr_t iova) dma_addr_t iova)
{ {
...@@ -491,6 +496,8 @@ static struct iommu_ops mtk_iommu_ops = { ...@@ -491,6 +496,8 @@ static struct iommu_ops mtk_iommu_ops = {
.map = mtk_iommu_map, .map = mtk_iommu_map,
.unmap = mtk_iommu_unmap, .unmap = mtk_iommu_unmap,
.map_sg = default_iommu_map_sg, .map_sg = default_iommu_map_sg,
.flush_iotlb_all = mtk_iommu_iotlb_sync,
.iotlb_sync = mtk_iommu_iotlb_sync,
.iova_to_phys = mtk_iommu_iova_to_phys, .iova_to_phys = mtk_iommu_iova_to_phys,
.add_device = mtk_iommu_add_device, .add_device = mtk_iommu_add_device,
.remove_device = mtk_iommu_remove_device, .remove_device = mtk_iommu_remove_device,
......
...@@ -708,7 +708,7 @@ static struct platform_driver mtk_iommu_driver = { ...@@ -708,7 +708,7 @@ static struct platform_driver mtk_iommu_driver = {
.probe = mtk_iommu_probe, .probe = mtk_iommu_probe,
.remove = mtk_iommu_remove, .remove = mtk_iommu_remove,
.driver = { .driver = {
.name = "mtk-iommu", .name = "mtk-iommu-v1",
.of_match_table = mtk_iommu_of_ids, .of_match_table = mtk_iommu_of_ids,
.pm = &mtk_iommu_pm_ops, .pm = &mtk_iommu_pm_ops,
} }
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
* omap iommu: tlb and pagetable primitives * omap iommu: tlb and pagetable primitives
* *
* Copyright (C) 2008-2010 Nokia Corporation * Copyright (C) 2008-2010 Nokia Corporation
* Copyright (C) 2013-2017 Texas Instruments Incorporated - http://www.ti.com/
* *
* Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>, * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>,
* Paul Mundt and Toshihiro Kobayashi * Paul Mundt and Toshihiro Kobayashi
...@@ -71,13 +72,23 @@ static struct omap_iommu_domain *to_omap_domain(struct iommu_domain *dom) ...@@ -71,13 +72,23 @@ static struct omap_iommu_domain *to_omap_domain(struct iommu_domain *dom)
**/ **/
void omap_iommu_save_ctx(struct device *dev) void omap_iommu_save_ctx(struct device *dev)
{ {
struct omap_iommu *obj = dev_to_omap_iommu(dev); struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
u32 *p = obj->ctx; struct omap_iommu *obj;
u32 *p;
int i; int i;
if (!arch_data)
return;
while (arch_data->iommu_dev) {
obj = arch_data->iommu_dev;
p = obj->ctx;
for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) {
p[i] = iommu_read_reg(obj, i * sizeof(u32)); p[i] = iommu_read_reg(obj, i * sizeof(u32));
dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]); dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i,
p[i]);
}
arch_data++;
} }
} }
EXPORT_SYMBOL_GPL(omap_iommu_save_ctx); EXPORT_SYMBOL_GPL(omap_iommu_save_ctx);
...@@ -88,13 +99,23 @@ EXPORT_SYMBOL_GPL(omap_iommu_save_ctx); ...@@ -88,13 +99,23 @@ EXPORT_SYMBOL_GPL(omap_iommu_save_ctx);
**/ **/
void omap_iommu_restore_ctx(struct device *dev) void omap_iommu_restore_ctx(struct device *dev)
{ {
struct omap_iommu *obj = dev_to_omap_iommu(dev); struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
u32 *p = obj->ctx; struct omap_iommu *obj;
u32 *p;
int i; int i;
if (!arch_data)
return;
while (arch_data->iommu_dev) {
obj = arch_data->iommu_dev;
p = obj->ctx;
for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) {
iommu_write_reg(obj, p[i], i * sizeof(u32)); iommu_write_reg(obj, p[i], i * sizeof(u32));
dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]); dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i,
p[i]);
}
arch_data++;
} }
} }
EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx); EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx);
...@@ -805,7 +826,7 @@ static irqreturn_t iommu_fault_handler(int irq, void *data) ...@@ -805,7 +826,7 @@ static irqreturn_t iommu_fault_handler(int irq, void *data)
struct iommu_domain *domain = obj->domain; struct iommu_domain *domain = obj->domain;
struct omap_iommu_domain *omap_domain = to_omap_domain(domain); struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
if (!omap_domain->iommu_dev) if (!omap_domain->dev)
return IRQ_NONE; return IRQ_NONE;
errs = iommu_report_fault(obj, &da); errs = iommu_report_fault(obj, &da);
...@@ -893,6 +914,24 @@ static void omap_iommu_detach(struct omap_iommu *obj) ...@@ -893,6 +914,24 @@ static void omap_iommu_detach(struct omap_iommu *obj)
dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name); dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name);
} }
static bool omap_iommu_can_register(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
if (!of_device_is_compatible(np, "ti,dra7-dsp-iommu"))
return true;
/*
* restrict IOMMU core registration only for processor-port MDMA MMUs
* on DRA7 DSPs
*/
if ((!strcmp(dev_name(&pdev->dev), "40d01000.mmu")) ||
(!strcmp(dev_name(&pdev->dev), "41501000.mmu")))
return true;
return false;
}
static int omap_iommu_dra7_get_dsp_system_cfg(struct platform_device *pdev, static int omap_iommu_dra7_get_dsp_system_cfg(struct platform_device *pdev,
struct omap_iommu *obj) struct omap_iommu *obj)
{ {
...@@ -984,11 +1023,13 @@ static int omap_iommu_probe(struct platform_device *pdev) ...@@ -984,11 +1023,13 @@ static int omap_iommu_probe(struct platform_device *pdev)
return err; return err;
platform_set_drvdata(pdev, obj); platform_set_drvdata(pdev, obj);
if (omap_iommu_can_register(pdev)) {
obj->group = iommu_group_alloc(); obj->group = iommu_group_alloc();
if (IS_ERR(obj->group)) if (IS_ERR(obj->group))
return PTR_ERR(obj->group); return PTR_ERR(obj->group);
err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL, obj->name); err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL,
obj->name);
if (err) if (err)
goto out_group; goto out_group;
...@@ -997,6 +1038,7 @@ static int omap_iommu_probe(struct platform_device *pdev) ...@@ -997,6 +1038,7 @@ static int omap_iommu_probe(struct platform_device *pdev)
err = iommu_device_register(&obj->iommu); err = iommu_device_register(&obj->iommu);
if (err) if (err)
goto out_sysfs; goto out_sysfs;
}
pm_runtime_irq_safe(obj->dev); pm_runtime_irq_safe(obj->dev);
pm_runtime_enable(obj->dev); pm_runtime_enable(obj->dev);
...@@ -1018,11 +1060,13 @@ static int omap_iommu_remove(struct platform_device *pdev) ...@@ -1018,11 +1060,13 @@ static int omap_iommu_remove(struct platform_device *pdev)
{ {
struct omap_iommu *obj = platform_get_drvdata(pdev); struct omap_iommu *obj = platform_get_drvdata(pdev);
if (obj->group) {
iommu_group_put(obj->group); iommu_group_put(obj->group);
obj->group = NULL; obj->group = NULL;
iommu_device_sysfs_remove(&obj->iommu); iommu_device_sysfs_remove(&obj->iommu);
iommu_device_unregister(&obj->iommu); iommu_device_unregister(&obj->iommu);
}
omap_iommu_debugfs_remove(obj); omap_iommu_debugfs_remove(obj);
...@@ -1068,11 +1112,13 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, ...@@ -1068,11 +1112,13 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
phys_addr_t pa, size_t bytes, int prot) phys_addr_t pa, size_t bytes, int prot)
{ {
struct omap_iommu_domain *omap_domain = to_omap_domain(domain); struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
struct omap_iommu *oiommu = omap_domain->iommu_dev; struct device *dev = omap_domain->dev;
struct device *dev = oiommu->dev; struct omap_iommu_device *iommu;
struct omap_iommu *oiommu;
struct iotlb_entry e; struct iotlb_entry e;
int omap_pgsz; int omap_pgsz;
u32 ret; u32 ret = -EINVAL;
int i;
omap_pgsz = bytes_to_iopgsz(bytes); omap_pgsz = bytes_to_iopgsz(bytes);
if (omap_pgsz < 0) { if (omap_pgsz < 0) {
...@@ -1084,9 +1130,24 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, ...@@ -1084,9 +1130,24 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
iotlb_init_entry(&e, da, pa, omap_pgsz); iotlb_init_entry(&e, da, pa, omap_pgsz);
iommu = omap_domain->iommus;
for (i = 0; i < omap_domain->num_iommus; i++, iommu++) {
oiommu = iommu->iommu_dev;
ret = omap_iopgtable_store_entry(oiommu, &e); ret = omap_iopgtable_store_entry(oiommu, &e);
if (ret) if (ret) {
dev_err(dev, "omap_iopgtable_store_entry failed: %d\n", ret); dev_err(dev, "omap_iopgtable_store_entry failed: %d\n",
ret);
break;
}
}
if (ret) {
while (i--) {
iommu--;
oiommu = iommu->iommu_dev;
iopgtable_clear_entry(oiommu, da);
}
}
return ret; return ret;
} }
...@@ -1095,12 +1156,90 @@ static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da, ...@@ -1095,12 +1156,90 @@ static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
size_t size) size_t size)
{ {
struct omap_iommu_domain *omap_domain = to_omap_domain(domain); struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
struct omap_iommu *oiommu = omap_domain->iommu_dev; struct device *dev = omap_domain->dev;
struct device *dev = oiommu->dev; struct omap_iommu_device *iommu;
struct omap_iommu *oiommu;
bool error = false;
size_t bytes = 0;
int i;
dev_dbg(dev, "unmapping da 0x%lx size %u\n", da, size); dev_dbg(dev, "unmapping da 0x%lx size %u\n", da, size);
return iopgtable_clear_entry(oiommu, da); iommu = omap_domain->iommus;
for (i = 0; i < omap_domain->num_iommus; i++, iommu++) {
oiommu = iommu->iommu_dev;
bytes = iopgtable_clear_entry(oiommu, da);
if (!bytes)
error = true;
}
/*
* simplify return - we are only checking if any of the iommus
* reported an error, but not if all of them are unmapping the
* same number of entries. This should not occur due to the
* mirror programming.
*/
return error ? 0 : bytes;
}
static int omap_iommu_count(struct device *dev)
{
struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
int count = 0;
while (arch_data->iommu_dev) {
count++;
arch_data++;
}
return count;
}
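omap_iommu_count() relies on the arch_data array ending with a zeroed sentinel entry, the same convention the save/restore context walkers use. A tiny standalone sketch of counting such a sentinel-terminated array; the entry type here is invented for illustration.

#include <stdio.h>

struct entry { const char *name; };

static int count_entries(const struct entry *e)
{
	int count = 0;

	while (e->name) {	/* array ends with a zeroed sentinel */
		count++;
		e++;
	}
	return count;
}

int main(void)
{
	const struct entry list[] = { { "mmu0" }, { "mmu1" }, { NULL } };

	printf("%d entries\n", count_entries(list));
	return 0;
}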
/* caller should call cleanup if this function fails */
static int omap_iommu_attach_init(struct device *dev,
struct omap_iommu_domain *odomain)
{
struct omap_iommu_device *iommu;
int i;
odomain->num_iommus = omap_iommu_count(dev);
if (!odomain->num_iommus)
return -EINVAL;
odomain->iommus = kcalloc(odomain->num_iommus, sizeof(*iommu),
GFP_ATOMIC);
if (!odomain->iommus)
return -ENOMEM;
iommu = odomain->iommus;
for (i = 0; i < odomain->num_iommus; i++, iommu++) {
iommu->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_ATOMIC);
if (!iommu->pgtable)
return -ENOMEM;
/*
* should never fail, but please keep this around to ensure
* we keep the hardware happy
*/
if (WARN_ON(!IS_ALIGNED((long)iommu->pgtable,
IOPGD_TABLE_SIZE)))
return -EINVAL;
}
return 0;
}
static void omap_iommu_detach_fini(struct omap_iommu_domain *odomain)
{
int i;
struct omap_iommu_device *iommu = odomain->iommus;
for (i = 0; iommu && i < odomain->num_iommus; i++, iommu++)
kfree(iommu->pgtable);
kfree(odomain->iommus);
odomain->num_iommus = 0;
odomain->iommus = NULL;
} }
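The attach_init/detach_fini pair above follows a common "allocate N resources, and make the teardown helper safe after any partial failure" shape: init may bail out at any point, and the caller is expected to run the fini path unconditionally. A simplified userspace model of that contract, with malloc/calloc standing in for the kernel allocators and made-up sizes:

#include <stdlib.h>

struct dev_entry { void *pgtable; };

struct dom {
	int num;
	struct dev_entry *entries;
};

static void dom_fini(struct dom *d)
{
	int i;

	for (i = 0; d->entries && i < d->num; i++)
		free(d->entries[i].pgtable);	/* free(NULL) is harmless */
	free(d->entries);
	d->entries = NULL;
	d->num = 0;
}

static int dom_init(struct dom *d, int num)
{
	int i;

	d->num = num;
	d->entries = calloc(num, sizeof(*d->entries));
	if (!d->entries)
		return -1;

	for (i = 0; i < num; i++) {
		d->entries[i].pgtable = malloc(4096);
		if (!d->entries[i].pgtable)
			return -1;	/* caller runs dom_fini() */
	}
	return 0;
}

int main(void)
{
	struct dom d = { 0 };

	if (dom_init(&d, 2) == 0) {
		/* use the domain ... */
	}
	dom_fini(&d);	/* safe for both full and partial initialisation */
	return 0;
}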
static int static int
...@@ -1108,8 +1247,10 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) ...@@ -1108,8 +1247,10 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
{ {
struct omap_iommu_domain *omap_domain = to_omap_domain(domain); struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
struct omap_iommu_device *iommu;
struct omap_iommu *oiommu; struct omap_iommu *oiommu;
int ret = 0; int ret = 0;
int i;
if (!arch_data || !arch_data->iommu_dev) { if (!arch_data || !arch_data->iommu_dev) {
dev_err(dev, "device doesn't have an associated iommu\n"); dev_err(dev, "device doesn't have an associated iommu\n");
...@@ -1118,26 +1259,49 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) ...@@ -1118,26 +1259,49 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
spin_lock(&omap_domain->lock); spin_lock(&omap_domain->lock);
/* only a single device is supported per domain for now */ /* only a single client device can be attached to a domain */
if (omap_domain->iommu_dev) { if (omap_domain->dev) {
dev_err(dev, "iommu domain is already attached\n"); dev_err(dev, "iommu domain is already attached\n");
ret = -EBUSY; ret = -EBUSY;
goto out; goto out;
} }
oiommu = arch_data->iommu_dev; ret = omap_iommu_attach_init(dev, omap_domain);
if (ret) {
dev_err(dev, "failed to allocate required iommu data %d\n",
ret);
goto init_fail;
}
/* get a handle to and enable the omap iommu */ iommu = omap_domain->iommus;
ret = omap_iommu_attach(oiommu, omap_domain->pgtable); for (i = 0; i < omap_domain->num_iommus; i++, iommu++, arch_data++) {
/* configure and enable the omap iommu */
oiommu = arch_data->iommu_dev;
ret = omap_iommu_attach(oiommu, iommu->pgtable);
if (ret) { if (ret) {
dev_err(dev, "can't get omap iommu: %d\n", ret); dev_err(dev, "can't get omap iommu: %d\n", ret);
goto out; goto attach_fail;
} }
omap_domain->iommu_dev = oiommu;
omap_domain->dev = dev;
oiommu->domain = domain; oiommu->domain = domain;
iommu->iommu_dev = oiommu;
}
omap_domain->dev = dev;
goto out;
attach_fail:
while (i--) {
iommu--;
arch_data--;
oiommu = iommu->iommu_dev;
omap_iommu_detach(oiommu);
iommu->iommu_dev = NULL;
oiommu->domain = NULL;
}
init_fail:
omap_iommu_detach_fini(omap_domain);
out: out:
spin_unlock(&omap_domain->lock); spin_unlock(&omap_domain->lock);
return ret; return ret;
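The attach_fail path above unwinds only the instances that were actually attached, walking back with while (i--). A toy model of that error-unwind idiom, with invented attach_one()/detach_one() helpers and a forced failure on the last instance:

#include <stdio.h>

#define NUM_INSTANCES 3

static int attach_one(int i)
{
	return i == 2 ? -1 : 0;		/* force a failure on the last one */
}

static void detach_one(int i)
{
	printf("detaching instance %d\n", i);
}

int main(void)
{
	int i;

	for (i = 0; i < NUM_INSTANCES; i++) {
		if (attach_one(i) < 0)
			break;
		printf("attached instance %d\n", i);
	}

	if (i < NUM_INSTANCES) {
		/* failure: detach in reverse order of attachment */
		while (i--)
			detach_one(i);
	}
	return 0;
}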
...@@ -1146,21 +1310,40 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) ...@@ -1146,21 +1310,40 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain,
struct device *dev) struct device *dev)
{ {
struct omap_iommu *oiommu = dev_to_omap_iommu(dev); struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
struct omap_iommu_device *iommu = omap_domain->iommus;
struct omap_iommu *oiommu;
int i;
if (!omap_domain->dev) {
dev_err(dev, "domain has no attached device\n");
return;
}
/* only a single device is supported per domain for now */ /* only a single device is supported per domain for now */
if (omap_domain->iommu_dev != oiommu) { if (omap_domain->dev != dev) {
dev_err(dev, "invalid iommu device\n"); dev_err(dev, "invalid attached device\n");
return; return;
} }
/*
* cleanup in the reverse order of attachment - this addresses
* any h/w dependencies between multiple instances, if any
*/
iommu += (omap_domain->num_iommus - 1);
arch_data += (omap_domain->num_iommus - 1);
for (i = 0; i < omap_domain->num_iommus; i++, iommu--, arch_data--) {
oiommu = iommu->iommu_dev;
iopgtable_clear_entry_all(oiommu); iopgtable_clear_entry_all(oiommu);
omap_iommu_detach(oiommu); omap_iommu_detach(oiommu);
iommu->iommu_dev = NULL;
oiommu->domain = NULL;
}
omap_iommu_detach_fini(omap_domain);
omap_domain->iommu_dev = NULL;
omap_domain->dev = NULL; omap_domain->dev = NULL;
oiommu->domain = NULL;
} }
static void omap_iommu_detach_dev(struct iommu_domain *domain, static void omap_iommu_detach_dev(struct iommu_domain *domain,
...@@ -1182,18 +1365,7 @@ static struct iommu_domain *omap_iommu_domain_alloc(unsigned type) ...@@ -1182,18 +1365,7 @@ static struct iommu_domain *omap_iommu_domain_alloc(unsigned type)
omap_domain = kzalloc(sizeof(*omap_domain), GFP_KERNEL); omap_domain = kzalloc(sizeof(*omap_domain), GFP_KERNEL);
if (!omap_domain) if (!omap_domain)
goto out; return NULL;
omap_domain->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_KERNEL);
if (!omap_domain->pgtable)
goto fail_nomem;
/*
* should never fail, but please keep this around to ensure
* we keep the hardware happy
*/
if (WARN_ON(!IS_ALIGNED((long)omap_domain->pgtable, IOPGD_TABLE_SIZE)))
goto fail_align;
spin_lock_init(&omap_domain->lock); spin_lock_init(&omap_domain->lock);
...@@ -1202,13 +1374,6 @@ static struct iommu_domain *omap_iommu_domain_alloc(unsigned type) ...@@ -1202,13 +1374,6 @@ static struct iommu_domain *omap_iommu_domain_alloc(unsigned type)
omap_domain->domain.geometry.force_aperture = true; omap_domain->domain.geometry.force_aperture = true;
return &omap_domain->domain; return &omap_domain->domain;
fail_align:
kfree(omap_domain->pgtable);
fail_nomem:
kfree(omap_domain);
out:
return NULL;
} }
static void omap_iommu_domain_free(struct iommu_domain *domain) static void omap_iommu_domain_free(struct iommu_domain *domain)
...@@ -1219,10 +1384,9 @@ static void omap_iommu_domain_free(struct iommu_domain *domain) ...@@ -1219,10 +1384,9 @@ static void omap_iommu_domain_free(struct iommu_domain *domain)
* An iommu device is still attached * An iommu device is still attached
* (currently, only one device can be attached) ? * (currently, only one device can be attached) ?
*/ */
if (omap_domain->iommu_dev) if (omap_domain->dev)
_omap_iommu_detach_dev(omap_domain, omap_domain->dev); _omap_iommu_detach_dev(omap_domain, omap_domain->dev);
kfree(omap_domain->pgtable);
kfree(omap_domain); kfree(omap_domain);
} }
...@@ -1230,11 +1394,16 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain, ...@@ -1230,11 +1394,16 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain,
dma_addr_t da) dma_addr_t da)
{ {
struct omap_iommu_domain *omap_domain = to_omap_domain(domain); struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
struct omap_iommu *oiommu = omap_domain->iommu_dev; struct omap_iommu_device *iommu = omap_domain->iommus;
struct omap_iommu *oiommu = iommu->iommu_dev;
struct device *dev = oiommu->dev; struct device *dev = oiommu->dev;
u32 *pgd, *pte; u32 *pgd, *pte;
phys_addr_t ret = 0; phys_addr_t ret = 0;
/*
* all the iommus within the domain will have identical programming,
* so perform the lookup using just the first iommu
*/
iopgtable_lookup_entry(oiommu, da, &pgd, &pte); iopgtable_lookup_entry(oiommu, da, &pgd, &pte);
if (pte) { if (pte) {
...@@ -1260,11 +1429,12 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain, ...@@ -1260,11 +1429,12 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain,
static int omap_iommu_add_device(struct device *dev) static int omap_iommu_add_device(struct device *dev)
{ {
struct omap_iommu_arch_data *arch_data; struct omap_iommu_arch_data *arch_data, *tmp;
struct omap_iommu *oiommu; struct omap_iommu *oiommu;
struct iommu_group *group; struct iommu_group *group;
struct device_node *np; struct device_node *np;
struct platform_device *pdev; struct platform_device *pdev;
int num_iommus, i;
int ret; int ret;
/* /*
...@@ -1276,36 +1446,57 @@ static int omap_iommu_add_device(struct device *dev) ...@@ -1276,36 +1446,57 @@ static int omap_iommu_add_device(struct device *dev)
if (!dev->of_node) if (!dev->of_node)
return 0; return 0;
np = of_parse_phandle(dev->of_node, "iommus", 0); /*
if (!np) * retrieve the count of IOMMU nodes using phandle size as element size
* since #iommu-cells = 0 for OMAP
*/
num_iommus = of_property_count_elems_of_size(dev->of_node, "iommus",
sizeof(phandle));
if (num_iommus < 0)
return 0; return 0;
arch_data = kzalloc((num_iommus + 1) * sizeof(*arch_data), GFP_KERNEL);
if (!arch_data)
return -ENOMEM;
for (i = 0, tmp = arch_data; i < num_iommus; i++, tmp++) {
np = of_parse_phandle(dev->of_node, "iommus", i);
if (!np) {
kfree(arch_data);
return -EINVAL;
}
pdev = of_find_device_by_node(np); pdev = of_find_device_by_node(np);
if (WARN_ON(!pdev)) { if (WARN_ON(!pdev)) {
of_node_put(np); of_node_put(np);
kfree(arch_data);
return -EINVAL; return -EINVAL;
} }
oiommu = platform_get_drvdata(pdev); oiommu = platform_get_drvdata(pdev);
if (!oiommu) { if (!oiommu) {
of_node_put(np); of_node_put(np);
kfree(arch_data);
return -EINVAL; return -EINVAL;
} }
arch_data = kzalloc(sizeof(*arch_data), GFP_KERNEL); tmp->iommu_dev = oiommu;
if (!arch_data) {
of_node_put(np); of_node_put(np);
return -ENOMEM;
} }
/*
* use the first IOMMU alone for the sysfs device linking.
* TODO: Evaluate if a single iommu_group needs to be
* maintained for both IOMMUs
*/
oiommu = arch_data->iommu_dev;
ret = iommu_device_link(&oiommu->iommu, dev); ret = iommu_device_link(&oiommu->iommu, dev);
if (ret) { if (ret) {
kfree(arch_data); kfree(arch_data);
of_node_put(np);
return ret; return ret;
} }
arch_data->iommu_dev = oiommu;
dev->archdata.iommu = arch_data; dev->archdata.iommu = arch_data;
/* /*
...@@ -1321,8 +1512,6 @@ static int omap_iommu_add_device(struct device *dev) ...@@ -1321,8 +1512,6 @@ static int omap_iommu_add_device(struct device *dev)
} }
iommu_group_put(group); iommu_group_put(group);
of_node_put(np);
return 0; return 0;
} }
......
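Because OMAP IOMMUs use #iommu-cells = 0, every entry in a client's "iommus" property is exactly one phandle cell, which is why the hunk above can derive the instance count with of_property_count_elems_of_size() and sizeof(phandle). A minimal sketch of that counting step in isolation (the device-tree contents in the comment are an illustrative assumption, not taken from this patch):

/*
 * Hypothetical client node (illustration only):
 *
 *     dsp2: dsp@41000000 {
 *             ...
 *             iommus = <&mmu0_dsp2>, <&mmu1_dsp2>;   // providers have #iommu-cells = 0
 *     };
 *
 * The "iommus" property is then two phandle cells (2 * sizeof(phandle)
 * bytes), so counting elements of size sizeof(phandle) yields 2.
 */
#include <linux/of.h>

static int count_client_iommus(struct device *dev)
{
	int num_iommus;

	num_iommus = of_property_count_elems_of_size(dev->of_node, "iommus",
						     sizeof(phandle));
	if (num_iommus < 0)
		return 0;	/* no "iommus" property: device is not behind an IOMMU */

	return num_iommus;
}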
...@@ -28,18 +28,27 @@ struct iotlb_entry { ...@@ -28,18 +28,27 @@ struct iotlb_entry {
u32 endian, elsz, mixed; u32 endian, elsz, mixed;
}; };
/**
* struct omap_iommu_device - omap iommu device data
* @pgtable: page table used by an omap iommu attached to a domain
* @iommu_dev: pointer to store an omap iommu instance attached to a domain
*/
struct omap_iommu_device {
u32 *pgtable;
struct omap_iommu *iommu_dev;
};
/** /**
* struct omap_iommu_domain - omap iommu domain * struct omap_iommu_domain - omap iommu domain
* @pgtable: the page table * @num_iommus: number of iommus in this domain
* @iommu_dev: an omap iommu device attached to this domain. only a single * @iommus: omap iommu device data for all iommus in this domain
* iommu device can be attached for now.
* @dev: Device using this domain. * @dev: Device using this domain.
* @lock: domain lock, should be taken when attaching/detaching * @lock: domain lock, should be taken when attaching/detaching
* @domain: generic domain handle used by iommu core code * @domain: generic domain handle used by iommu core code
*/ */
struct omap_iommu_domain { struct omap_iommu_domain {
u32 *pgtable; u32 num_iommus;
struct omap_iommu *iommu_dev; struct omap_iommu_device *iommus;
struct device *dev; struct device *dev;
spinlock_t lock; spinlock_t lock;
struct iommu_domain domain; struct iommu_domain domain;
...@@ -97,17 +106,6 @@ struct iotlb_lock { ...@@ -97,17 +106,6 @@ struct iotlb_lock {
short vict; short vict;
}; };
/**
* dev_to_omap_iommu() - retrieves an omap iommu object from a user device
* @dev: iommu client device
*/
static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev)
{
struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
return arch_data->iommu_dev;
}
/* /*
* MMU Register offsets * MMU Register offsets
*/ */
......
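With the pgtable/iommu_dev pair moved into the new per-instance struct omap_iommu_device, a domain now carries an array of those entries, one per attached MMU. A minimal sketch of how such a domain could be sized and populated (omap_domain_init_iommus() is a hypothetical helper for illustration, not part of this patch; unwinding on failure is left to the caller):

/* Hypothetical helper: allocate one omap_iommu_device slot per instance. */
static int omap_domain_init_iommus(struct omap_iommu_domain *omap_domain,
				   int num_iommus)
{
	struct omap_iommu_device *iommu;
	int i;

	omap_domain->num_iommus = num_iommus;
	omap_domain->iommus = kcalloc(num_iommus, sizeof(*iommu), GFP_KERNEL);
	if (!omap_domain->iommus)
		return -ENOMEM;

	/* each instance gets its own, identically programmed, page table */
	for (i = 0, iommu = omap_domain->iommus; i < num_iommus; i++, iommu++) {
		iommu->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_KERNEL);
		if (!iommu->pgtable)
			return -ENOMEM;	/* caller frees what was allocated */
	}

	return 0;
}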
...@@ -66,6 +66,7 @@ struct qcom_iommu_ctx { ...@@ -66,6 +66,7 @@ struct qcom_iommu_ctx {
void __iomem *base; void __iomem *base;
bool secure_init; bool secure_init;
u8 asid; /* asid and ctx bank # are 1:1 */ u8 asid; /* asid and ctx bank # are 1:1 */
struct iommu_domain *domain;
}; };
struct qcom_iommu_domain { struct qcom_iommu_domain {
...@@ -194,12 +195,15 @@ static irqreturn_t qcom_iommu_fault(int irq, void *dev) ...@@ -194,12 +195,15 @@ static irqreturn_t qcom_iommu_fault(int irq, void *dev)
fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0); fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0);
iova = iommu_readq(ctx, ARM_SMMU_CB_FAR); iova = iommu_readq(ctx, ARM_SMMU_CB_FAR);
if (!report_iommu_fault(ctx->domain, ctx->dev, iova, 0)) {
dev_err_ratelimited(ctx->dev, dev_err_ratelimited(ctx->dev,
"Unhandled context fault: fsr=0x%x, " "Unhandled context fault: fsr=0x%x, "
"iova=0x%016llx, fsynr=0x%x, cb=%d\n", "iova=0x%016llx, fsynr=0x%x, cb=%d\n",
fsr, iova, fsynr, ctx->asid); fsr, iova, fsynr, ctx->asid);
}
iommu_writel(ctx, ARM_SMMU_CB_FSR, fsr); iommu_writel(ctx, ARM_SMMU_CB_FSR, fsr);
iommu_writel(ctx, ARM_SMMU_CB_RESUME, RESUME_TERMINATE);
return IRQ_HANDLED; return IRQ_HANDLED;
} }
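report_iommu_fault() gives a client driver first shot at the fault before the context bank is resumed with RESUME_TERMINATE; such a handler is registered through the core iommu_set_fault_handler() API. A minimal sketch, with the handler body and token as illustrative assumptions:

#include <linux/iommu.h>

/* Illustrative fault handler: log and let the driver terminate the access. */
static int my_fault_handler(struct iommu_domain *domain, struct device *dev,
			    unsigned long iova, int flags, void *token)
{
	dev_warn(dev, "iommu fault at iova 0x%lx (flags 0x%x)\n", iova, flags);

	/*
	 * Returning 0 tells report_iommu_fault() the fault was handled, so
	 * qcom_iommu_fault() skips its own error print; it still clears FSR
	 * and writes RESUME_TERMINATE to abort the faulting transaction.
	 */
	return 0;
}

/* somewhere after attaching @dev to @domain: */
/* iommu_set_fault_handler(domain, my_fault_handler, NULL); */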
...@@ -274,12 +278,14 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, ...@@ -274,12 +278,14 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain,
/* SCTLR */ /* SCTLR */
reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE |
SCTLR_M | SCTLR_S1_ASIDPNE; SCTLR_M | SCTLR_S1_ASIDPNE | SCTLR_CFCFG;
if (IS_ENABLED(CONFIG_BIG_ENDIAN)) if (IS_ENABLED(CONFIG_BIG_ENDIAN))
reg |= SCTLR_E; reg |= SCTLR_E;
iommu_writel(ctx, ARM_SMMU_CB_SCTLR, reg); iommu_writel(ctx, ARM_SMMU_CB_SCTLR, reg);
ctx->domain = domain;
} }
mutex_unlock(&qcom_domain->init_mutex); mutex_unlock(&qcom_domain->init_mutex);
...@@ -395,6 +401,8 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de ...@@ -395,6 +401,8 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de
/* Disable the context bank: */ /* Disable the context bank: */
iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0); iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0);
ctx->domain = NULL;
} }
pm_runtime_put_sync(qcom_iommu->dev); pm_runtime_put_sync(qcom_iommu->dev);
...@@ -443,6 +451,19 @@ static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova, ...@@ -443,6 +451,19 @@ static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
return ret; return ret;
} }
static void qcom_iommu_iotlb_sync(struct iommu_domain *domain)
{
struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
struct io_pgtable *pgtable = container_of(qcom_domain->pgtbl_ops,
struct io_pgtable, ops);
if (!qcom_domain->pgtbl_ops)
return;
pm_runtime_get_sync(qcom_domain->iommu->dev);
qcom_iommu_tlb_sync(pgtable->cookie);
pm_runtime_put_sync(qcom_domain->iommu->dev);
}
static phys_addr_t qcom_iommu_iova_to_phys(struct iommu_domain *domain, static phys_addr_t qcom_iommu_iova_to_phys(struct iommu_domain *domain,
dma_addr_t iova) dma_addr_t iova)
{ {
...@@ -570,6 +591,8 @@ static const struct iommu_ops qcom_iommu_ops = { ...@@ -570,6 +591,8 @@ static const struct iommu_ops qcom_iommu_ops = {
.map = qcom_iommu_map, .map = qcom_iommu_map,
.unmap = qcom_iommu_unmap, .unmap = qcom_iommu_unmap,
.map_sg = default_iommu_map_sg, .map_sg = default_iommu_map_sg,
.flush_iotlb_all = qcom_iommu_iotlb_sync,
.iotlb_sync = qcom_iommu_iotlb_sync,
.iova_to_phys = qcom_iommu_iova_to_phys, .iova_to_phys = qcom_iommu_iova_to_phys,
.add_device = qcom_iommu_add_device, .add_device = qcom_iommu_add_device,
.remove_device = qcom_iommu_remove_device, .remove_device = qcom_iommu_remove_device,
......
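Wiring qcom_iommu_iotlb_sync() into both .flush_iotlb_all and .iotlb_sync lets callers batch unmaps and pay for a single TLB sync at the end. A minimal sketch, assuming the core deferred-flush helpers iommu_unmap_fast(), iommu_tlb_range_add() and iommu_tlb_sync() are available in this kernel (pgsize handling is simplified for illustration):

#include <linux/iommu.h>

/* Sketch: unmap a range page by page, then sync the TLB once. */
static void unmap_range_deferred(struct iommu_domain *domain,
				 unsigned long iova, size_t size, size_t pgsize)
{
	size_t unmapped;

	while (size) {
		unmapped = iommu_unmap_fast(domain, iova, pgsize);
		if (!unmapped)
			break;		/* nothing mapped here; bail out */
		iommu_tlb_range_add(domain, iova, unmapped);
		iova += unmapped;
		size -= unmapped;
	}

	iommu_tlb_sync(domain);	/* ends up in qcom_iommu_iotlb_sync() */
}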
...@@ -39,8 +39,7 @@ void scif_rma_ep_init(struct scif_endpt *ep) ...@@ -39,8 +39,7 @@ void scif_rma_ep_init(struct scif_endpt *ep)
struct scif_endpt_rma_info *rma = &ep->rma_info; struct scif_endpt_rma_info *rma = &ep->rma_info;
mutex_init(&rma->rma_lock); mutex_init(&rma->rma_lock);
init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN, init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN);
SCIF_DMA_64BIT_PFN);
spin_lock_init(&rma->tc_lock); spin_lock_init(&rma->tc_lock);
mutex_init(&rma->mmn_lock); mutex_init(&rma->mmn_lock);
INIT_LIST_HEAD(&rma->reg_list); INIT_LIST_HEAD(&rma->reg_list);
......
...@@ -112,6 +112,7 @@ static inline bool dmar_rcu_check(void) ...@@ -112,6 +112,7 @@ static inline bool dmar_rcu_check(void)
extern int dmar_table_init(void); extern int dmar_table_init(void);
extern int dmar_dev_scope_init(void); extern int dmar_dev_scope_init(void);
extern void dmar_register_bus_notifier(void);
extern int dmar_parse_dev_scope(void *start, void *end, int *cnt, extern int dmar_parse_dev_scope(void *start, void *end, int *cnt,
struct dmar_dev_scope **devices, u16 segment); struct dmar_dev_scope **devices, u16 segment);
extern void *dmar_alloc_dev_scope(void *start, void *end, int *cnt); extern void *dmar_alloc_dev_scope(void *start, void *end, int *cnt);
......
...@@ -212,6 +212,7 @@ ...@@ -212,6 +212,7 @@
#define DMA_FSTS_IQE (1 << 4) #define DMA_FSTS_IQE (1 << 4)
#define DMA_FSTS_ICE (1 << 5) #define DMA_FSTS_ICE (1 << 5)
#define DMA_FSTS_ITE (1 << 6) #define DMA_FSTS_ITE (1 << 6)
#define DMA_FSTS_PRO (1 << 7)
#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
/* FRCD_REG, 32 bits access */ /* FRCD_REG, 32 bits access */
......
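DMA_FSTS_PRO is the Page Request Overflow bit in the VT-d Fault Status register; like the other FSTS fault bits it is write-1-to-clear. A minimal sketch of acknowledging it (the surrounding check is illustrative and not the exact hunk from the VT-d patch):

#include <linux/intel-iommu.h>

/* Sketch: acknowledge a Page Request Overflow fault (RW1C bit in FSTS). */
static void clear_prq_overflow(struct intel_iommu *iommu)
{
	u32 fsts = readl(iommu->reg + DMAR_FSTS_REG);

	if (fsts & DMA_FSTS_PRO)
		writel(DMA_FSTS_PRO, iommu->reg + DMAR_FSTS_REG);
}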
...@@ -70,10 +70,12 @@ struct iova_fq { ...@@ -70,10 +70,12 @@ struct iova_fq {
struct iova_domain { struct iova_domain {
spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */
struct rb_root rbroot; /* iova domain rbtree root */ struct rb_root rbroot; /* iova domain rbtree root */
struct rb_node *cached32_node; /* Save last alloced node */ struct rb_node *cached_node; /* Save last alloced node */
struct rb_node *cached32_node; /* Save last 32-bit alloced node */
unsigned long granule; /* pfn granularity for this domain */ unsigned long granule; /* pfn granularity for this domain */
unsigned long start_pfn; /* Lower limit for this domain */ unsigned long start_pfn; /* Lower limit for this domain */
unsigned long dma_32bit_pfn; unsigned long dma_32bit_pfn;
struct iova anchor; /* rbtree lookup anchor */
struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */
iova_flush_cb flush_cb; /* Call-Back function to flush IOMMU iova_flush_cb flush_cb; /* Call-Back function to flush IOMMU
...@@ -148,12 +150,12 @@ void queue_iova(struct iova_domain *iovad, ...@@ -148,12 +150,12 @@ void queue_iova(struct iova_domain *iovad,
unsigned long pfn, unsigned long pages, unsigned long pfn, unsigned long pages,
unsigned long data); unsigned long data);
unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
unsigned long limit_pfn); unsigned long limit_pfn, bool flush_rcache);
struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
unsigned long pfn_hi); unsigned long pfn_hi);
void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
void init_iova_domain(struct iova_domain *iovad, unsigned long granule, void init_iova_domain(struct iova_domain *iovad, unsigned long granule,
unsigned long start_pfn, unsigned long pfn_32bit); unsigned long start_pfn);
int init_iova_flush_queue(struct iova_domain *iovad, int init_iova_flush_queue(struct iova_domain *iovad,
iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); iova_flush_cb flush_cb, iova_entry_dtor entry_dtor);
struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
...@@ -210,7 +212,8 @@ static inline void queue_iova(struct iova_domain *iovad, ...@@ -210,7 +212,8 @@ static inline void queue_iova(struct iova_domain *iovad,
static inline unsigned long alloc_iova_fast(struct iova_domain *iovad, static inline unsigned long alloc_iova_fast(struct iova_domain *iovad,
unsigned long size, unsigned long size,
unsigned long limit_pfn) unsigned long limit_pfn,
bool flush_rcache)
{ {
return 0; return 0;
} }
...@@ -229,8 +232,7 @@ static inline void copy_reserved_iova(struct iova_domain *from, ...@@ -229,8 +232,7 @@ static inline void copy_reserved_iova(struct iova_domain *from,
static inline void init_iova_domain(struct iova_domain *iovad, static inline void init_iova_domain(struct iova_domain *iovad,
unsigned long granule, unsigned long granule,
unsigned long start_pfn, unsigned long start_pfn)
unsigned long pfn_32bit)
{ {
} }
......
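The reworked IOVA API drops the pfn_32bit argument from init_iova_domain(), since the 32-bit boundary is now tracked internally via the cached nodes and the anchor, and alloc_iova_fast() gains a flush_rcache flag so callers with a fallback can skip the costly per-CPU rcache flush. A minimal usage sketch (the wrapper function and pfn values are illustrative assumptions):

#include <linux/iova.h>

/* Sketch: allocate from an IOVA domain, flushing rcaches only as a last resort. */
static unsigned long alloc_pfn(struct iova_domain *iovad, unsigned long pages,
			       unsigned long limit_pfn)
{
	unsigned long pfn;

	/* first attempt: do not flush per-CPU rcaches on failure */
	pfn = alloc_iova_fast(iovad, pages, limit_pfn, false);
	if (!pfn)
		/* retry, this time allowing the expensive rcache flush */
		pfn = alloc_iova_fast(iovad, pages, limit_pfn, true);

	return pfn;
}

/* Domain setup now takes only the granule and the start pfn, as in the */
/* scif hunk above: init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN); */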