Commit f9683931 authored by Joerg Roedel

Merge branch 'for-joerg/arm-smmu/updates' of...

Merge branch 'for-joerg/arm-smmu/updates' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into arm/smmu
parents 74d33293 f5b83190
@@ -43,6 +43,12 @@ conditions.
** System MMU optional properties:
- dma-coherent : Present if page table walks made by the SMMU are
cache coherent with the CPU.
NOTE: this only applies to the SMMU itself, not
masters connected upstream of the SMMU.
- calxeda,smmu-secure-config-access : Enable proper handling of buggy
implementations that always use secure access to
SMMU configuration registers. In this case non-secure
......
@@ -23,7 +23,8 @@ config IOMMU_IO_PGTABLE
config IOMMU_IO_PGTABLE_LPAE
bool "ARMv7/v8 Long Descriptor Format"
select IOMMU_IO_PGTABLE
depends on ARM || ARM64 || COMPILE_TEST
# SWIOTLB guarantees a dma_to_phys() implementation
depends on ARM || ARM64 || (COMPILE_TEST && SWIOTLB)
help
Enable support for the ARM long descriptor pagetable format.
This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page
......
@@ -118,6 +118,7 @@
#define ARM_SMMU_IRQ_CTRL 0x50
#define IRQ_CTRL_EVTQ_IRQEN (1 << 2)
#define IRQ_CTRL_PRIQ_IRQEN (1 << 1)
#define IRQ_CTRL_GERROR_IRQEN (1 << 0)
#define ARM_SMMU_IRQ_CTRLACK 0x54
@@ -173,14 +174,14 @@
#define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
/* Common MSI config fields */
#define MSI_CFG0_SH_SHIFT 60
#define MSI_CFG0_SH_NSH (0UL << MSI_CFG0_SH_SHIFT)
#define MSI_CFG0_SH_OSH (2UL << MSI_CFG0_SH_SHIFT)
#define MSI_CFG0_SH_ISH (3UL << MSI_CFG0_SH_SHIFT)
#define MSI_CFG0_MEMATTR_SHIFT 56
#define MSI_CFG0_MEMATTR_DEVICE_nGnRE (0x1 << MSI_CFG0_MEMATTR_SHIFT)
#define MSI_CFG0_ADDR_SHIFT 2
#define MSI_CFG0_ADDR_MASK 0x3fffffffffffUL
#define MSI_CFG2_SH_SHIFT 4
#define MSI_CFG2_SH_NSH (0UL << MSI_CFG2_SH_SHIFT)
#define MSI_CFG2_SH_OSH (2UL << MSI_CFG2_SH_SHIFT)
#define MSI_CFG2_SH_ISH (3UL << MSI_CFG2_SH_SHIFT)
#define MSI_CFG2_MEMATTR_SHIFT 0
#define MSI_CFG2_MEMATTR_DEVICE_nGnRE (0x1 << MSI_CFG2_MEMATTR_SHIFT)
#define Q_IDX(q, p) ((p) & ((1 << (q)->max_n_shift) - 1))
#define Q_WRP(q, p) ((p) & (1 << (q)->max_n_shift))
@@ -1330,33 +1331,10 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
}
static void arm_smmu_flush_pgtable(void *addr, size_t size, void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
struct arm_smmu_device *smmu = smmu_domain->smmu;
unsigned long offset = (unsigned long)addr & ~PAGE_MASK;
if (smmu->features & ARM_SMMU_FEAT_COHERENCY) {
dsb(ishst);
} else {
dma_addr_t dma_addr;
struct device *dev = smmu->dev;
dma_addr = dma_map_page(dev, virt_to_page(addr), offset, size,
DMA_TO_DEVICE);
if (dma_mapping_error(dev, dma_addr))
dev_err(dev, "failed to flush pgtable at %p\n", addr);
else
dma_unmap_page(dev, dma_addr, size, DMA_TO_DEVICE);
}
}
static struct iommu_gather_ops arm_smmu_gather_ops = {
.tlb_flush_all = arm_smmu_tlb_inv_context,
.tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
.tlb_sync = arm_smmu_tlb_sync,
.flush_pgtable = arm_smmu_flush_pgtable,
};
/* IOMMU API */
@@ -1531,6 +1509,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
.ias = ias,
.oas = oas,
.tlb = &arm_smmu_gather_ops,
.iommu_dev = smmu->dev,
};
pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
@@ -2053,9 +2032,17 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
int ret;
struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
/* Calculate the L1 size, capped to the SIDSIZE */
size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
size = min(size, smmu->sid_bits - STRTAB_SPLIT);
/*
* If we can resolve everything with a single L2 table, then we
* just need a single L1 descriptor. Otherwise, calculate the L1
* size, capped to the SIDSIZE.
*/
if (smmu->sid_bits < STRTAB_SPLIT) {
size = 0;
} else {
size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
size = min(size, smmu->sid_bits - STRTAB_SPLIT);
}
cfg->num_l1_ents = 1 << size;
size += STRTAB_SPLIT;
@@ -2198,6 +2185,7 @@ static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
{
int ret, irq;
u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
/* Disable IRQs first */
ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
@@ -2252,13 +2240,13 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
if (IS_ERR_VALUE(ret))
dev_warn(smmu->dev,
"failed to enable priq irq\n");
else
irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
}
}
/* Enable interrupt generation on the SMMU */
ret = arm_smmu_write_reg_sync(smmu,
IRQ_CTRL_EVTQ_IRQEN |
IRQ_CTRL_GERROR_IRQEN,
ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
if (ret)
dev_warn(smmu->dev, "failed to enable irqs\n");
@@ -2540,12 +2528,12 @@ static int arm_smmu_device_probe(struct arm_smmu_device *smmu)
case IDR5_OAS_44_BIT:
smmu->oas = 44;
break;
default:
dev_info(smmu->dev,
"unknown output address size. Truncating to 48-bit\n");
/* Fallthrough */
case IDR5_OAS_48_BIT:
smmu->oas = 48;
break;
default:
dev_err(smmu->dev, "unknown output address size!\n");
return -ENXIO;
}
/* Set the DMA mask for our table walker */
......
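The reworked stream-table sizing above handles SMMUs whose entire StreamID space fits in a single second-level table. A minimal standalone sketch of that calculation follows; the DEMO_STRTAB_* constants are assumed illustrative values, not copied from the driver.

/* Illustration only: assumed constants; see arm-smmu-v3.c for the real ones. */
#include <stdio.h>

#define DEMO_STRTAB_L1_SZ_SHIFT		20	/* assumed */
#define DEMO_STRTAB_L1_DESC_DWORDS	1	/* assumed */
#define DEMO_STRTAB_SPLIT		8	/* assumed */

static unsigned int demo_ilog2(unsigned int x)
{
	unsigned int r = 0;

	while (x >>= 1)
		r++;
	return r;
}

static unsigned int demo_num_l1_ents(unsigned int sid_bits)
{
	unsigned int size;

	if (sid_bits < DEMO_STRTAB_SPLIT) {
		/* One L2 table already covers every StreamID */
		size = 0;
	} else {
		size = DEMO_STRTAB_L1_SZ_SHIFT -
		       (demo_ilog2(DEMO_STRTAB_L1_DESC_DWORDS) + 3);
		if (size > sid_bits - DEMO_STRTAB_SPLIT)
			size = sid_bits - DEMO_STRTAB_SPLIT;
	}
	return 1u << size;
}

int main(void)
{
	printf("sid_bits=6  -> %u L1 descriptor(s)\n", demo_num_l1_ents(6));  /* 1 */
	printf("sid_bits=16 -> %u L1 descriptor(s)\n", demo_num_l1_ents(16)); /* 256 */
	return 0;
}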
@@ -37,6 +37,7 @@
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
@@ -607,34 +608,10 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
}
}
static void arm_smmu_flush_pgtable(void *addr, size_t size, void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
struct arm_smmu_device *smmu = smmu_domain->smmu;
unsigned long offset = (unsigned long)addr & ~PAGE_MASK;
/* Ensure new page tables are visible to the hardware walker */
if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) {
dsb(ishst);
} else {
/*
* If the SMMU can't walk tables in the CPU caches, treat them
* like non-coherent DMA since we need to flush the new entries
* all the way out to memory. There's no possibility of
* recursion here as the SMMU table walker will not be wired
* through another SMMU.
*/
dma_map_page(smmu->dev, virt_to_page(addr), offset, size,
DMA_TO_DEVICE);
}
}
static struct iommu_gather_ops arm_smmu_gather_ops = {
.tlb_flush_all = arm_smmu_tlb_inv_context,
.tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
.tlb_sync = arm_smmu_tlb_sync,
.flush_pgtable = arm_smmu_flush_pgtable,
};
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
@@ -898,6 +875,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
.ias = ias,
.oas = oas,
.tlb = &arm_smmu_gather_ops,
.iommu_dev = smmu->dev,
};
smmu_domain->smmu = smmu;
@@ -1532,6 +1510,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
unsigned long size;
void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
u32 id;
bool cttw_dt, cttw_reg;
dev_notice(smmu->dev, "probing hardware configuration...\n");
dev_notice(smmu->dev, "SMMUv%d with:\n", smmu->version);
@@ -1571,10 +1550,22 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
dev_notice(smmu->dev, "\taddress translation ops\n");
}
if (id & ID0_CTTW) {
smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
dev_notice(smmu->dev, "\tcoherent table walk\n");
}
/*
* In order for DMA API calls to work properly, we must defer to what
* the DT says about coherency, regardless of what the hardware claims.
* Fortunately, this also opens up a workaround for systems where the
* ID register value has ended up configured incorrectly.
*/
cttw_dt = of_dma_is_coherent(smmu->dev->of_node);
cttw_reg = !!(id & ID0_CTTW);
if (cttw_dt)
smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
if (cttw_dt || cttw_reg)
dev_notice(smmu->dev, "\t%scoherent table walk\n",
cttw_dt ? "" : "non-");
if (cttw_dt != cttw_reg)
dev_notice(smmu->dev,
"\t(IDR0.CTTW overridden by dma-coherent property)\n");
if (id & ID0_SMS) {
u32 smr, sid, mask;
......
@@ -26,6 +26,8 @@
#include <linux/slab.h>
#include <linux/types.h>
#include <asm/barrier.h>
#include "io-pgtable.h" #include "io-pgtable.h"
#define ARM_LPAE_MAX_ADDR_BITS 48 #define ARM_LPAE_MAX_ADDR_BITS 48
...@@ -200,12 +202,74 @@ typedef u64 arm_lpae_iopte; ...@@ -200,12 +202,74 @@ typedef u64 arm_lpae_iopte;
static bool selftest_running = false; static bool selftest_running = false;
static dma_addr_t __arm_lpae_dma_addr(struct device *dev, void *pages)
{
return phys_to_dma(dev, virt_to_phys(pages));
}
static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
struct io_pgtable_cfg *cfg)
{
struct device *dev = cfg->iommu_dev;
dma_addr_t dma;
void *pages = alloc_pages_exact(size, gfp | __GFP_ZERO);
if (!pages)
return NULL;
if (!selftest_running) {
dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
if (dma_mapping_error(dev, dma))
goto out_free;
/*
* We depend on the IOMMU being able to work with any physical
* address directly, so if the DMA layer suggests it can't by
* giving us back some translation, that bodes very badly...
*/
if (dma != __arm_lpae_dma_addr(dev, pages))
goto out_unmap;
}
return pages;
out_unmap:
dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
out_free:
free_pages_exact(pages, size);
return NULL;
}
static void __arm_lpae_free_pages(void *pages, size_t size,
struct io_pgtable_cfg *cfg)
{
struct device *dev = cfg->iommu_dev;
if (!selftest_running)
dma_unmap_single(dev, __arm_lpae_dma_addr(dev, pages),
size, DMA_TO_DEVICE);
free_pages_exact(pages, size);
}
static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
struct io_pgtable_cfg *cfg)
{
struct device *dev = cfg->iommu_dev;
*ptep = pte;
if (!selftest_running)
dma_sync_single_for_device(dev, __arm_lpae_dma_addr(dev, ptep),
sizeof(pte), DMA_TO_DEVICE);
}
static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
unsigned long iova, phys_addr_t paddr,
arm_lpae_iopte prot, int lvl,
arm_lpae_iopte *ptep)
{
arm_lpae_iopte pte = prot;
struct io_pgtable_cfg *cfg = &data->iop.cfg;
/* We require an unmap first */
if (iopte_leaf(*ptep, lvl)) {
@@ -213,7 +277,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
return -EEXIST;
}
if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
pte |= ARM_LPAE_PTE_NS;
if (lvl == ARM_LPAE_MAX_LEVELS - 1)
@@ -224,8 +288,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
pte |= pfn_to_iopte(paddr >> data->pg_shift, data);
*ptep = pte;
data->iop.cfg.tlb->flush_pgtable(ptep, sizeof(*ptep), data->iop.cookie);
__arm_lpae_set_pte(ptep, pte, cfg);
return 0;
}
@@ -234,14 +297,14 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
int lvl, arm_lpae_iopte *ptep)
{
arm_lpae_iopte *cptep, pte;
void *cookie = data->iop.cookie;
size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
struct io_pgtable_cfg *cfg = &data->iop.cfg;
/* Find our entry at the current level */
ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
/* If we can install a leaf entry at this level, then do so */
if (size == block_size && (size & data->iop.cfg.pgsize_bitmap))
if (size == block_size && (size & cfg->pgsize_bitmap))
return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep);
/* We can't allocate tables at the final level */
@@ -251,18 +314,15 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
/* Grab a pointer to the next level */
pte = *ptep;
if (!pte) {
cptep = alloc_pages_exact(1UL << data->pg_shift,
GFP_ATOMIC | __GFP_ZERO);
cptep = __arm_lpae_alloc_pages(1UL << data->pg_shift,
GFP_ATOMIC, cfg);
if (!cptep)
return -ENOMEM;
data->iop.cfg.tlb->flush_pgtable(cptep, 1UL << data->pg_shift,
cookie);
pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE;
if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
pte |= ARM_LPAE_PTE_NSTABLE;
*ptep = pte;
data->iop.cfg.tlb->flush_pgtable(ptep, sizeof(*ptep), cookie);
__arm_lpae_set_pte(ptep, pte, cfg);
} else {
cptep = iopte_deref(pte, data);
}
@@ -309,7 +369,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
{
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
arm_lpae_iopte *ptep = data->pgd;
int lvl = ARM_LPAE_START_LVL(data);
int ret, lvl = ARM_LPAE_START_LVL(data);
arm_lpae_iopte prot;
/* If no access, then nothing to do */
@@ -317,7 +377,14 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
return 0;
prot = arm_lpae_prot_to_pte(data, iommu_prot);
return __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep);
ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep);
/*
* Synchronise all PTE updates for the new mapping before there's
* a chance for anything to kick off a table walk for the new iova.
*/
wmb();
return ret;
}
static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
@@ -347,7 +414,7 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
__arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
}
free_pages_exact(start, table_size);
__arm_lpae_free_pages(start, table_size, &data->iop.cfg);
}
static void arm_lpae_free_pgtable(struct io_pgtable *iop)
@@ -366,8 +433,7 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
unsigned long blk_start, blk_end;
phys_addr_t blk_paddr;
arm_lpae_iopte table = 0;
void *cookie = data->iop.cookie;
struct io_pgtable_cfg *cfg = &data->iop.cfg;
const struct iommu_gather_ops *tlb = data->iop.cfg.tlb;
blk_start = iova & ~(blk_size - 1);
blk_end = blk_start + blk_size;
@@ -393,10 +459,9 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
}
}
*ptep = table;
tlb->flush_pgtable(ptep, sizeof(*ptep), cookie);
__arm_lpae_set_pte(ptep, table, cfg);
iova &= ~(blk_size - 1);
tlb->tlb_add_flush(iova, blk_size, true, cookie);
cfg->tlb->tlb_add_flush(iova, blk_size, true, data->iop.cookie);
return size;
}
@@ -418,13 +483,12 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
/* If the size matches this level, we're in the right place */
if (size == blk_size) {
*ptep = 0;
tlb->flush_pgtable(ptep, sizeof(*ptep), cookie);
__arm_lpae_set_pte(ptep, 0, &data->iop.cfg);
if (!iopte_leaf(pte, lvl)) {
/* Also flush any partial walks */
tlb->tlb_add_flush(iova, size, false, cookie);
tlb->tlb_sync(data->iop.cookie);
tlb->tlb_sync(cookie);
ptep = iopte_deref(pte, data);
__arm_lpae_free_pgtable(data, lvl + 1, ptep);
} else {
@@ -640,11 +704,12 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
cfg->arm_lpae_s1_cfg.mair[1] = 0;
/* Looking good; allocate a pgd */
data->pgd = alloc_pages_exact(data->pgd_size, GFP_KERNEL | __GFP_ZERO);
data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
if (!data->pgd)
goto out_free_data;
cfg->tlb->flush_pgtable(data->pgd, data->pgd_size, cookie);
/* Ensure the empty pgd is visible before any actual TTBR write */
wmb();
/* TTBRs */
cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd);
@@ -728,11 +793,12 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
cfg->arm_lpae_s2_cfg.vtcr = reg;
/* Allocate pgd pages */
data->pgd = alloc_pages_exact(data->pgd_size, GFP_KERNEL | __GFP_ZERO);
data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
if (!data->pgd)
goto out_free_data;
cfg->tlb->flush_pgtable(data->pgd, data->pgd_size, cookie);
/* Ensure the empty pgd is visible before any actual TTBR write */
wmb();
/* VTTBR */
cfg->arm_lpae_s2_cfg.vttbr = virt_to_phys(data->pgd);
@@ -818,16 +884,10 @@ static void dummy_tlb_sync(void *cookie)
WARN_ON(cookie != cfg_cookie);
}
static void dummy_flush_pgtable(void *ptr, size_t size, void *cookie)
{
WARN_ON(cookie != cfg_cookie);
}
static struct iommu_gather_ops dummy_tlb_ops __initdata = {
.tlb_flush_all = dummy_tlb_flush_all,
.tlb_add_flush = dummy_tlb_add_flush,
.tlb_sync = dummy_tlb_sync,
.flush_pgtable = dummy_flush_pgtable,
};
static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
......
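The io-pgtable-arm changes above drop the flush_pgtable callback and make the allocator manage walker visibility itself through the DMA API. The sketch below condenses that pattern for a non-coherent walker; the demo_* names are hypothetical, and unlike the real __arm_lpae_alloc_pages it does not check that the returned DMA address matches the physical address written into table entries.

#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/types.h>

/* Map table memory once, at allocation time */
static u64 *demo_alloc_table(struct device *dev, size_t size, dma_addr_t *dma)
{
	u64 *tbl = alloc_pages_exact(size, GFP_ATOMIC | __GFP_ZERO);

	if (!tbl)
		return NULL;

	*dma = dma_map_single(dev, tbl, size, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, *dma)) {
		free_pages_exact(tbl, size);
		return NULL;
	}
	return tbl;
}

/* Push each updated entry out to the walker as it is written */
static void demo_set_pte(struct device *dev, u64 *tbl, dma_addr_t tbl_dma,
			 unsigned int idx, u64 pte)
{
	tbl[idx] = pte;
	dma_sync_single_for_device(dev, tbl_dma + idx * sizeof(pte),
				   sizeof(pte), DMA_TO_DEVICE);
}

/* Tear down the mapping before freeing the memory */
static void demo_free_table(struct device *dev, u64 *tbl, dma_addr_t tbl_dma,
			    size_t size)
{
	dma_unmap_single(dev, tbl_dma, size, DMA_TO_DEVICE);
	free_pages_exact(tbl, size);
}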
@@ -17,8 +17,9 @@ enum io_pgtable_fmt {
*
* @tlb_flush_all: Synchronously invalidate the entire TLB context.
* @tlb_add_flush: Queue up a TLB invalidation for a virtual address range.
* @tlb_sync: Ensure any queue TLB invalidation has taken effect.
* @flush_pgtable: Ensure page table updates are visible to the IOMMU.
* @tlb_sync: Ensure any queued TLB invalidation has taken effect, and
* any corresponding page table updates are visible to the
* IOMMU.
*
* Note that these can all be called in atomic context and must therefore
* not block.
@@ -28,7 +29,6 @@ struct iommu_gather_ops {
void (*tlb_add_flush)(unsigned long iova, size_t size, bool leaf,
void *cookie);
void (*tlb_sync)(void *cookie);
void (*flush_pgtable)(void *ptr, size_t size, void *cookie);
};
/**
@@ -41,6 +41,8 @@ struct iommu_gather_ops {
* @ias: Input address (iova) size, in bits.
* @oas: Output address (paddr) size, in bits.
* @tlb: TLB management callbacks for this set of tables.
* @iommu_dev: The device representing the DMA configuration for the
* page table walker.
*/
struct io_pgtable_cfg {
#define IO_PGTABLE_QUIRK_ARM_NS (1 << 0) /* Set NS bit in PTEs */
@@ -49,6 +51,7 @@ struct io_pgtable_cfg {
unsigned int ias;
unsigned int oas;
const struct iommu_gather_ops *tlb;
struct device *iommu_dev;
/* Low-level data specific to the table format */
union {
......
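With the new @iommu_dev field, an io-pgtable user tells the library which device's DMA configuration governs the table walker, as the SMMU and ipmmu-vmsa hunks in this merge do. A hedged sketch of such a caller is below; the demo_* function and the specific sizes are placeholders, while the io_pgtable_cfg fields and the alloc/free entry points come from the library API.

#include <linux/sizes.h>
#include <linux/errno.h>
#include "io-pgtable.h"

static int demo_init_pgtable(struct device *walker_dev,
			     const struct iommu_gather_ops *tlb_ops,
			     void *cookie, struct io_pgtable_ops **out)
{
	struct io_pgtable_cfg cfg = {
		.pgsize_bitmap	= SZ_4K | SZ_2M | SZ_1G,	/* placeholder */
		.ias		= 48,				/* placeholder */
		.oas		= 48,				/* placeholder */
		.tlb		= tlb_ops,
		/* Device whose dma-coherent/DMA ops describe the walker */
		.iommu_dev	= walker_dev,
	};

	*out = alloc_io_pgtable_ops(ARM_64_LPAE_S1, &cfg, cookie);
	if (!*out)
		return -ENOMEM;

	/*
	 * Mappings now go through (*out)->map()/unmap(); release with
	 * free_io_pgtable_ops(*out) when the domain is torn down.
	 */
	return 0;
}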
@@ -283,24 +283,10 @@ static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, bool leaf,
/* The hardware doesn't support selective TLB flush. */
}
static void ipmmu_flush_pgtable(void *ptr, size_t size, void *cookie)
{
unsigned long offset = (unsigned long)ptr & ~PAGE_MASK;
struct ipmmu_vmsa_domain *domain = cookie;
/*
* TODO: Add support for coherent walk through CCI with DVM and remove
* cache handling.
*/
dma_map_page(domain->mmu->dev, virt_to_page(ptr), offset, size,
DMA_TO_DEVICE);
}
static struct iommu_gather_ops ipmmu_gather_ops = {
.tlb_flush_all = ipmmu_tlb_flush_all,
.tlb_add_flush = ipmmu_tlb_add_flush,
.tlb_sync = ipmmu_tlb_flush_all,
.flush_pgtable = ipmmu_flush_pgtable,
};
/* -----------------------------------------------------------------------------
@@ -327,6 +313,11 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
domain->cfg.ias = 32;
domain->cfg.oas = 40;
domain->cfg.tlb = &ipmmu_gather_ops;
/*
* TODO: Add support for coherent walk through CCI with DVM and remove
* cache handling. For now, delegate it to the io-pgtable code.
*/
domain->cfg.iommu_dev = domain->mmu->dev;
domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg,
domain);
......