Commit 1554240f authored by Will Deacon's avatar Will Deacon

Merge branches 'for-joerg/arm-smmu/smmu-v2' and 'for-joerg/arm-smmu/smmu-v3'...

Merge branches 'for-joerg/arm-smmu/smmu-v2' and 'for-joerg/arm-smmu/smmu-v3' into for-joerg/arm-smmu/updates

* for-joerg/arm-smmu/smmu-v2:
  Refactoring to allow for implementation-specific hooks in 'arm-smmu-impl.c'

* for-joerg/arm-smmu/smmu-v3:
  Support for deferred TLB invalidation and batching of commands
  Rework ATC invalidation for ATS-enabled PCIe masters
parents d720e641 a91bcc2b
......@@ -222,7 +222,7 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
size_t unmapped_page;
size_t pgsize = get_pgsize(iova, len - unmapped_len);
unmapped_page = ops->unmap(ops, iova, pgsize);
unmapped_page = ops->unmap(ops, iova, pgsize, NULL);
if (!unmapped_page)
break;
......@@ -247,20 +247,28 @@ static void mmu_tlb_inv_context_s1(void *cookie)
mmu_hw_do_operation(pfdev, 0, 0, ~0UL, AS_COMMAND_FLUSH_MEM);
}
static void mmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
size_t granule, bool leaf, void *cookie)
{}
static void mmu_tlb_sync_context(void *cookie)
{
//struct panfrost_device *pfdev = cookie;
// TODO: Wait 1000 GPU cycles for HW_ISSUE_6367/T60X
}
static const struct iommu_gather_ops mmu_tlb_ops = {
static void mmu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule,
void *cookie)
{
mmu_tlb_sync_context(cookie);
}
static void mmu_tlb_flush_leaf(unsigned long iova, size_t size, size_t granule,
void *cookie)
{
mmu_tlb_sync_context(cookie);
}
static const struct iommu_flush_ops mmu_tlb_ops = {
.tlb_flush_all = mmu_tlb_inv_context_s1,
.tlb_add_flush = mmu_tlb_inv_range_nosync,
.tlb_sync = mmu_tlb_sync_context,
.tlb_flush_walk = mmu_tlb_flush_walk,
.tlb_flush_leaf = mmu_tlb_flush_leaf,
};
static const char *access_type_name(struct panfrost_device *pfdev,
......
......@@ -3055,7 +3055,8 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
}
static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
size_t page_size)
size_t page_size,
struct iommu_iotlb_gather *gather)
{
struct protection_domain *domain = to_pdomain(dom);
size_t unmap_size;
......@@ -3196,9 +3197,10 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
domain_flush_complete(dom);
}
static void amd_iommu_iotlb_range_add(struct iommu_domain *domain,
unsigned long iova, size_t size)
static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
amd_iommu_flush_iotlb_all(domain);
}
const struct iommu_ops amd_iommu_ops = {
......@@ -3219,8 +3221,7 @@ const struct iommu_ops amd_iommu_ops = {
.is_attach_deferred = amd_iommu_is_attach_deferred,
.pgsize_bitmap = AMD_IOMMU_PGSIZES,
.flush_iotlb_all = amd_iommu_flush_iotlb_all,
.iotlb_range_add = amd_iommu_iotlb_range_add,
.iotlb_sync = amd_iommu_flush_iotlb_all,
.iotlb_sync = amd_iommu_iotlb_sync,
};
/*****************************************************************************
......
......@@ -181,12 +181,13 @@
#define ARM_SMMU_MEMATTR_DEVICE_nGnRE 0x1
#define ARM_SMMU_MEMATTR_OIWB 0xf
#define Q_IDX(q, p) ((p) & ((1 << (q)->max_n_shift) - 1))
#define Q_WRP(q, p) ((p) & (1 << (q)->max_n_shift))
#define Q_OVERFLOW_FLAG (1 << 31)
#define Q_OVF(q, p) ((p) & Q_OVERFLOW_FLAG)
#define Q_IDX(llq, p) ((p) & ((1 << (llq)->max_n_shift) - 1))
#define Q_WRP(llq, p) ((p) & (1 << (llq)->max_n_shift))
#define Q_OVERFLOW_FLAG (1U << 31)
#define Q_OVF(p) ((p) & Q_OVERFLOW_FLAG)
#define Q_ENT(q, p) ((q)->base + \
Q_IDX(q, p) * (q)->ent_dwords)
Q_IDX(&((q)->llq), p) * \
(q)->ent_dwords)
#define Q_BASE_RWA (1UL << 62)
#define Q_BASE_ADDR_MASK GENMASK_ULL(51, 5)
......@@ -306,6 +307,15 @@
#define CMDQ_ERR_CERROR_ABT_IDX 2
#define CMDQ_ERR_CERROR_ATC_INV_IDX 3
#define CMDQ_PROD_OWNED_FLAG Q_OVERFLOW_FLAG
/*
* This is used to size the command queue and therefore must be at least
* BITS_PER_LONG so that the valid_map works correctly (it relies on the
* total number of queue entries being a multiple of BITS_PER_LONG).
*/
#define CMDQ_BATCH_ENTRIES BITS_PER_LONG
#define CMDQ_0_OP GENMASK_ULL(7, 0)
#define CMDQ_0_SSV (1UL << 11)
......@@ -368,9 +378,8 @@
#define PRIQ_1_ADDR_MASK GENMASK_ULL(63, 12)
/* High-level queue structures */
#define ARM_SMMU_POLL_TIMEOUT_US 100
#define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US 1000000 /* 1s! */
#define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT 10
#define ARM_SMMU_POLL_TIMEOUT_US 1000000 /* 1s! */
#define ARM_SMMU_POLL_SPIN_COUNT 10
#define MSI_IOVA_BASE 0x8000000
#define MSI_IOVA_LENGTH 0x100000
......@@ -472,13 +481,29 @@ struct arm_smmu_cmdq_ent {
#define CMDQ_OP_CMD_SYNC 0x46
struct {
u32 msidata;
u64 msiaddr;
} sync;
};
};
struct arm_smmu_ll_queue {
union {
u64 val;
struct {
u32 prod;
u32 cons;
};
struct {
atomic_t prod;
atomic_t cons;
} atomic;
u8 __pad[SMP_CACHE_BYTES];
} ____cacheline_aligned_in_smp;
u32 max_n_shift;
};
struct arm_smmu_queue {
struct arm_smmu_ll_queue llq;
int irq; /* Wired interrupt */
__le64 *base;
......@@ -486,17 +511,23 @@ struct arm_smmu_queue {
u64 q_base;
size_t ent_dwords;
u32 max_n_shift;
u32 prod;
u32 cons;
u32 __iomem *prod_reg;
u32 __iomem *cons_reg;
};
struct arm_smmu_queue_poll {
ktime_t timeout;
unsigned int delay;
unsigned int spin_cnt;
bool wfe;
};
struct arm_smmu_cmdq {
struct arm_smmu_queue q;
spinlock_t lock;
atomic_long_t *valid_map;
atomic_t owner_prod;
atomic_t lock;
};
struct arm_smmu_evtq {
......@@ -576,8 +607,6 @@ struct arm_smmu_device {
int gerr_irq;
int combined_irq;
u32 sync_nr;
u8 prev_cmd_opcode;
unsigned long ias; /* IPA */
unsigned long oas; /* PA */
......@@ -596,12 +625,6 @@ struct arm_smmu_device {
struct arm_smmu_strtab_cfg strtab_cfg;
/* Hi16xx adds an extra 32 bits of goodness to its MSI payload */
union {
u32 sync_count;
u64 padding;
};
/* IOMMU core code handle */
struct iommu_device iommu;
};
......@@ -614,7 +637,7 @@ struct arm_smmu_master {
struct list_head domain_head;
u32 *sids;
unsigned int num_sids;
bool ats_enabled :1;
bool ats_enabled;
};
/* SMMU private data for an IOMMU domain */
......@@ -631,6 +654,7 @@ struct arm_smmu_domain {
struct io_pgtable_ops *pgtbl_ops;
bool non_strict;
atomic_t nr_ats_masters;
enum arm_smmu_domain_stage stage;
union {
......@@ -685,85 +709,97 @@ static void parse_driver_options(struct arm_smmu_device *smmu)
}
/* Low-level queue manipulation functions */
static bool queue_full(struct arm_smmu_queue *q)
static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
{
u32 space, prod, cons;
prod = Q_IDX(q, q->prod);
cons = Q_IDX(q, q->cons);
if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
space = (1 << q->max_n_shift) - (prod - cons);
else
space = cons - prod;
return space >= n;
}
static bool queue_full(struct arm_smmu_ll_queue *q)
{
return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
}
static bool queue_empty(struct arm_smmu_queue *q)
static bool queue_empty(struct arm_smmu_ll_queue *q)
{
return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
}
static void queue_sync_cons(struct arm_smmu_queue *q)
static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
{
q->cons = readl_relaxed(q->cons_reg);
return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
}
static void queue_inc_cons(struct arm_smmu_queue *q)
static void queue_sync_cons_out(struct arm_smmu_queue *q)
{
u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
/*
* Ensure that all CPU accesses (reads and writes) to the queue
* are complete before we update the cons pointer.
*/
mb();
writel_relaxed(q->cons, q->cons_reg);
writel_relaxed(q->llq.cons, q->cons_reg);
}
static void queue_inc_cons(struct arm_smmu_ll_queue *q)
{
u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
}
static int queue_sync_prod(struct arm_smmu_queue *q)
static int queue_sync_prod_in(struct arm_smmu_queue *q)
{
int ret = 0;
u32 prod = readl_relaxed(q->prod_reg);
if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
if (Q_OVF(prod) != Q_OVF(q->llq.prod))
ret = -EOVERFLOW;
q->prod = prod;
q->llq.prod = prod;
return ret;
}
static void queue_inc_prod(struct arm_smmu_queue *q)
static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
{
u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
writel(q->prod, q->prod_reg);
u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
}
/*
* Wait for the SMMU to consume items. If sync is true, wait until the queue
* is empty. Otherwise, wait until there is at least one free slot.
*/
static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
static void queue_poll_init(struct arm_smmu_device *smmu,
struct arm_smmu_queue_poll *qp)
{
ktime_t timeout;
unsigned int delay = 1, spin_cnt = 0;
/* Wait longer if it's a CMD_SYNC */
timeout = ktime_add_us(ktime_get(), sync ?
ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
ARM_SMMU_POLL_TIMEOUT_US);
qp->delay = 1;
qp->spin_cnt = 0;
qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
}
while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
if (ktime_compare(ktime_get(), timeout) > 0)
return -ETIMEDOUT;
static int queue_poll(struct arm_smmu_queue_poll *qp)
{
if (ktime_compare(ktime_get(), qp->timeout) > 0)
return -ETIMEDOUT;
if (wfe) {
wfe();
} else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
cpu_relax();
continue;
} else {
udelay(delay);
delay *= 2;
spin_cnt = 0;
}
if (qp->wfe) {
wfe();
} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
cpu_relax();
} else {
udelay(qp->delay);
qp->delay *= 2;
qp->spin_cnt = 0;
}
return 0;
......@@ -777,16 +813,6 @@ static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
*dst++ = cpu_to_le64(*src++);
}
static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
{
if (queue_full(q))
return -ENOSPC;
queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
queue_inc_prod(q);
return 0;
}
static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
{
int i;
......@@ -797,11 +823,12 @@ static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
{
if (queue_empty(q))
if (queue_empty(&q->llq))
return -EAGAIN;
queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
queue_inc_cons(q);
queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
queue_inc_cons(&q->llq);
queue_sync_cons_out(q);
return 0;
}
......@@ -868,20 +895,14 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
break;
case CMDQ_OP_CMD_SYNC:
if (ent->sync.msiaddr)
if (ent->sync.msiaddr) {
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
else
cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
} else {
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
}
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
/*
* Commands are written little-endian, but we want the SMMU to
* receive MSIData, and thus write it back to memory, in CPU
* byte order, so big-endian needs an extra byteswap here.
*/
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA,
cpu_to_le32(ent->sync.msidata));
cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
break;
default:
return -ENOENT;
......@@ -890,6 +911,27 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
return 0;
}
static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
u32 prod)
{
struct arm_smmu_queue *q = &smmu->cmdq.q;
struct arm_smmu_cmdq_ent ent = {
.opcode = CMDQ_OP_CMD_SYNC,
};
/*
* Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
* payload, so the write will zero the entire command on that platform.
*/
if (smmu->features & ARM_SMMU_FEAT_MSI &&
smmu->features & ARM_SMMU_FEAT_COHERENCY) {
ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
q->ent_dwords * 8;
}
arm_smmu_cmdq_build_cmd(cmd, &ent);
}
static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
{
static const char *cerror_str[] = {
......@@ -948,109 +990,456 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}
static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
/*
* Command queue locking.
* This is a form of bastardised rwlock with the following major changes:
*
* - The only LOCK routines are exclusive_trylock() and shared_lock().
* Neither have barrier semantics, and instead provide only a control
* dependency.
*
* - The UNLOCK routines are supplemented with shared_tryunlock(), which
* fails if the caller appears to be the last lock holder (yes, this is
* racy). All successful UNLOCK routines have RELEASE semantics.
*/
static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
{
struct arm_smmu_queue *q = &smmu->cmdq.q;
bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
int val;
/*
* We can try to avoid the cmpxchg() loop by simply incrementing the
* lock counter. When held in exclusive state, the lock counter is set
* to INT_MIN so these increments won't hurt as the value will remain
* negative.
*/
if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
return;
do {
val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
}
static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
{
(void)atomic_dec_return_release(&cmdq->lock);
}
static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
{
if (atomic_read(&cmdq->lock) == 1)
return false;
arm_smmu_cmdq_shared_unlock(cmdq);
return true;
}
#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags) \
({ \
bool __ret; \
local_irq_save(flags); \
__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN); \
if (!__ret) \
local_irq_restore(flags); \
__ret; \
})
#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags) \
({ \
atomic_set_release(&cmdq->lock, 0); \
local_irq_restore(flags); \
})
/*
* Command queue insertion.
* This is made fiddly by our attempts to achieve some sort of scalability
* since there is one queue shared amongst all of the CPUs in the system. If
* you like mixed-size concurrency, dependency ordering and relaxed atomics,
* then you'll *love* this monstrosity.
*
* The basic idea is to split the queue up into ranges of commands that are
* owned by a given CPU; the owner may not have written all of the commands
* itself, but is responsible for advancing the hardware prod pointer when
* the time comes. The algorithm is roughly:
*
* 1. Allocate some space in the queue. At this point we also discover
* whether the head of the queue is currently owned by another CPU,
* or whether we are the owner.
*
* 2. Write our commands into our allocated slots in the queue.
*
* 3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
*
* 4. If we are an owner:
* a. Wait for the previous owner to finish.
* b. Mark the queue head as unowned, which tells us the range
* that we are responsible for publishing.
* c. Wait for all commands in our owned range to become valid.
* d. Advance the hardware prod pointer.
* e. Tell the next owner we've finished.
*
* 5. If we are inserting a CMD_SYNC (we may or may not have been an
* owner), then we need to stick around until it has completed:
* a. If we have MSIs, the SMMU can write back into the CMD_SYNC
* to clear the first 4 bytes.
* b. Otherwise, we spin waiting for the hardware cons pointer to
* advance past our command.
*
* The devil is in the details, particularly the use of locking for handling
* SYNC completion and freeing up space in the queue before we think that it is
* full.
*/
static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
u32 sprod, u32 eprod, bool set)
{
u32 swidx, sbidx, ewidx, ebidx;
struct arm_smmu_ll_queue llq = {
.max_n_shift = cmdq->q.llq.max_n_shift,
.prod = sprod,
};
ewidx = BIT_WORD(Q_IDX(&llq, eprod));
ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
smmu->prev_cmd_opcode = FIELD_GET(CMDQ_0_OP, cmd[0]);
while (llq.prod != eprod) {
unsigned long mask;
atomic_long_t *ptr;
u32 limit = BITS_PER_LONG;
while (queue_insert_raw(q, cmd) == -ENOSPC) {
if (queue_poll_cons(q, false, wfe))
dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
ptr = &cmdq->valid_map[swidx];
if ((swidx == ewidx) && (sbidx < ebidx))
limit = ebidx;
mask = GENMASK(limit - 1, sbidx);
/*
* The valid bit is the inverse of the wrap bit. This means
* that a zero-initialised queue is invalid and, after marking
* all entries as valid, they become invalid again when we
* wrap.
*/
if (set) {
atomic_long_xor(mask, ptr);
} else { /* Poll */
unsigned long valid;
valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
}
llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
}
}
static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_ent *ent)
/* Mark all entries in the range [sprod, eprod) as valid */
static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
u32 sprod, u32 eprod)
{
__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
}
/* Wait for all entries in the range [sprod, eprod) to become valid */
static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
u32 sprod, u32 eprod)
{
__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
}
/* Wait for the command queue to become non-full */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
struct arm_smmu_ll_queue *llq)
{
u64 cmd[CMDQ_ENT_DWORDS];
unsigned long flags;
struct arm_smmu_queue_poll qp;
struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
int ret = 0;
if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
ent->opcode);
return;
/*
* Try to update our copy of cons by grabbing exclusive cmdq access. If
* that fails, spin until somebody else updates it for us.
*/
if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
llq->val = READ_ONCE(cmdq->q.llq.val);
return 0;
}
spin_lock_irqsave(&smmu->cmdq.lock, flags);
arm_smmu_cmdq_insert_cmd(smmu, cmd);
spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
queue_poll_init(smmu, &qp);
do {
llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
if (!queue_full(llq))
break;
ret = queue_poll(&qp);
} while (!ret);
return ret;
}
/*
* The difference between val and sync_idx is bounded by the maximum size of
* a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
* Wait until the SMMU signals a CMD_SYNC completion MSI.
* Must be called with the cmdq lock held in some capacity.
*/
static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
struct arm_smmu_ll_queue *llq)
{
ktime_t timeout;
u32 val;
int ret = 0;
struct arm_smmu_queue_poll qp;
struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
val = smp_cond_load_acquire(&smmu->sync_count,
(int)(VAL - sync_idx) >= 0 ||
!ktime_before(ktime_get(), timeout));
queue_poll_init(smmu, &qp);
return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
/*
* The MSI won't generate an event, since it's being written back
* into the command queue.
*/
qp.wfe = false;
smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
return ret;
}
static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
/*
* Wait until the SMMU cons index passes llq->prod.
* Must be called with the cmdq lock held in some capacity.
*/
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
struct arm_smmu_ll_queue *llq)
{
u64 cmd[CMDQ_ENT_DWORDS];
unsigned long flags;
struct arm_smmu_cmdq_ent ent = {
.opcode = CMDQ_OP_CMD_SYNC,
.sync = {
.msiaddr = virt_to_phys(&smmu->sync_count),
},
};
struct arm_smmu_queue_poll qp;
struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
u32 prod = llq->prod;
int ret = 0;
spin_lock_irqsave(&smmu->cmdq.lock, flags);
queue_poll_init(smmu, &qp);
llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
do {
if (queue_consumed(llq, prod))
break;
/* Piggy-back on the previous command if it's a SYNC */
if (smmu->prev_cmd_opcode == CMDQ_OP_CMD_SYNC) {
ent.sync.msidata = smmu->sync_nr;
} else {
ent.sync.msidata = ++smmu->sync_nr;
arm_smmu_cmdq_build_cmd(cmd, &ent);
arm_smmu_cmdq_insert_cmd(smmu, cmd);
}
ret = queue_poll(&qp);
spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
/*
* This needs to be a readl() so that our subsequent call
* to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
*
* Specifically, we need to ensure that we observe all
* shared_lock()s by other CMD_SYNCs that share our owner,
* so that a failing call to tryunlock() means that we're
* the last one out and therefore we can safely advance
* cmdq->q.llq.cons. Roughly speaking:
*
* CPU 0 CPU1 CPU2 (us)
*
* if (sync)
* shared_lock();
*
* dma_wmb();
* set_valid_map();
*
* if (owner) {
* poll_valid_map();
* <control dependency>
* writel(prod_reg);
*
* readl(cons_reg);
* tryunlock();
*
* Requires us to see CPU 0's shared_lock() acquisition.
*/
llq->cons = readl(cmdq->q.cons_reg);
} while (!ret);
return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
return ret;
}
static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
struct arm_smmu_ll_queue *llq)
{
u64 cmd[CMDQ_ENT_DWORDS];
if (smmu->features & ARM_SMMU_FEAT_MSI &&
smmu->features & ARM_SMMU_FEAT_COHERENCY)
return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
}
static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
u32 prod, int n)
{
int i;
struct arm_smmu_ll_queue llq = {
.max_n_shift = cmdq->q.llq.max_n_shift,
.prod = prod,
};
for (i = 0; i < n; ++i) {
u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
prod = queue_inc_prod_n(&llq, i);
queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
}
}
/*
* This is the actual insertion function, and provides the following
* ordering guarantees to callers:
*
* - There is a dma_wmb() before publishing any commands to the queue.
* This can be relied upon to order prior writes to data structures
* in memory (such as a CD or an STE) before the command.
*
* - On completion of a CMD_SYNC, there is a control dependency.
* This can be relied upon to order subsequent writes to memory (e.g.
* freeing an IOVA) after completion of the CMD_SYNC.
*
* - Command insertion is totally ordered, so if two CPUs each race to
* insert their own list of commands then all of the commands from one
* CPU will appear before any of the commands from the other CPU.
*/
static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
u64 *cmds, int n, bool sync)
{
u64 cmd_sync[CMDQ_ENT_DWORDS];
u32 prod;
unsigned long flags;
bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
int ret;
bool owner;
struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
struct arm_smmu_ll_queue llq = {
.max_n_shift = cmdq->q.llq.max_n_shift,
}, head = llq;
int ret = 0;
arm_smmu_cmdq_build_cmd(cmd, &ent);
/* 1. Allocate some space in the queue */
local_irq_save(flags);
llq.val = READ_ONCE(cmdq->q.llq.val);
do {
u64 old;
while (!queue_has_space(&llq, n + sync)) {
local_irq_restore(flags);
if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
local_irq_save(flags);
}
head.cons = llq.cons;
head.prod = queue_inc_prod_n(&llq, n + sync) |
CMDQ_PROD_OWNED_FLAG;
old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
if (old == llq.val)
break;
llq.val = old;
} while (1);
owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
head.prod &= ~CMDQ_PROD_OWNED_FLAG;
llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
/*
* 2. Write our commands into the queue
* Dependency ordering from the cmpxchg() loop above.
*/
arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
if (sync) {
prod = queue_inc_prod_n(&llq, n);
arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
/*
* In order to determine completion of our CMD_SYNC, we must
* ensure that the queue can't wrap twice without us noticing.
* We achieve that by taking the cmdq lock as shared before
* marking our slot as valid.
*/
arm_smmu_cmdq_shared_lock(cmdq);
}
/* 3. Mark our slots as valid, ensuring commands are visible first */
dma_wmb();
arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
/* 4. If we are the owner, take control of the SMMU hardware */
if (owner) {
/* a. Wait for previous owner to finish */
atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
/* b. Stop gathering work by clearing the owned flag */
prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
&cmdq->q.llq.atomic.prod);
prod &= ~CMDQ_PROD_OWNED_FLAG;
/*
* c. Wait for any gathered work to be written to the queue.
* Note that we read our own entries so that we have the control
* dependency required by (d).
*/
arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
/*
* d. Advance the hardware prod pointer
* Control dependency ordering from the entries becoming valid.
*/
writel_relaxed(prod, cmdq->q.prod_reg);
/*
* e. Tell the next owner we're done
* Make sure we've updated the hardware first, so that we don't
* race to update prod and potentially move it backwards.
*/
atomic_set_release(&cmdq->owner_prod, prod);
}
/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
if (sync) {
llq.prod = queue_inc_prod_n(&llq, n);
ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
if (ret) {
dev_err_ratelimited(smmu->dev,
"CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
llq.prod,
readl_relaxed(cmdq->q.prod_reg),
readl_relaxed(cmdq->q.cons_reg));
}
spin_lock_irqsave(&smmu->cmdq.lock, flags);
arm_smmu_cmdq_insert_cmd(smmu, cmd);
ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
/*
* Try to unlock the cmq lock. This will fail if we're the last
* reader, in which case we can safely update cmdq->q.llq.cons
*/
if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
arm_smmu_cmdq_shared_unlock(cmdq);
}
}
local_irq_restore(flags);
return ret;
}
static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_ent *ent)
{
int ret;
bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
(smmu->features & ARM_SMMU_FEAT_COHERENCY);
u64 cmd[CMDQ_ENT_DWORDS];
ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu)
: __arm_smmu_cmdq_issue_sync(smmu);
if (ret)
dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
return ret;
if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
ent->opcode);
return -EINVAL;
}
return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
}
static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
{
return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
}
/* Context descriptor manipulation functions */
......@@ -1305,6 +1694,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
int i;
struct arm_smmu_device *smmu = dev;
struct arm_smmu_queue *q = &smmu->evtq.q;
struct arm_smmu_ll_queue *llq = &q->llq;
u64 evt[EVTQ_ENT_DWORDS];
do {
......@@ -1322,12 +1712,13 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
* Not much we can do on overflow, so scream and pretend we're
* trying harder.
*/
if (queue_sync_prod(q) == -EOVERFLOW)
if (queue_sync_prod_in(q) == -EOVERFLOW)
dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
} while (!queue_empty(q));
} while (!queue_empty(llq));
/* Sync our overflow flag, as we believe we're up to speed */
q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
Q_IDX(llq, llq->cons);
return IRQ_HANDLED;
}
......@@ -1373,19 +1764,21 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
{
struct arm_smmu_device *smmu = dev;
struct arm_smmu_queue *q = &smmu->priq.q;
struct arm_smmu_ll_queue *llq = &q->llq;
u64 evt[PRIQ_ENT_DWORDS];
do {
while (!queue_remove_raw(q, evt))
arm_smmu_handle_ppr(smmu, evt);
if (queue_sync_prod(q) == -EOVERFLOW)
if (queue_sync_prod_in(q) == -EOVERFLOW)
dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
} while (!queue_empty(q));
} while (!queue_empty(llq));
/* Sync our overflow flag, as we believe we're up to speed */
q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
writel(q->cons, q->cons_reg);
llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
Q_IDX(llq, llq->cons);
queue_sync_cons_out(q);
return IRQ_HANDLED;
}
......@@ -1534,6 +1927,23 @@ static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
return 0;
/*
* Ensure that we've completed prior invalidation of the main TLBs
* before we read 'nr_ats_masters' in case of a concurrent call to
* arm_smmu_enable_ats():
*
* // unmap() // arm_smmu_enable_ats()
* TLBI+SYNC atomic_inc(&nr_ats_masters);
* smp_mb(); [...]
* atomic_read(&nr_ats_masters); pci_enable_ats() // writel()
*
* Ensures that we always see the incremented 'nr_ats_masters' count if
* ATS was enabled at the PCI device before completion of the TLBI.
*/
smp_mb();
if (!atomic_read(&smmu_domain->nr_ats_masters))
return 0;
arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
......@@ -1545,13 +1955,6 @@ static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
}
/* IO_PGTABLE API */
static void arm_smmu_tlb_sync(void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
arm_smmu_cmdq_issue_sync(smmu_domain->smmu);
}
static void arm_smmu_tlb_inv_context(void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
......@@ -1570,25 +1973,32 @@ static void arm_smmu_tlb_inv_context(void *cookie)
/*
* NOTE: when io-pgtable is in non-strict mode, we may get here with
* PTEs previously cleared by unmaps on the current CPU not yet visible
* to the SMMU. We are relying on the DSB implicit in queue_inc_prod()
* to guarantee those are observed before the TLBI. Do be careful, 007.
* to the SMMU. We are relying on the dma_wmb() implicit during cmd
* insertion to guarantee those are observed before the TLBI. Do be
* careful, 007.
*/
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
arm_smmu_cmdq_issue_sync(smmu);
arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
}
static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
size_t granule, bool leaf, void *cookie)
static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
size_t granule, bool leaf,
struct arm_smmu_domain *smmu_domain)
{
struct arm_smmu_domain *smmu_domain = cookie;
u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
struct arm_smmu_device *smmu = smmu_domain->smmu;
unsigned long start = iova, end = iova + size;
int i = 0;
struct arm_smmu_cmdq_ent cmd = {
.tlbi = {
.leaf = leaf,
.addr = iova,
},
};
if (!size)
return;
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
cmd.opcode = CMDQ_OP_TLBI_NH_VA;
cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
......@@ -1597,16 +2007,54 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
}
do {
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
cmd.tlbi.addr += granule;
} while (size -= granule);
while (iova < end) {
if (i == CMDQ_BATCH_ENTRIES) {
arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, false);
i = 0;
}
cmd.tlbi.addr = iova;
arm_smmu_cmdq_build_cmd(&cmds[i * CMDQ_ENT_DWORDS], &cmd);
iova += granule;
i++;
}
arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, true);
/*
* Unfortunately, this can't be leaf-only since we may have
* zapped an entire table.
*/
arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
}
static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule,
void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
struct iommu_domain *domain = &smmu_domain->domain;
iommu_iotlb_gather_add_page(domain, gather, iova, granule);
}
static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
}
static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
arm_smmu_tlb_inv_range(iova, size, granule, true, cookie);
}
static const struct iommu_gather_ops arm_smmu_gather_ops = {
static const struct iommu_flush_ops arm_smmu_flush_ops = {
.tlb_flush_all = arm_smmu_tlb_inv_context,
.tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
.tlb_sync = arm_smmu_tlb_sync,
.tlb_flush_walk = arm_smmu_tlb_inv_walk,
.tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
.tlb_add_page = arm_smmu_tlb_inv_page_nosync,
};
/* IOMMU API */
......@@ -1796,7 +2244,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
.ias = ias,
.oas = oas,
.coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
.tlb = &arm_smmu_gather_ops,
.tlb = &arm_smmu_flush_ops,
.iommu_dev = smmu->dev,
};
......@@ -1863,44 +2311,58 @@ static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
}
}
static int arm_smmu_enable_ats(struct arm_smmu_master *master)
static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
{
int ret;
size_t stu;
struct pci_dev *pdev;
struct arm_smmu_device *smmu = master->smmu;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
if (!(smmu->features & ARM_SMMU_FEAT_ATS) || !dev_is_pci(master->dev) ||
!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS) || pci_ats_disabled())
return -ENXIO;
return false;
pdev = to_pci_dev(master->dev);
if (pdev->untrusted)
return -EPERM;
return !pdev->untrusted && pdev->ats_cap;
}
static void arm_smmu_enable_ats(struct arm_smmu_master *master)
{
size_t stu;
struct pci_dev *pdev;
struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_domain *smmu_domain = master->domain;
/* Don't enable ATS at the endpoint if it's not enabled in the STE */
if (!master->ats_enabled)
return;
/* Smallest Translation Unit: log2 of the smallest supported granule */
stu = __ffs(smmu->pgsize_bitmap);
pdev = to_pci_dev(master->dev);
ret = pci_enable_ats(pdev, stu);
if (ret)
return ret;
master->ats_enabled = true;
return 0;
atomic_inc(&smmu_domain->nr_ats_masters);
arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
if (pci_enable_ats(pdev, stu))
dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
}
static void arm_smmu_disable_ats(struct arm_smmu_master *master)
{
struct arm_smmu_cmdq_ent cmd;
struct arm_smmu_domain *smmu_domain = master->domain;
if (!master->ats_enabled || !dev_is_pci(master->dev))
if (!master->ats_enabled)
return;
pci_disable_ats(to_pci_dev(master->dev));
/*
* Ensure ATS is disabled at the endpoint before we issue the
* ATC invalidation via the SMMU.
*/
wmb();
arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
arm_smmu_atc_inv_master(master, &cmd);
pci_disable_ats(to_pci_dev(master->dev));
master->ats_enabled = false;
atomic_dec(&smmu_domain->nr_ats_masters);
}
static void arm_smmu_detach_dev(struct arm_smmu_master *master)
......@@ -1911,14 +2373,15 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master)
if (!smmu_domain)
return;
arm_smmu_disable_ats(master);
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_del(&master->domain_head);
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
master->domain = NULL;
master->ats_enabled = false;
arm_smmu_install_ste_for_dev(master);
arm_smmu_disable_ats(master);
}
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
......@@ -1958,17 +2421,20 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
master->domain = smmu_domain;
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_add(&master->domain_head, &smmu_domain->devices);
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
arm_smmu_enable_ats(master);
master->ats_enabled = arm_smmu_ats_supported(master);
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
arm_smmu_write_ctx_desc(smmu, &smmu_domain->s1_cfg);
arm_smmu_install_ste_for_dev(master);
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_add(&master->domain_head, &smmu_domain->devices);
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
arm_smmu_enable_ats(master);
out_unlock:
mutex_unlock(&smmu_domain->init_mutex);
return ret;
......@@ -1985,21 +2451,16 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
return ops->map(ops, iova, paddr, size, prot);
}
static size_t
arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size, struct iommu_iotlb_gather *gather)
{
int ret;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
if (!ops)
return 0;
ret = ops->unmap(ops, iova, size);
if (ret && arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size))
return 0;
return ret;
return ops->unmap(ops, iova, size, gather);
}
static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
......@@ -2010,12 +2471,13 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
arm_smmu_tlb_inv_context(smmu_domain);
}
static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
if (smmu)
arm_smmu_cmdq_issue_sync(smmu);
arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
gather->pgsize, true, smmu_domain);
}
static phys_addr_t
......@@ -2286,13 +2748,13 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
size_t qsz;
do {
qsz = ((1 << q->max_n_shift) * dwords) << 3;
qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
GFP_KERNEL);
if (q->base || qsz < PAGE_SIZE)
break;
q->max_n_shift--;
q->llq.max_n_shift--;
} while (1);
if (!q->base) {
......@@ -2304,7 +2766,7 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
if (!WARN_ON(q->base_dma & (qsz - 1))) {
dev_info(smmu->dev, "allocated %u entries for %s\n",
1 << q->max_n_shift, name);
1 << q->llq.max_n_shift, name);
}
q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu);
......@@ -2313,24 +2775,55 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
q->q_base = Q_BASE_RWA;
q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->max_n_shift);
q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
q->prod = q->cons = 0;
q->llq.prod = q->llq.cons = 0;
return 0;
}
static void arm_smmu_cmdq_free_bitmap(void *data)
{
unsigned long *bitmap = data;
bitmap_free(bitmap);
}
static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
{
int ret = 0;
struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
atomic_long_t *bitmap;
atomic_set(&cmdq->owner_prod, 0);
atomic_set(&cmdq->lock, 0);
bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
if (!bitmap) {
dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
ret = -ENOMEM;
} else {
cmdq->valid_map = bitmap;
devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
}
return ret;
}
static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
{
int ret;
/* cmdq */
spin_lock_init(&smmu->cmdq.lock);
ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
"cmdq");
if (ret)
return ret;
ret = arm_smmu_cmdq_init(smmu);
if (ret)
return ret;
/* evtq */
ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
......@@ -2708,8 +3201,8 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
/* Command queue */
writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
enables = CR0_CMDQEN;
ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
......@@ -2736,9 +3229,9 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
/* Event queue */
writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
writel_relaxed(smmu->evtq.q.prod,
writel_relaxed(smmu->evtq.q.llq.prod,
arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
writel_relaxed(smmu->evtq.q.cons,
writel_relaxed(smmu->evtq.q.llq.cons,
arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
enables |= CR0_EVTQEN;
......@@ -2753,9 +3246,9 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
if (smmu->features & ARM_SMMU_FEAT_PRI) {
writeq_relaxed(smmu->priq.q.q_base,
smmu->base + ARM_SMMU_PRIQ_BASE);
writel_relaxed(smmu->priq.q.prod,
writel_relaxed(smmu->priq.q.llq.prod,
arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
writel_relaxed(smmu->priq.q.cons,
writel_relaxed(smmu->priq.q.llq.cons,
arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
enables |= CR0_PRIQEN;
......@@ -2909,18 +3402,24 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
}
/* Queue sizes, capped to ensure natural alignment */
smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
FIELD_GET(IDR1_CMDQS, reg));
if (!smmu->cmdq.q.max_n_shift) {
/* Odd alignment restrictions on the base, so ignore for now */
dev_err(smmu->dev, "unit-length command queue not supported\n");
smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
FIELD_GET(IDR1_CMDQS, reg));
if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
/*
* We don't support splitting up batches, so one batch of
* commands plus an extra sync needs to fit inside the command
* queue. There's also no way we can handle the weird alignment
* restrictions on the base pointer for a unit-length queue.
*/
dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
CMDQ_BATCH_ENTRIES);
return -ENXIO;
}
smmu->evtq.q.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
FIELD_GET(IDR1_EVTQS, reg));
smmu->priq.q.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
FIELD_GET(IDR1_PRIQS, reg));
smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
FIELD_GET(IDR1_EVTQS, reg));
smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
FIELD_GET(IDR1_PRIQS, reg));
/* SID/SSID sizes */
smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
......
......@@ -366,7 +366,7 @@ static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
* On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
* almost negligible, but the benefit of getting the first one in as far ahead
* of the sync as possible is significant, hence we don't just make this a
* no-op and set .tlb_sync to arm_smmu_inv_context_s2() as you might think.
* no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
*/
static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
size_t granule, bool leaf, void *cookie)
......@@ -380,22 +380,67 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
}
static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
.tlb_flush_all = arm_smmu_tlb_inv_context_s1,
.tlb_add_flush = arm_smmu_tlb_inv_range_s1,
.tlb_sync = arm_smmu_tlb_sync_context,
static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
ops->tlb_inv_range(iova, size, granule, false, cookie);
ops->tlb_sync(cookie);
}
static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
ops->tlb_inv_range(iova, size, granule, true, cookie);
ops->tlb_sync(cookie);
}
static void arm_smmu_tlb_add_page(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule,
void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
ops->tlb_inv_range(iova, granule, granule, true, cookie);
}
static const struct arm_smmu_flush_ops arm_smmu_s1_tlb_ops = {
.tlb = {
.tlb_flush_all = arm_smmu_tlb_inv_context_s1,
.tlb_flush_walk = arm_smmu_tlb_inv_walk,
.tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
.tlb_add_page = arm_smmu_tlb_add_page,
},
.tlb_inv_range = arm_smmu_tlb_inv_range_s1,
.tlb_sync = arm_smmu_tlb_sync_context,
};
static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
.tlb_flush_all = arm_smmu_tlb_inv_context_s2,
.tlb_add_flush = arm_smmu_tlb_inv_range_s2,
.tlb_sync = arm_smmu_tlb_sync_context,
static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
.tlb = {
.tlb_flush_all = arm_smmu_tlb_inv_context_s2,
.tlb_flush_walk = arm_smmu_tlb_inv_walk,
.tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
.tlb_add_page = arm_smmu_tlb_add_page,
},
.tlb_inv_range = arm_smmu_tlb_inv_range_s2,
.tlb_sync = arm_smmu_tlb_sync_context,
};
static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
.tlb_flush_all = arm_smmu_tlb_inv_context_s2,
.tlb_add_flush = arm_smmu_tlb_inv_vmid_nosync,
.tlb_sync = arm_smmu_tlb_sync_vmid,
static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
.tlb = {
.tlb_flush_all = arm_smmu_tlb_inv_context_s2,
.tlb_flush_walk = arm_smmu_tlb_inv_walk,
.tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
.tlb_add_page = arm_smmu_tlb_add_page,
},
.tlb_inv_range = arm_smmu_tlb_inv_vmid_nosync,
.tlb_sync = arm_smmu_tlb_sync_vmid,
};
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
......@@ -667,7 +712,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
ias = min(ias, 32UL);
oas = min(oas, 32UL);
}
smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
break;
case ARM_SMMU_DOMAIN_NESTED:
/*
......@@ -687,9 +732,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
oas = min(oas, 40UL);
}
if (smmu->version == ARM_SMMU_V2)
smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
else
smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
break;
default:
ret = -EINVAL;
......@@ -725,7 +770,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
.ias = ias,
.oas = oas,
.coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
.tlb = smmu_domain->tlb_ops,
.tlb = &smmu_domain->flush_ops->tlb,
.iommu_dev = smmu->dev,
};
......@@ -1131,7 +1176,7 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
}
static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size)
size_t size, struct iommu_iotlb_gather *gather)
{
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
......@@ -1141,7 +1186,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
return 0;
arm_smmu_rpm_get(smmu);
ret = ops->unmap(ops, iova, size);
ret = ops->unmap(ops, iova, size, gather);
arm_smmu_rpm_put(smmu);
return ret;
......@@ -1152,21 +1197,22 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
if (smmu_domain->tlb_ops) {
if (smmu_domain->flush_ops) {
arm_smmu_rpm_get(smmu);
smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
smmu_domain->flush_ops->tlb.tlb_flush_all(smmu_domain);
arm_smmu_rpm_put(smmu);
}
}
static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
if (smmu_domain->tlb_ops) {
if (smmu_domain->flush_ops) {
arm_smmu_rpm_get(smmu);
smmu_domain->tlb_ops->tlb_sync(smmu_domain);
smmu_domain->flush_ops->tlb_sync(smmu_domain);
arm_smmu_rpm_put(smmu);
}
}
......
......@@ -304,10 +304,17 @@ enum arm_smmu_domain_stage {
ARM_SMMU_DOMAIN_BYPASS,
};
struct arm_smmu_flush_ops {
struct iommu_flush_ops tlb;
void (*tlb_inv_range)(unsigned long iova, size_t size, size_t granule,
bool leaf, void *cookie);
void (*tlb_sync)(void *cookie);
};
struct arm_smmu_domain {
struct arm_smmu_device *smmu;
struct io_pgtable_ops *pgtbl_ops;
const struct iommu_gather_ops *tlb_ops;
const struct arm_smmu_flush_ops *flush_ops;
struct arm_smmu_cfg cfg;
enum arm_smmu_domain_stage stage;
bool non_strict;
......
......@@ -444,13 +444,18 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
size_t iova_off = iova_offset(iovad, dma_addr);
struct iommu_iotlb_gather iotlb_gather;
size_t unmapped;
dma_addr -= iova_off;
size = iova_align(iovad, size + iova_off);
iommu_iotlb_gather_init(&iotlb_gather);
unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather);
WARN_ON(unmapped != size);
WARN_ON(iommu_unmap_fast(domain, dma_addr, size) != size);
if (!cookie->fq_domain)
iommu_tlb_sync(domain);
iommu_tlb_sync(domain, &iotlb_gather);
iommu_dma_free_iova(cookie, dma_addr, size);
}
......
......@@ -1130,7 +1130,8 @@ static void exynos_iommu_tlb_invalidate_entry(struct exynos_iommu_domain *domain
}
static size_t exynos_iommu_unmap(struct iommu_domain *iommu_domain,
unsigned long l_iova, size_t size)
unsigned long l_iova, size_t size,
struct iommu_iotlb_gather *gather)
{
struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
sysmmu_iova_t iova = (sysmmu_iova_t)l_iova;
......
......@@ -5147,7 +5147,8 @@ static int intel_iommu_map(struct iommu_domain *domain,
}
static size_t intel_iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size)
unsigned long iova, size_t size,
struct iommu_iotlb_gather *gather)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct page *freelist = NULL;
......
......@@ -362,7 +362,8 @@ static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl)
return false;
}
static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *, unsigned long,
static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *,
struct iommu_iotlb_gather *, unsigned long,
size_t, int, arm_v7s_iopte *);
static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
......@@ -383,7 +384,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
size_t sz = ARM_V7S_BLOCK_SIZE(lvl);
tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl);
if (WARN_ON(__arm_v7s_unmap(data, iova + i * sz,
if (WARN_ON(__arm_v7s_unmap(data, NULL, iova + i * sz,
sz, lvl, tblp) != sz))
return -EINVAL;
} else if (ptep[i]) {
......@@ -493,9 +494,8 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
* a chance for anything to kick off a table walk for the new iova.
*/
if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) {
io_pgtable_tlb_add_flush(iop, iova, size,
ARM_V7S_BLOCK_SIZE(2), false);
io_pgtable_tlb_sync(iop);
io_pgtable_tlb_flush_walk(iop, iova, size,
ARM_V7S_BLOCK_SIZE(2));
} else {
wmb();
}
......@@ -541,12 +541,12 @@ static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
__arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg);
size *= ARM_V7S_CONT_PAGES;
io_pgtable_tlb_add_flush(iop, iova, size, size, true);
io_pgtable_tlb_sync(iop);
io_pgtable_tlb_flush_leaf(iop, iova, size, size);
return pte;
}
static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size,
arm_v7s_iopte blk_pte,
arm_v7s_iopte *ptep)
......@@ -583,15 +583,15 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
return 0;
tablep = iopte_deref(pte, 1);
return __arm_v7s_unmap(data, iova, size, 2, tablep);
return __arm_v7s_unmap(data, gather, iova, size, 2, tablep);
}
io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
io_pgtable_tlb_sync(&data->iop);
io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
return size;
}
static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size, int lvl,
arm_v7s_iopte *ptep)
{
......@@ -638,9 +638,8 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
for (i = 0; i < num_entries; i++) {
if (ARM_V7S_PTE_IS_TABLE(pte[i], lvl)) {
/* Also flush any partial walks */
io_pgtable_tlb_add_flush(iop, iova, blk_size,
ARM_V7S_BLOCK_SIZE(lvl + 1), false);
io_pgtable_tlb_sync(iop);
io_pgtable_tlb_flush_walk(iop, iova, blk_size,
ARM_V7S_BLOCK_SIZE(lvl + 1));
ptep = iopte_deref(pte[i], lvl);
__arm_v7s_free_table(ptep, lvl + 1, data);
} else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
......@@ -651,8 +650,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
*/
smp_wmb();
} else {
io_pgtable_tlb_add_flush(iop, iova, blk_size,
blk_size, true);
io_pgtable_tlb_add_page(iop, gather, iova, blk_size);
}
iova += blk_size;
}
......@@ -662,23 +660,24 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
* Insert a table at the next level to map the old region,
* minus the part we want to unmap
*/
return arm_v7s_split_blk_unmap(data, iova, size, pte[0], ptep);
return arm_v7s_split_blk_unmap(data, gather, iova, size, pte[0],
ptep);
}
/* Keep on walkin' */
ptep = iopte_deref(pte[0], lvl);
return __arm_v7s_unmap(data, iova, size, lvl + 1, ptep);
return __arm_v7s_unmap(data, gather, iova, size, lvl + 1, ptep);
}
static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
size_t size)
size_t size, struct iommu_iotlb_gather *gather)
{
struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
if (WARN_ON(upper_32_bits(iova)))
return 0;
return __arm_v7s_unmap(data, iova, size, 1, data->pgd);
return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd);
}
static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
......@@ -806,22 +805,24 @@ static void dummy_tlb_flush_all(void *cookie)
WARN_ON(cookie != cfg_cookie);
}
static void dummy_tlb_add_flush(unsigned long iova, size_t size,
size_t granule, bool leaf, void *cookie)
static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
void *cookie)
{
WARN_ON(cookie != cfg_cookie);
WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
}
static void dummy_tlb_sync(void *cookie)
static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule, void *cookie)
{
WARN_ON(cookie != cfg_cookie);
dummy_tlb_flush(iova, granule, granule, cookie);
}
static const struct iommu_gather_ops dummy_tlb_ops = {
static const struct iommu_flush_ops dummy_tlb_ops = {
.tlb_flush_all = dummy_tlb_flush_all,
.tlb_add_flush = dummy_tlb_add_flush,
.tlb_sync = dummy_tlb_sync,
.tlb_flush_walk = dummy_tlb_flush,
.tlb_flush_leaf = dummy_tlb_flush,
.tlb_add_page = dummy_tlb_add_page,
};
#define __FAIL(ops) ({ \
......@@ -896,7 +897,7 @@ static int __init arm_v7s_do_selftests(void)
size = 1UL << __ffs(cfg.pgsize_bitmap);
while (i < loopnr) {
iova_start = i * SZ_16M;
if (ops->unmap(ops, iova_start + size, size) != size)
if (ops->unmap(ops, iova_start + size, size, NULL) != size)
return __FAIL(ops);
/* Remap of partial unmap */
......@@ -914,7 +915,7 @@ static int __init arm_v7s_do_selftests(void)
for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) {
size = 1UL << i;
if (ops->unmap(ops, iova, size) != size)
if (ops->unmap(ops, iova, size, NULL) != size)
return __FAIL(ops);
if (ops->iova_to_phys(ops, iova + 42))
......
......@@ -12,7 +12,6 @@
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/io-pgtable.h>
#include <linux/iommu.h>
#include <linux/kernel.h>
#include <linux/sizes.h>
#include <linux/slab.h>
......@@ -290,6 +289,7 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
}
static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size, int lvl,
arm_lpae_iopte *ptep);
......@@ -335,8 +335,10 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
if (WARN_ON(__arm_lpae_unmap(data, iova, sz, lvl, tblp) != sz))
if (__arm_lpae_unmap(data, NULL, iova, sz, lvl, tblp) != sz) {
WARN_ON(1);
return -EINVAL;
}
}
__arm_lpae_init_pte(data, paddr, prot, lvl, ptep);
......@@ -537,6 +539,7 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop)
}
static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size,
arm_lpae_iopte blk_pte, int lvl,
arm_lpae_iopte *ptep)
......@@ -582,15 +585,15 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
tablep = iopte_deref(pte, data);
} else if (unmap_idx >= 0) {
io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
io_pgtable_tlb_sync(&data->iop);
io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
return size;
}
return __arm_lpae_unmap(data, iova, size, lvl, tablep);
return __arm_lpae_unmap(data, gather, iova, size, lvl, tablep);
}
static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size, int lvl,
arm_lpae_iopte *ptep)
{
......@@ -612,9 +615,8 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
if (!iopte_leaf(pte, lvl, iop->fmt)) {
/* Also flush any partial walks */
io_pgtable_tlb_add_flush(iop, iova, size,
ARM_LPAE_GRANULE(data), false);
io_pgtable_tlb_sync(iop);
io_pgtable_tlb_flush_walk(iop, iova, size,
ARM_LPAE_GRANULE(data));
ptep = iopte_deref(pte, data);
__arm_lpae_free_pgtable(data, lvl + 1, ptep);
} else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
......@@ -625,7 +627,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
*/
smp_wmb();
} else {
io_pgtable_tlb_add_flush(iop, iova, size, size, true);
io_pgtable_tlb_add_page(iop, gather, iova, size);
}
return size;
......@@ -634,17 +636,17 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
* Insert a table at the next level to map the old region,
* minus the part we want to unmap
*/
return arm_lpae_split_blk_unmap(data, iova, size, pte,
return arm_lpae_split_blk_unmap(data, gather, iova, size, pte,
lvl + 1, ptep);
}
/* Keep on walkin' */
ptep = iopte_deref(pte, data);
return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep);
return __arm_lpae_unmap(data, gather, iova, size, lvl + 1, ptep);
}
static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
size_t size)
size_t size, struct iommu_iotlb_gather *gather)
{
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
arm_lpae_iopte *ptep = data->pgd;
......@@ -653,7 +655,7 @@ static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
return 0;
return __arm_lpae_unmap(data, iova, size, lvl, ptep);
return __arm_lpae_unmap(data, gather, iova, size, lvl, ptep);
}
static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
......@@ -1070,22 +1072,24 @@ static void dummy_tlb_flush_all(void *cookie)
WARN_ON(cookie != cfg_cookie);
}
static void dummy_tlb_add_flush(unsigned long iova, size_t size,
size_t granule, bool leaf, void *cookie)
static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
void *cookie)
{
WARN_ON(cookie != cfg_cookie);
WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
}
static void dummy_tlb_sync(void *cookie)
static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule, void *cookie)
{
WARN_ON(cookie != cfg_cookie);
dummy_tlb_flush(iova, granule, granule, cookie);
}
static const struct iommu_gather_ops dummy_tlb_ops __initconst = {
static const struct iommu_flush_ops dummy_tlb_ops __initconst = {
.tlb_flush_all = dummy_tlb_flush_all,
.tlb_add_flush = dummy_tlb_add_flush,
.tlb_sync = dummy_tlb_sync,
.tlb_flush_walk = dummy_tlb_flush,
.tlb_flush_leaf = dummy_tlb_flush,
.tlb_add_page = dummy_tlb_add_page,
};
static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
......@@ -1168,7 +1172,7 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
/* Partial unmap */
size = 1UL << __ffs(cfg->pgsize_bitmap);
if (ops->unmap(ops, SZ_1G + size, size) != size)
if (ops->unmap(ops, SZ_1G + size, size, NULL) != size)
return __FAIL(ops, i);
/* Remap of partial unmap */
......@@ -1183,7 +1187,7 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
size = 1UL << j;
if (ops->unmap(ops, iova, size) != size)
if (ops->unmap(ops, iova, size, NULL) != size)
return __FAIL(ops, i);
if (ops->iova_to_phys(ops, iova + 42))
......
......@@ -1862,7 +1862,7 @@ EXPORT_SYMBOL_GPL(iommu_map);
static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size,
bool sync)
struct iommu_iotlb_gather *iotlb_gather)
{
const struct iommu_ops *ops = domain->ops;
size_t unmapped_page, unmapped = 0;
......@@ -1899,13 +1899,10 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
while (unmapped < size) {
size_t pgsize = iommu_pgsize(domain, iova, size - unmapped);
unmapped_page = ops->unmap(domain, iova, pgsize);
unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather);
if (!unmapped_page)
break;
if (sync && ops->iotlb_range_add)
ops->iotlb_range_add(domain, iova, pgsize);
pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
iova, unmapped_page);
......@@ -1913,9 +1910,6 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
unmapped += unmapped_page;
}
if (sync && ops->iotlb_sync)
ops->iotlb_sync(domain);
trace_unmap(orig_iova, size, unmapped);
return unmapped;
}
......@@ -1923,14 +1917,22 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
size_t iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
return __iommu_unmap(domain, iova, size, true);
struct iommu_iotlb_gather iotlb_gather;
size_t ret;
iommu_iotlb_gather_init(&iotlb_gather);
ret = __iommu_unmap(domain, iova, size, &iotlb_gather);
iommu_tlb_sync(domain, &iotlb_gather);
return ret;
}
EXPORT_SYMBOL_GPL(iommu_unmap);
size_t iommu_unmap_fast(struct iommu_domain *domain,
unsigned long iova, size_t size)
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather)
{
return __iommu_unmap(domain, iova, size, false);
return __iommu_unmap(domain, iova, size, iotlb_gather);
}
EXPORT_SYMBOL_GPL(iommu_unmap_fast);
......
......@@ -361,16 +361,16 @@ static void ipmmu_tlb_flush_all(void *cookie)
ipmmu_tlb_invalidate(domain);
}
static void ipmmu_tlb_add_flush(unsigned long iova, size_t size,
size_t granule, bool leaf, void *cookie)
static void ipmmu_tlb_flush(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
/* The hardware doesn't support selective TLB flush. */
ipmmu_tlb_flush_all(cookie);
}
static const struct iommu_gather_ops ipmmu_gather_ops = {
static const struct iommu_flush_ops ipmmu_flush_ops = {
.tlb_flush_all = ipmmu_tlb_flush_all,
.tlb_add_flush = ipmmu_tlb_add_flush,
.tlb_sync = ipmmu_tlb_flush_all,
.tlb_flush_walk = ipmmu_tlb_flush,
.tlb_flush_leaf = ipmmu_tlb_flush,
};
/* -----------------------------------------------------------------------------
......@@ -480,7 +480,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K;
domain->cfg.ias = 32;
domain->cfg.oas = 40;
domain->cfg.tlb = &ipmmu_gather_ops;
domain->cfg.tlb = &ipmmu_flush_ops;
domain->io_domain.geometry.aperture_end = DMA_BIT_MASK(32);
domain->io_domain.geometry.force_aperture = true;
/*
......@@ -733,14 +733,14 @@ static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova,
}
static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova,
size_t size)
size_t size, struct iommu_iotlb_gather *gather)
{
struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
return domain->iop->unmap(domain->iop, iova, size);
return domain->iop->unmap(domain->iop, iova, size, gather);
}
static void ipmmu_iotlb_sync(struct iommu_domain *io_domain)
static void ipmmu_flush_iotlb_all(struct iommu_domain *io_domain)
{
struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
......@@ -748,6 +748,12 @@ static void ipmmu_iotlb_sync(struct iommu_domain *io_domain)
ipmmu_tlb_flush_all(domain);
}
static void ipmmu_iotlb_sync(struct iommu_domain *io_domain,
struct iommu_iotlb_gather *gather)
{
ipmmu_flush_iotlb_all(io_domain);
}
static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain,
dma_addr_t iova)
{
......@@ -957,7 +963,7 @@ static const struct iommu_ops ipmmu_ops = {
.detach_dev = ipmmu_detach_device,
.map = ipmmu_map,
.unmap = ipmmu_unmap,
.flush_iotlb_all = ipmmu_iotlb_sync,
.flush_iotlb_all = ipmmu_flush_iotlb_all,
.iotlb_sync = ipmmu_iotlb_sync,
.iova_to_phys = ipmmu_iova_to_phys,
.add_device = ipmmu_add_device,
......
......@@ -168,20 +168,29 @@ static void __flush_iotlb_range(unsigned long iova, size_t size,
return;
}
static void __flush_iotlb_sync(void *cookie)
static void __flush_iotlb_walk(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
/*
* Nothing is needed here, the barrier to guarantee
* completion of the tlb sync operation is implicitly
* taken care when the iommu client does a writel before
* kick starting the other master.
*/
__flush_iotlb_range(iova, size, granule, false, cookie);
}
static void __flush_iotlb_leaf(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
__flush_iotlb_range(iova, size, granule, true, cookie);
}
static const struct iommu_gather_ops msm_iommu_gather_ops = {
static void __flush_iotlb_page(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule, void *cookie)
{
__flush_iotlb_range(iova, granule, granule, true, cookie);
}
static const struct iommu_flush_ops msm_iommu_flush_ops = {
.tlb_flush_all = __flush_iotlb,
.tlb_add_flush = __flush_iotlb_range,
.tlb_sync = __flush_iotlb_sync,
.tlb_flush_walk = __flush_iotlb_walk,
.tlb_flush_leaf = __flush_iotlb_leaf,
.tlb_add_page = __flush_iotlb_page,
};
static int msm_iommu_alloc_ctx(unsigned long *map, int start, int end)
......@@ -345,7 +354,7 @@ static int msm_iommu_domain_config(struct msm_priv *priv)
.pgsize_bitmap = msm_iommu_ops.pgsize_bitmap,
.ias = 32,
.oas = 32,
.tlb = &msm_iommu_gather_ops,
.tlb = &msm_iommu_flush_ops,
.iommu_dev = priv->dev,
};
......@@ -509,13 +518,13 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova,
}
static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t len)
size_t len, struct iommu_iotlb_gather *gather)
{
struct msm_priv *priv = to_msm_priv(domain);
unsigned long flags;
spin_lock_irqsave(&priv->pgtlock, flags);
len = priv->iop->unmap(priv->iop, iova, len);
len = priv->iop->unmap(priv->iop, iova, len, gather);
spin_unlock_irqrestore(&priv->pgtlock, flags);
return len;
......@@ -691,6 +700,13 @@ static struct iommu_ops msm_iommu_ops = {
.detach_dev = msm_iommu_detach_dev,
.map = msm_iommu_map,
.unmap = msm_iommu_unmap,
/*
* Nothing is needed here, the barrier to guarantee
* completion of the tlb sync operation is implicitly
* taken care when the iommu client does a writel before
* kick starting the other master.
*/
.iotlb_sync = NULL,
.iova_to_phys = msm_iommu_iova_to_phys,
.add_device = msm_iommu_add_device,
.remove_device = msm_iommu_remove_device,
......
......@@ -188,10 +188,32 @@ static void mtk_iommu_tlb_sync(void *cookie)
}
}
static const struct iommu_gather_ops mtk_iommu_gather_ops = {
static void mtk_iommu_tlb_flush_walk(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
mtk_iommu_tlb_add_flush_nosync(iova, size, granule, false, cookie);
mtk_iommu_tlb_sync(cookie);
}
static void mtk_iommu_tlb_flush_leaf(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
mtk_iommu_tlb_add_flush_nosync(iova, size, granule, true, cookie);
mtk_iommu_tlb_sync(cookie);
}
static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule,
void *cookie)
{
mtk_iommu_tlb_add_flush_nosync(iova, granule, granule, true, cookie);
}
static const struct iommu_flush_ops mtk_iommu_flush_ops = {
.tlb_flush_all = mtk_iommu_tlb_flush_all,
.tlb_add_flush = mtk_iommu_tlb_add_flush_nosync,
.tlb_sync = mtk_iommu_tlb_sync,
.tlb_flush_walk = mtk_iommu_tlb_flush_walk,
.tlb_flush_leaf = mtk_iommu_tlb_flush_leaf,
.tlb_add_page = mtk_iommu_tlb_flush_page_nosync,
};
static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
......@@ -267,7 +289,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
.ias = 32,
.oas = 32,
.tlb = &mtk_iommu_gather_ops,
.tlb = &mtk_iommu_flush_ops,
.iommu_dev = data->dev,
};
......@@ -371,20 +393,27 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
}
static size_t mtk_iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size)
unsigned long iova, size_t size,
struct iommu_iotlb_gather *gather)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
unsigned long flags;
size_t unmapsz;
spin_lock_irqsave(&dom->pgtlock, flags);
unmapsz = dom->iop->unmap(dom->iop, iova, size);
unmapsz = dom->iop->unmap(dom->iop, iova, size, gather);
spin_unlock_irqrestore(&dom->pgtlock, flags);
return unmapsz;
}
static void mtk_iommu_iotlb_sync(struct iommu_domain *domain)
static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
}
static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
}
......@@ -490,7 +519,7 @@ static const struct iommu_ops mtk_iommu_ops = {
.detach_dev = mtk_iommu_detach_device,
.map = mtk_iommu_map,
.unmap = mtk_iommu_unmap,
.flush_iotlb_all = mtk_iommu_iotlb_sync,
.flush_iotlb_all = mtk_iommu_flush_iotlb_all,
.iotlb_sync = mtk_iommu_iotlb_sync,
.iova_to_phys = mtk_iommu_iova_to_phys,
.add_device = mtk_iommu_add_device,
......
......@@ -324,7 +324,8 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
}
static size_t mtk_iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size)
unsigned long iova, size_t size,
struct iommu_iotlb_gather *gather)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
unsigned long flags;
......
......@@ -1149,7 +1149,7 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
}
static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
size_t size)
size_t size, struct iommu_iotlb_gather *gather)
{
struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
struct device *dev = omap_domain->dev;
......
......@@ -165,10 +165,32 @@ static void qcom_iommu_tlb_inv_range_nosync(unsigned long iova, size_t size,
}
}
static const struct iommu_gather_ops qcom_gather_ops = {
static void qcom_iommu_tlb_flush_walk(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
qcom_iommu_tlb_inv_range_nosync(iova, size, granule, false, cookie);
qcom_iommu_tlb_sync(cookie);
}
static void qcom_iommu_tlb_flush_leaf(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
qcom_iommu_tlb_inv_range_nosync(iova, size, granule, true, cookie);
qcom_iommu_tlb_sync(cookie);
}
static void qcom_iommu_tlb_add_page(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule,
void *cookie)
{
qcom_iommu_tlb_inv_range_nosync(iova, granule, granule, true, cookie);
}
static const struct iommu_flush_ops qcom_flush_ops = {
.tlb_flush_all = qcom_iommu_tlb_inv_context,
.tlb_add_flush = qcom_iommu_tlb_inv_range_nosync,
.tlb_sync = qcom_iommu_tlb_sync,
.tlb_flush_walk = qcom_iommu_tlb_flush_walk,
.tlb_flush_leaf = qcom_iommu_tlb_flush_leaf,
.tlb_add_page = qcom_iommu_tlb_add_page,
};
static irqreturn_t qcom_iommu_fault(int irq, void *dev)
......@@ -216,7 +238,7 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain,
.pgsize_bitmap = qcom_iommu_ops.pgsize_bitmap,
.ias = 32,
.oas = 40,
.tlb = &qcom_gather_ops,
.tlb = &qcom_flush_ops,
.iommu_dev = qcom_iommu->dev,
};
......@@ -418,7 +440,7 @@ static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova,
}
static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size)
size_t size, struct iommu_iotlb_gather *gather)
{
size_t ret;
unsigned long flags;
......@@ -435,14 +457,14 @@ static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
*/
pm_runtime_get_sync(qcom_domain->iommu->dev);
spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags);
ret = ops->unmap(ops, iova, size);
ret = ops->unmap(ops, iova, size, gather);
spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags);
pm_runtime_put_sync(qcom_domain->iommu->dev);
return ret;
}
static void qcom_iommu_iotlb_sync(struct iommu_domain *domain)
static void qcom_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
struct io_pgtable *pgtable = container_of(qcom_domain->pgtbl_ops,
......@@ -455,6 +477,12 @@ static void qcom_iommu_iotlb_sync(struct iommu_domain *domain)
pm_runtime_put_sync(qcom_domain->iommu->dev);
}
static void qcom_iommu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
qcom_iommu_flush_iotlb_all(domain);
}
static phys_addr_t qcom_iommu_iova_to_phys(struct iommu_domain *domain,
dma_addr_t iova)
{
......@@ -582,7 +610,7 @@ static const struct iommu_ops qcom_iommu_ops = {
.detach_dev = qcom_iommu_detach_dev,
.map = qcom_iommu_map,
.unmap = qcom_iommu_unmap,
.flush_iotlb_all = qcom_iommu_iotlb_sync,
.flush_iotlb_all = qcom_iommu_flush_iotlb_all,
.iotlb_sync = qcom_iommu_iotlb_sync,
.iova_to_phys = qcom_iommu_iova_to_phys,
.add_device = qcom_iommu_add_device,
......
......@@ -794,7 +794,7 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
}
static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
size_t size)
size_t size, struct iommu_iotlb_gather *gather)
{
struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
unsigned long flags;
......
......@@ -314,7 +314,8 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
}
static size_t s390_iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size)
unsigned long iova, size_t size,
struct iommu_iotlb_gather *gather)
{
struct s390_domain *s390_domain = to_s390_domain(domain);
int flags = ZPCI_PTE_INVALID;
......
......@@ -207,7 +207,7 @@ static inline int __gart_iommu_unmap(struct gart_device *gart,
}
static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t bytes)
size_t bytes, struct iommu_iotlb_gather *gather)
{
struct gart_device *gart = gart_handle;
int err;
......@@ -273,11 +273,17 @@ static int gart_iommu_of_xlate(struct device *dev,
return 0;
}
static void gart_iommu_sync(struct iommu_domain *domain)
static void gart_iommu_sync_map(struct iommu_domain *domain)
{
FLUSH_GART_REGS(gart_handle);
}
static void gart_iommu_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
gart_iommu_sync_map(domain);
}
static const struct iommu_ops gart_iommu_ops = {
.capable = gart_iommu_capable,
.domain_alloc = gart_iommu_domain_alloc,
......@@ -292,7 +298,7 @@ static const struct iommu_ops gart_iommu_ops = {
.iova_to_phys = gart_iommu_iova_to_phys,
.pgsize_bitmap = GART_IOMMU_PGSIZES,
.of_xlate = gart_iommu_of_xlate,
.iotlb_sync_map = gart_iommu_sync,
.iotlb_sync_map = gart_iommu_sync_map,
.iotlb_sync = gart_iommu_sync,
};
......
......@@ -680,7 +680,7 @@ static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova,
}
static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size)
size_t size, struct iommu_iotlb_gather *gather)
{
struct tegra_smmu_as *as = to_smmu_as(domain);
dma_addr_t pte_dma;
......
......@@ -742,7 +742,7 @@ static int viommu_map(struct iommu_domain *domain, unsigned long iova,
}
static size_t viommu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size)
size_t size, struct iommu_iotlb_gather *gather)
{
int ret = 0;
size_t unmapped;
......@@ -788,7 +788,8 @@ static phys_addr_t viommu_iova_to_phys(struct iommu_domain *domain,
return paddr;
}
static void viommu_iotlb_sync(struct iommu_domain *domain)
static void viommu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
struct viommu_domain *vdomain = to_viommu_domain(domain);
......
......@@ -650,12 +650,13 @@ static int vfio_iommu_type1_unpin_pages(void *iommu_data,
}
static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain,
struct list_head *regions)
struct list_head *regions,
struct iommu_iotlb_gather *iotlb_gather)
{
long unlocked = 0;
struct vfio_regions *entry, *next;
iommu_tlb_sync(domain->domain);
iommu_tlb_sync(domain->domain, iotlb_gather);
list_for_each_entry_safe(entry, next, regions, list) {
unlocked += vfio_unpin_pages_remote(dma,
......@@ -685,18 +686,19 @@ static size_t unmap_unpin_fast(struct vfio_domain *domain,
struct vfio_dma *dma, dma_addr_t *iova,
size_t len, phys_addr_t phys, long *unlocked,
struct list_head *unmapped_list,
int *unmapped_cnt)
int *unmapped_cnt,
struct iommu_iotlb_gather *iotlb_gather)
{
size_t unmapped = 0;
struct vfio_regions *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (entry) {
unmapped = iommu_unmap_fast(domain->domain, *iova, len);
unmapped = iommu_unmap_fast(domain->domain, *iova, len,
iotlb_gather);
if (!unmapped) {
kfree(entry);
} else {
iommu_tlb_range_add(domain->domain, *iova, unmapped);
entry->iova = *iova;
entry->phys = phys;
entry->len = unmapped;
......@@ -712,8 +714,8 @@ static size_t unmap_unpin_fast(struct vfio_domain *domain,
* or in case of errors.
*/
if (*unmapped_cnt >= VFIO_IOMMU_TLB_SYNC_MAX || !unmapped) {
*unlocked += vfio_sync_unpin(dma, domain,
unmapped_list);
*unlocked += vfio_sync_unpin(dma, domain, unmapped_list,
iotlb_gather);
*unmapped_cnt = 0;
}
......@@ -744,6 +746,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
dma_addr_t iova = dma->iova, end = dma->iova + dma->size;
struct vfio_domain *domain, *d;
LIST_HEAD(unmapped_region_list);
struct iommu_iotlb_gather iotlb_gather;
int unmapped_region_cnt = 0;
long unlocked = 0;
......@@ -768,6 +771,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
cond_resched();
}
iommu_iotlb_gather_init(&iotlb_gather);
while (iova < end) {
size_t unmapped, len;
phys_addr_t phys, next;
......@@ -796,7 +800,8 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
*/
unmapped = unmap_unpin_fast(domain, dma, &iova, len, phys,
&unlocked, &unmapped_region_list,
&unmapped_region_cnt);
&unmapped_region_cnt,
&iotlb_gather);
if (!unmapped) {
unmapped = unmap_unpin_slow(domain, dma, &iova, len,
phys, &unlocked);
......@@ -807,8 +812,10 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
dma->iommu_mapped = false;
if (unmapped_region_cnt)
unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list);
if (unmapped_region_cnt) {
unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list,
&iotlb_gather);
}
if (do_accounting) {
vfio_lock_acct(dma, -unlocked, true);
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __IO_PGTABLE_H
#define __IO_PGTABLE_H
#include <linux/bitops.h>
#include <linux/iommu.h>
/*
* Public API for use by IOMMU drivers
......@@ -17,22 +19,31 @@ enum io_pgtable_fmt {
};
/**
* struct iommu_gather_ops - IOMMU callbacks for TLB and page table management.
* struct iommu_flush_ops - IOMMU callbacks for TLB and page table management.
*
* @tlb_flush_all: Synchronously invalidate the entire TLB context.
* @tlb_add_flush: Queue up a TLB invalidation for a virtual address range.
* @tlb_sync: Ensure any queued TLB invalidation has taken effect, and
* any corresponding page table updates are visible to the
* IOMMU.
* @tlb_flush_all: Synchronously invalidate the entire TLB context.
* @tlb_flush_walk: Synchronously invalidate all intermediate TLB state
* (sometimes referred to as the "walk cache") for a virtual
* address range.
* @tlb_flush_leaf: Synchronously invalidate all leaf TLB state for a virtual
* address range.
* @tlb_add_page: Optional callback to queue up leaf TLB invalidation for a
* single page. IOMMUs that cannot batch TLB invalidation
* operations efficiently will typically issue them here, but
* others may decide to update the iommu_iotlb_gather structure
* and defer the invalidation until iommu_tlb_sync() instead.
*
* Note that these can all be called in atomic context and must therefore
* not block.
*/
struct iommu_gather_ops {
struct iommu_flush_ops {
void (*tlb_flush_all)(void *cookie);
void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule,
bool leaf, void *cookie);
void (*tlb_sync)(void *cookie);
void (*tlb_flush_walk)(unsigned long iova, size_t size, size_t granule,
void *cookie);
void (*tlb_flush_leaf)(unsigned long iova, size_t size, size_t granule,
void *cookie);
void (*tlb_add_page)(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule, void *cookie);
};
/**
......@@ -84,7 +95,7 @@ struct io_pgtable_cfg {
unsigned int ias;
unsigned int oas;
bool coherent_walk;
const struct iommu_gather_ops *tlb;
const struct iommu_flush_ops *tlb;
struct device *iommu_dev;
/* Low-level data specific to the table format */
......@@ -128,7 +139,7 @@ struct io_pgtable_ops {
int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
phys_addr_t paddr, size_t size, int prot);
size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
size_t size);
size_t size, struct iommu_iotlb_gather *gather);
phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
unsigned long iova);
};
......@@ -184,15 +195,27 @@ static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop)
iop->cfg.tlb->tlb_flush_all(iop->cookie);
}
static inline void io_pgtable_tlb_add_flush(struct io_pgtable *iop,
unsigned long iova, size_t size, size_t granule, bool leaf)
static inline void
io_pgtable_tlb_flush_walk(struct io_pgtable *iop, unsigned long iova,
size_t size, size_t granule)
{
iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie);
}
static inline void
io_pgtable_tlb_flush_leaf(struct io_pgtable *iop, unsigned long iova,
size_t size, size_t granule)
{
iop->cfg.tlb->tlb_add_flush(iova, size, granule, leaf, iop->cookie);
iop->cfg.tlb->tlb_flush_leaf(iova, size, granule, iop->cookie);
}
static inline void io_pgtable_tlb_sync(struct io_pgtable *iop)
static inline void
io_pgtable_tlb_add_page(struct io_pgtable *iop,
struct iommu_iotlb_gather * gather, unsigned long iova,
size_t granule)
{
iop->cfg.tlb->tlb_sync(iop->cookie);
if (iop->cfg.tlb->tlb_add_page)
iop->cfg.tlb->tlb_add_page(gather, iova, granule, iop->cookie);
}
/**
......
......@@ -191,6 +191,23 @@ struct iommu_sva_ops {
#ifdef CONFIG_IOMMU_API
/**
* struct iommu_iotlb_gather - Range information for a pending IOTLB flush
*
* @start: IOVA representing the start of the range to be flushed
* @end: IOVA representing the end of the range to be flushed (exclusive)
* @pgsize: The interval at which to perform the flush
*
* This structure is intended to be updated by multiple calls to the
* ->unmap() function in struct iommu_ops before eventually being passed
* into ->iotlb_sync().
*/
struct iommu_iotlb_gather {
unsigned long start;
unsigned long end;
size_t pgsize;
};
/**
* struct iommu_ops - iommu ops and capabilities
* @capable: check capability
......@@ -201,7 +218,6 @@ struct iommu_sva_ops {
* @map: map a physically contiguous memory region to an iommu domain
* @unmap: unmap a physically contiguous memory region from an iommu domain
* @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
* @iotlb_range_add: Add a given iova range to the flush queue for this domain
* @iotlb_sync_map: Sync mappings created recently using @map to the hardware
* @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
* queue
......@@ -242,12 +258,11 @@ struct iommu_ops {
int (*map)(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot);
size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
size_t size);
size_t size, struct iommu_iotlb_gather *iotlb_gather);
void (*flush_iotlb_all)(struct iommu_domain *domain);
void (*iotlb_range_add)(struct iommu_domain *domain,
unsigned long iova, size_t size);
void (*iotlb_sync_map)(struct iommu_domain *domain);
void (*iotlb_sync)(struct iommu_domain *domain);
void (*iotlb_sync)(struct iommu_domain *domain,
struct iommu_iotlb_gather *iotlb_gather);
phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova);
int (*add_device)(struct device *dev);
void (*remove_device)(struct device *dev);
......@@ -378,6 +393,13 @@ static inline struct iommu_device *dev_to_iommu_device(struct device *dev)
return (struct iommu_device *)dev_get_drvdata(dev);
}
static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
{
*gather = (struct iommu_iotlb_gather) {
.start = ULONG_MAX,
};
}
#define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */
#define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */
#define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */
......@@ -402,7 +424,8 @@ extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size);
extern size_t iommu_unmap_fast(struct iommu_domain *domain,
unsigned long iova, size_t size);
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather);
extern size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
struct scatterlist *sg,unsigned int nents, int prot);
extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova);
......@@ -476,17 +499,38 @@ static inline void iommu_flush_tlb_all(struct iommu_domain *domain)
domain->ops->flush_iotlb_all(domain);
}
static inline void iommu_tlb_range_add(struct iommu_domain *domain,
unsigned long iova, size_t size)
static inline void iommu_tlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *iotlb_gather)
{
if (domain->ops->iotlb_range_add)
domain->ops->iotlb_range_add(domain, iova, size);
if (domain->ops->iotlb_sync)
domain->ops->iotlb_sync(domain, iotlb_gather);
iommu_iotlb_gather_init(iotlb_gather);
}
static inline void iommu_tlb_sync(struct iommu_domain *domain)
static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size)
{
if (domain->ops->iotlb_sync)
domain->ops->iotlb_sync(domain);
unsigned long start = iova, end = start + size;
/*
* If the new page is disjoint from the current range or is mapped at
* a different granularity, then sync the TLB so that the gather
* structure can be rewritten.
*/
if (gather->pgsize != size ||
end < gather->start || start > gather->end) {
if (gather->pgsize)
iommu_tlb_sync(domain, gather);
gather->pgsize = size;
}
if (gather->end < end)
gather->end = end;
if (gather->start > start)
gather->start = start;
}
/* PCI device grouping function */
......@@ -567,6 +611,7 @@ struct iommu_group {};
struct iommu_fwspec {};
struct iommu_device {};
struct iommu_fault_param {};
struct iommu_iotlb_gather {};
static inline bool iommu_present(struct bus_type *bus)
{
......@@ -621,7 +666,8 @@ static inline size_t iommu_unmap(struct iommu_domain *domain,
}
static inline size_t iommu_unmap_fast(struct iommu_domain *domain,
unsigned long iova, int gfp_order)
unsigned long iova, int gfp_order,
struct iommu_iotlb_gather *iotlb_gather)
{
return 0;
}
......@@ -637,12 +683,8 @@ static inline void iommu_flush_tlb_all(struct iommu_domain *domain)
{
}
static inline void iommu_tlb_range_add(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
}
static inline void iommu_tlb_sync(struct iommu_domain *domain)
static inline void iommu_tlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *iotlb_gather)
{
}
......@@ -827,6 +869,16 @@ static inline struct iommu_device *dev_to_iommu_device(struct device *dev)
return NULL;
}
static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
{
}
static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size)
{
}
static inline void iommu_device_unregister(struct iommu_device *iommu)
{
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment