Commit aa5cabc4 authored by Joerg Roedel's avatar Joerg Roedel

Merge tag 'arm-smmu-updates' of...

Merge tag 'arm-smmu-updates' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into arm/smmu

Arm SMMU updates for 6.7

- Device-tree binding update:
  * Add qcom,sm7150-smmu-v2 for Adreno on SM7150 SoC

- SMMUv2:
  * Support for Qualcomm SDM670 (MDSS) and SM7150 SoCs

- SMMUv3:
  * Large refactoring of the context descriptor code to
    move the CD table into the master, paving the way
    for '->set_dev_pasid()' support on non-SVA domains

  * Minor cleanups to the SVA code
parents 6465e260 54865092
......@@ -110,6 +110,7 @@ properties:
- qcom,sdm630-smmu-v2
- qcom,sdm845-smmu-v2
- qcom,sm6350-smmu-v2
- qcom,sm7150-smmu-v2
- const: qcom,adreno-smmu
- const: qcom,smmu-v2
- description: Qcom Adreno GPUs on Google Cheza platform
......@@ -270,6 +271,7 @@ allOf:
contains:
enum:
- qcom,msm8998-smmu-v2
- qcom,sdm630-smmu-v2
then:
anyOf:
- properties:
......@@ -311,7 +313,6 @@ allOf:
compatible:
contains:
enum:
- qcom,sdm630-smmu-v2
- qcom,sm6375-smmu-v2
then:
anyOf:
......@@ -409,6 +410,7 @@ allOf:
contains:
enum:
- qcom,sm6350-smmu-v2
- qcom,sm7150-smmu-v2
- qcom,sm8150-smmu-500
- qcom,sm8250-smmu-500
then:
......
......@@ -25,11 +25,9 @@ struct arm_smmu_mmu_notifier {
#define mn_to_smmu(mn) container_of(mn, struct arm_smmu_mmu_notifier, mn)
struct arm_smmu_bond {
struct iommu_sva sva;
struct mm_struct *mm;
struct arm_smmu_mmu_notifier *smmu_mn;
struct list_head list;
refcount_t refs;
};
#define sva_to_bond(handle) \
......@@ -37,6 +35,25 @@ struct arm_smmu_bond {
static DEFINE_MUTEX(sva_lock);
/*
* Write the CD to the CD tables for all masters that this domain is attached
* to. Note that this is only used to update existing CD entries in the target
* CD table, for which it's assumed that arm_smmu_write_ctx_desc can't fail.
*/
static void arm_smmu_update_ctx_desc_devices(struct arm_smmu_domain *smmu_domain,
int ssid,
struct arm_smmu_ctx_desc *cd)
{
struct arm_smmu_master *master;
unsigned long flags;
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_for_each_entry(master, &smmu_domain->devices, domain_head) {
arm_smmu_write_ctx_desc(master, ssid, cd);
}
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
}
/*
* Check if the CPU ASID is available on the SMMU side. If a private context
* descriptor is using it, try to replace it.
......@@ -62,7 +79,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
return cd;
}
smmu_domain = container_of(cd, struct arm_smmu_domain, s1_cfg.cd);
smmu_domain = container_of(cd, struct arm_smmu_domain, cd);
smmu = smmu_domain->smmu;
ret = xa_alloc(&arm_smmu_asid_xa, &new_asid, cd,
......@@ -80,7 +97,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
* be some overlap between use of both ASIDs, until we invalidate the
* TLB.
*/
arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, cd);
arm_smmu_update_ctx_desc_devices(smmu_domain, IOMMU_NO_PASID, cd);
/* Invalidate TLB entries previously associated with that context */
arm_smmu_tlb_inv_asid(smmu, asid);
......@@ -186,6 +203,15 @@ static void arm_smmu_free_shared_cd(struct arm_smmu_ctx_desc *cd)
}
}
/*
* Cloned from the MAX_TLBI_OPS in arch/arm64/include/asm/tlbflush.h, this
* is used as a threshold to replace per-page TLBI commands to issue in the
* command queue with an address-space TLBI command, when SMMU w/o a range
* invalidation feature handles too many per-page TLBI commands, which will
* otherwise result in a soft lockup.
*/
#define CMDQ_MAX_TLBI_OPS (1 << (PAGE_SHIFT - 3))
static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
......@@ -201,8 +227,13 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
* range. So do a simple translation here by calculating size correctly.
*/
size = end - start;
if (size == ULONG_MAX)
size = 0;
if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_RANGE_INV)) {
if (size >= CMDQ_MAX_TLBI_OPS * PAGE_SIZE)
size = 0;
} else {
if (size == ULONG_MAX)
size = 0;
}
if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) {
if (!size)
......@@ -233,7 +264,7 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
* DMA may still be running. Keep the cd valid to avoid C_BAD_CD events,
* but disable translation.
*/
arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, &quiet_cd);
arm_smmu_update_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);
arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid);
arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, 0, 0);
......@@ -259,8 +290,10 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
struct mm_struct *mm)
{
int ret;
unsigned long flags;
struct arm_smmu_ctx_desc *cd;
struct arm_smmu_mmu_notifier *smmu_mn;
struct arm_smmu_master *master;
list_for_each_entry(smmu_mn, &smmu_domain->mmu_notifiers, list) {
if (smmu_mn->mn.mm == mm) {
......@@ -290,7 +323,16 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
goto err_free_cd;
}
ret = arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, cd);
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_for_each_entry(master, &smmu_domain->devices, domain_head) {
ret = arm_smmu_write_ctx_desc(master, mm->pasid, cd);
if (ret) {
list_for_each_entry_from_reverse(master, &smmu_domain->devices, domain_head)
arm_smmu_write_ctx_desc(master, mm->pasid, NULL);
break;
}
}
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
if (ret)
goto err_put_notifier;
......@@ -315,7 +357,8 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
return;
list_del(&smmu_mn->list);
arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, NULL);
arm_smmu_update_ctx_desc_devices(smmu_domain, mm->pasid, NULL);
/*
* If we went through clear(), we've already invalidated, and no
......@@ -331,8 +374,7 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
arm_smmu_free_shared_cd(cd);
}
static struct iommu_sva *
__arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
static int __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
{
int ret;
struct arm_smmu_bond *bond;
......@@ -341,23 +383,13 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
if (!master || !master->sva_enabled)
return ERR_PTR(-ENODEV);
/* If bind() was already called for this {dev, mm} pair, reuse it. */
list_for_each_entry(bond, &master->bonds, list) {
if (bond->mm == mm) {
refcount_inc(&bond->refs);
return &bond->sva;
}
}
return -ENODEV;
bond = kzalloc(sizeof(*bond), GFP_KERNEL);
if (!bond)
return ERR_PTR(-ENOMEM);
return -ENOMEM;
bond->mm = mm;
bond->sva.dev = dev;
refcount_set(&bond->refs, 1);
bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm);
if (IS_ERR(bond->smmu_mn)) {
......@@ -366,11 +398,11 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
}
list_add(&bond->list, &master->bonds);
return &bond->sva;
return 0;
err_free_bond:
kfree(bond);
return ERR_PTR(ret);
return ret;
}
bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
......@@ -536,7 +568,7 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
}
}
if (!WARN_ON(!bond) && refcount_dec_and_test(&bond->refs)) {
if (!WARN_ON(!bond)) {
list_del(&bond->list);
arm_smmu_mmu_notifier_put(bond->smmu_mn);
kfree(bond);
......@@ -548,13 +580,10 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t id)
{
int ret = 0;
struct iommu_sva *handle;
struct mm_struct *mm = domain->mm;
mutex_lock(&sva_lock);
handle = __arm_smmu_sva_bind(dev, mm);
if (IS_ERR(handle))
ret = PTR_ERR(handle);
ret = __arm_smmu_sva_bind(dev, mm);
mutex_unlock(&sva_lock);
return ret;
......
......@@ -971,14 +971,12 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}
static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
static void arm_smmu_sync_cd(struct arm_smmu_master *master,
int ssid, bool leaf)
{
size_t i;
unsigned long flags;
struct arm_smmu_master *master;
struct arm_smmu_cmdq_batch cmds;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_cmdq_ent cmd = {
.opcode = CMDQ_OP_CFGI_CD,
.cfgi = {
......@@ -988,15 +986,10 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
};
cmds.num = 0;
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_for_each_entry(master, &smmu_domain->devices, domain_head) {
for (i = 0; i < master->num_streams; i++) {
cmd.cfgi.sid = master->streams[i].id;
arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
}
for (i = 0; i < master->num_streams; i++) {
cmd.cfgi.sid = master->streams[i].id;
arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
}
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
arm_smmu_cmdq_batch_submit(smmu, &cmds);
}
......@@ -1026,34 +1019,33 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst,
WRITE_ONCE(*dst, cpu_to_le64(val));
}
static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
u32 ssid)
static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid)
{
__le64 *l1ptr;
unsigned int idx;
struct arm_smmu_l1_ctx_desc *l1_desc;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
return cd_table->cdtab + ssid * CTXDESC_CD_DWORDS;
idx = ssid >> CTXDESC_SPLIT;
l1_desc = &cdcfg->l1_desc[idx];
l1_desc = &cd_table->l1_desc[idx];
if (!l1_desc->l2ptr) {
if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
return NULL;
l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
/* An invalid L1CD can be cached */
arm_smmu_sync_cd(smmu_domain, ssid, false);
arm_smmu_sync_cd(master, ssid, false);
}
idx = ssid & (CTXDESC_L2_ENTRIES - 1);
return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
}
int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
struct arm_smmu_ctx_desc *cd)
{
/*
......@@ -1070,11 +1062,12 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
u64 val;
bool cd_live;
__le64 *cdptr;
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
return -E2BIG;
cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
cdptr = arm_smmu_get_cd_ptr(master, ssid);
if (!cdptr)
return -ENOMEM;
......@@ -1098,11 +1091,11 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
cdptr[3] = cpu_to_le64(cd->mair);
/*
* STE is live, and the SMMU might read dwords of this CD in any
* STE may be live, and the SMMU might read dwords of this CD in any
* order. Ensure that it observes valid values before reading
* V=1.
*/
arm_smmu_sync_cd(smmu_domain, ssid, true);
arm_smmu_sync_cd(master, ssid, true);
val = cd->tcr |
#ifdef __BIG_ENDIAN
......@@ -1114,7 +1107,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
CTXDESC_CD_0_V;
if (smmu_domain->stall_enabled)
if (cd_table->stall_enabled)
val |= CTXDESC_CD_0_S;
}
......@@ -1128,44 +1121,45 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
* without first making the structure invalid.
*/
WRITE_ONCE(cdptr[0], cpu_to_le64(val));
arm_smmu_sync_cd(smmu_domain, ssid, true);
arm_smmu_sync_cd(master, ssid, true);
return 0;
}
static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
{
int ret;
size_t l1size;
size_t max_contexts;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
max_contexts = 1 << cfg->s1cdmax;
cd_table->stall_enabled = master->stall_enabled;
cd_table->s1cdmax = master->ssid_bits;
max_contexts = 1 << cd_table->s1cdmax;
if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
max_contexts <= CTXDESC_L2_ENTRIES) {
cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
cdcfg->num_l1_ents = max_contexts;
cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
cd_table->num_l1_ents = max_contexts;
l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
} else {
cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts,
CTXDESC_L2_ENTRIES);
cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
sizeof(*cdcfg->l1_desc),
cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents,
sizeof(*cd_table->l1_desc),
GFP_KERNEL);
if (!cdcfg->l1_desc)
if (!cd_table->l1_desc)
return -ENOMEM;
l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
}
cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma,
GFP_KERNEL);
if (!cdcfg->cdtab) {
if (!cd_table->cdtab) {
dev_warn(smmu->dev, "failed to allocate context descriptor\n");
ret = -ENOMEM;
goto err_free_l1;
......@@ -1174,42 +1168,42 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
return 0;
err_free_l1:
if (cdcfg->l1_desc) {
devm_kfree(smmu->dev, cdcfg->l1_desc);
cdcfg->l1_desc = NULL;
if (cd_table->l1_desc) {
devm_kfree(smmu->dev, cd_table->l1_desc);
cd_table->l1_desc = NULL;
}
return ret;
}
static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
{
int i;
size_t size, l1size;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
if (cdcfg->l1_desc) {
if (cd_table->l1_desc) {
size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
for (i = 0; i < cdcfg->num_l1_ents; i++) {
if (!cdcfg->l1_desc[i].l2ptr)
for (i = 0; i < cd_table->num_l1_ents; i++) {
if (!cd_table->l1_desc[i].l2ptr)
continue;
dmam_free_coherent(smmu->dev, size,
cdcfg->l1_desc[i].l2ptr,
cdcfg->l1_desc[i].l2ptr_dma);
cd_table->l1_desc[i].l2ptr,
cd_table->l1_desc[i].l2ptr_dma);
}
devm_kfree(smmu->dev, cdcfg->l1_desc);
cdcfg->l1_desc = NULL;
devm_kfree(smmu->dev, cd_table->l1_desc);
cd_table->l1_desc = NULL;
l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
} else {
l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
}
dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
cdcfg->cdtab_dma = 0;
cdcfg->cdtab = NULL;
dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma);
cd_table->cdtab_dma = 0;
cd_table->cdtab = NULL;
}
bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
......@@ -1276,7 +1270,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
u64 val = le64_to_cpu(dst[0]);
bool ste_live = false;
struct arm_smmu_device *smmu = NULL;
struct arm_smmu_s1_cfg *s1_cfg = NULL;
struct arm_smmu_ctx_desc_cfg *cd_table = NULL;
struct arm_smmu_s2_cfg *s2_cfg = NULL;
struct arm_smmu_domain *smmu_domain = NULL;
struct arm_smmu_cmdq_ent prefetch_cmd = {
......@@ -1294,7 +1288,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
if (smmu_domain) {
switch (smmu_domain->stage) {
case ARM_SMMU_DOMAIN_S1:
s1_cfg = &smmu_domain->s1_cfg;
cd_table = &master->cd_table;
break;
case ARM_SMMU_DOMAIN_S2:
case ARM_SMMU_DOMAIN_NESTED:
......@@ -1325,7 +1319,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
val = STRTAB_STE_0_V;
/* Bypass/fault */
if (!smmu_domain || !(s1_cfg || s2_cfg)) {
if (!smmu_domain || !(cd_table || s2_cfg)) {
if (!smmu_domain && disable_bypass)
val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
else
......@@ -1344,7 +1338,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
return;
}
if (s1_cfg) {
if (cd_table) {
u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
......@@ -1360,10 +1354,10 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
!master->stall_enabled)
dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax) |
FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt);
}
if (s2_cfg) {
......@@ -1869,7 +1863,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
* careful, 007.
*/
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
} else {
cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
......@@ -1895,18 +1889,23 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
/* Get the leaf page size */
tg = __ffs(smmu_domain->domain.pgsize_bitmap);
num_pages = size >> tg;
/* Convert page size of 12,14,16 (log2) to 1,2,3 */
cmd->tlbi.tg = (tg - 10) / 2;
/*
* Determine what level the granule is at. For non-leaf, io-pgtable
* assumes .tlb_flush_walk can invalidate multiple levels at once,
* so ignore the nominal last-level granule and leave TTL=0.
* Determine what level the granule is at. For non-leaf, both
* io-pgtable and SVA pass a nominal last-level granule because
* they don't know what level(s) actually apply, so ignore that
* and leave TTL=0. However for various errata reasons we still
* want to use a range command, so avoid the SVA corner case
* where both scale and num could be 0 as well.
*/
if (cmd->tlbi.leaf)
cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
num_pages = size >> tg;
else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
num_pages++;
}
cmds.num = 0;
......@@ -1957,7 +1956,7 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
cmd.tlbi.asid = smmu_domain->cd.asid;
} else {
cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
......@@ -2062,15 +2061,11 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
free_io_pgtable_ops(smmu_domain->pgtbl_ops);
/* Free the CD and ASID, if we allocated them */
/* Free the ASID or VMID */
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
/* Prevent SVA from touching the CD while we're freeing it */
mutex_lock(&arm_smmu_asid_lock);
if (cfg->cdcfg.cdtab)
arm_smmu_free_cd_tables(smmu_domain);
arm_smmu_free_asid(&cfg->cd);
arm_smmu_free_asid(&smmu_domain->cd);
mutex_unlock(&arm_smmu_asid_lock);
} else {
struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
......@@ -2082,66 +2077,43 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
}
static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
struct arm_smmu_master *master,
struct io_pgtable_cfg *pgtbl_cfg)
{
int ret;
u32 asid;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
refcount_set(&cfg->cd.refs, 1);
refcount_set(&cd->refs, 1);
/* Prevent SVA from modifying the ASID until it is written to the CD */
mutex_lock(&arm_smmu_asid_lock);
ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd,
XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
if (ret)
goto out_unlock;
cfg->s1cdmax = master->ssid_bits;
smmu_domain->stall_enabled = master->stall_enabled;
ret = arm_smmu_alloc_cd_tables(smmu_domain);
if (ret)
goto out_free_asid;
cfg->cd.asid = (u16)asid;
cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
cd->asid = (u16)asid;
cd->ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
cd->tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
/*
* Note that this will end up calling arm_smmu_sync_cd() before
* the master has been added to the devices list for this domain.
* This isn't an issue because the STE hasn't been installed yet.
*/
ret = arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, &cfg->cd);
if (ret)
goto out_free_cd_tables;
cd->mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
mutex_unlock(&arm_smmu_asid_lock);
return 0;
out_free_cd_tables:
arm_smmu_free_cd_tables(smmu_domain);
out_free_asid:
arm_smmu_free_asid(&cfg->cd);
out_unlock:
mutex_unlock(&arm_smmu_asid_lock);
return ret;
}
static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
struct arm_smmu_master *master,
struct io_pgtable_cfg *pgtbl_cfg)
{
int vmid;
......@@ -2168,8 +2140,7 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
return 0;
}
static int arm_smmu_domain_finalise(struct iommu_domain *domain,
struct arm_smmu_master *master)
static int arm_smmu_domain_finalise(struct iommu_domain *domain)
{
int ret;
unsigned long ias, oas;
......@@ -2177,7 +2148,6 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain,
struct io_pgtable_cfg pgtbl_cfg;
struct io_pgtable_ops *pgtbl_ops;
int (*finalise_stage_fn)(struct arm_smmu_domain *,
struct arm_smmu_master *,
struct io_pgtable_cfg *);
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
......@@ -2229,7 +2199,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain,
domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
domain->geometry.force_aperture = true;
ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
if (ret < 0) {
free_io_pgtable_ops(pgtbl_ops);
return ret;
......@@ -2398,6 +2368,14 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master)
master->domain = NULL;
master->ats_enabled = false;
arm_smmu_install_ste_for_dev(master);
/*
* Clearing the CD entry isn't strictly required to detach the domain
* since the table is uninstalled anyway, but it helps avoid confusion
* in the call to arm_smmu_write_ctx_desc on the next attach (which
* expects the entry to be empty).
*/
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && master->cd_table.cdtab)
arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL);
}
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
......@@ -2431,23 +2409,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
if (!smmu_domain->smmu) {
smmu_domain->smmu = smmu;
ret = arm_smmu_domain_finalise(domain, master);
if (ret) {
ret = arm_smmu_domain_finalise(domain);
if (ret)
smmu_domain->smmu = NULL;
goto out_unlock;
}
} else if (smmu_domain->smmu != smmu) {
} else if (smmu_domain->smmu != smmu)
ret = -EINVAL;
goto out_unlock;
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
ret = -EINVAL;
goto out_unlock;
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
smmu_domain->stall_enabled != master->stall_enabled) {
ret = -EINVAL;
goto out_unlock;
}
mutex_unlock(&smmu_domain->init_mutex);
if (ret)
return ret;
master->domain = smmu_domain;
......@@ -2461,16 +2431,42 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
master->ats_enabled = arm_smmu_ats_supported(master);
arm_smmu_install_ste_for_dev(master);
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_add(&master->domain_head, &smmu_domain->devices);
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
if (!master->cd_table.cdtab) {
ret = arm_smmu_alloc_cd_tables(master);
if (ret) {
master->domain = NULL;
goto out_list_del;
}
}
/*
* Prevent SVA from concurrently modifying the CD or writing to
* the CD entry
*/
mutex_lock(&arm_smmu_asid_lock);
ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd);
mutex_unlock(&arm_smmu_asid_lock);
if (ret) {
master->domain = NULL;
goto out_list_del;
}
}
arm_smmu_install_ste_for_dev(master);
arm_smmu_enable_ats(master);
return 0;
out_list_del:
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_del(&master->domain_head);
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
out_unlock:
mutex_unlock(&smmu_domain->init_mutex);
return ret;
}
......@@ -2715,6 +2711,8 @@ static void arm_smmu_release_device(struct device *dev)
arm_smmu_detach_dev(master);
arm_smmu_disable_pasid(master);
arm_smmu_remove_master(master);
if (master->cd_table.cdtab)
arm_smmu_free_cd_tables(master);
kfree(master);
}
......
......@@ -595,13 +595,11 @@ struct arm_smmu_ctx_desc_cfg {
dma_addr_t cdtab_dma;
struct arm_smmu_l1_ctx_desc *l1_desc;
unsigned int num_l1_ents;
};
struct arm_smmu_s1_cfg {
struct arm_smmu_ctx_desc_cfg cdcfg;
struct arm_smmu_ctx_desc cd;
u8 s1fmt;
/* log2 of the maximum number of CDs supported by this table */
u8 s1cdmax;
/* Whether CD entries in this table have the stall bit set. */
u8 stall_enabled:1;
};
struct arm_smmu_s2_cfg {
......@@ -697,6 +695,8 @@ struct arm_smmu_master {
struct arm_smmu_domain *domain;
struct list_head domain_head;
struct arm_smmu_stream *streams;
/* Locked by the iommu core using the group mutex */
struct arm_smmu_ctx_desc_cfg cd_table;
unsigned int num_streams;
bool ats_enabled;
bool stall_enabled;
......@@ -719,13 +719,12 @@ struct arm_smmu_domain {
struct mutex init_mutex; /* Protects smmu pointer */
struct io_pgtable_ops *pgtbl_ops;
bool stall_enabled;
atomic_t nr_ats_masters;
enum arm_smmu_domain_stage stage;
union {
struct arm_smmu_s1_cfg s1_cfg;
struct arm_smmu_s2_cfg s2_cfg;
struct arm_smmu_ctx_desc cd;
struct arm_smmu_s2_cfg s2_cfg;
};
struct iommu_domain domain;
......@@ -745,7 +744,7 @@ extern struct xarray arm_smmu_asid_xa;
extern struct mutex arm_smmu_asid_lock;
extern struct arm_smmu_ctx_desc quiet_cd;
int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
int arm_smmu_write_ctx_desc(struct arm_smmu_master *smmu_master, int ssid,
struct arm_smmu_ctx_desc *cd);
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
......
......@@ -251,6 +251,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
{ .compatible = "qcom,sc7280-mss-pil" },
{ .compatible = "qcom,sc8180x-mdss" },
{ .compatible = "qcom,sc8280xp-mdss" },
{ .compatible = "qcom,sdm670-mdss" },
{ .compatible = "qcom,sdm845-mdss" },
{ .compatible = "qcom,sdm845-mss-pil" },
{ .compatible = "qcom,sm6350-mdss" },
......@@ -532,6 +533,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
{ .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sm6375-smmu-v2", .data = &qcom_smmu_v2_data },
{ .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sm7150-smmu-v2", .data = &qcom_smmu_v2_data },
{ .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_500_impl0_data },
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment