Commit 6043257b authored by Jason Gunthorpe, committed by Joerg Roedel

iommu: Introduce the domain op enforce_cache_coherency()

This new mechanism will replace using IOMMU_CAP_CACHE_COHERENCY and
IOMMU_CACHE to control the no-snoop blocking behavior of the IOMMU.

Currently only the Intel and AMD IOMMUs are known to support this
feature. Both implement it as an IOPTE bit that, when set, causes PCIe
TLPs targeting that IOVA with the no-snoop bit set to be treated as
though the no-snoop bit were clear.

The new API is triggered by calling enforce_cache_coherency() before
mapping any IOVA into the domain, and it switches on no-snoop blocking
globally. This shape also accommodates implementations that block
no-snoop globally and outside the IOPTE - AMD documents such a HW
capability as well.
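
To make the ordering concrete, here is a minimal caller sketch. This is
hypothetical illustration, not part of this patch: the function name is
invented, error handling is abbreviated, and the op is reached through the
iommu_domain_ops table that this patch extends:

	#include <linux/iommu.h>

	/* Hypothetical caller: enforce no-snoop blocking, then attach and map. */
	static int setup_enforced_domain(struct device *dev, dma_addr_t iova,
					 phys_addr_t paddr, size_t size)
	{
		struct iommu_domain *domain = iommu_domain_alloc(dev->bus);
		int ret;

		if (!domain)
			return -ENOMEM;

		/*
		 * Must be called before the first map: on Intel this latches
		 * dmar_domain->force_snooping so every new IOPTE gets
		 * DMA_PTE_SNP; AMD's IOPTEs always carry IOMMU_PTE_FC.
		 */
		if (!domain->ops->enforce_cache_coherency ||
		    !domain->ops->enforce_cache_coherency(domain)) {
			ret = -EOPNOTSUPP;	/* HW cannot block no-snoop TLPs */
			goto out_free;
		}

		ret = iommu_attach_device(domain, dev);
		if (ret)
			goto out_free;

		ret = iommu_map(domain, iova, paddr, size,
				IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE);
		if (ret)
			goto out_detach;
		return 0;

	out_detach:
		iommu_detach_device(domain, dev);
	out_free:
		iommu_domain_free(domain);
		return ret;
	}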

For now, leave AMD out of sync with Intel and have it block no-snoop even
for in-kernel users. This can be trivially resolved in a follow-up patch.

Only VFIO needs to call this API, because it does not have detailed enough
control over the device to keep it from requesting no-snoop behavior at
the device level. Other places using domains with real kernel drivers
should simply avoid asking their devices to set the no-snoop bit.
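
For a driver that does have that control, "avoid asking for no-snoop" can
be as simple as clearing the PCIe Enable No Snoop bit in Device Control.
A minimal sketch using the generic PCI capability helpers (illustrative
only, not part of this patch; some devices also have device-specific
no-snoop knobs that this does not cover):

	#include <linux/pci.h>

	/* Stop the device from emitting no-snoop TLPs at the source. */
	static void quiet_no_snoop(struct pci_dev *pdev)
	{
		pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL,
					   PCI_EXP_DEVCTL_NOSNOOP_EN);
	}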
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Acked-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/1-v3-2cf356649677+a32-intel_no_snoop_jgg@nvidia.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
parent 5b1553bf
@@ -2266,6 +2266,12 @@ static int amd_iommu_def_domain_type(struct device *dev)
 	return 0;
 }
 
+static bool amd_iommu_enforce_cache_coherency(struct iommu_domain *domain)
+{
+	/* IOMMU_PTE_FC is always set */
+	return true;
+}
+
 const struct iommu_ops amd_iommu_ops = {
 	.capable = amd_iommu_capable,
 	.domain_alloc = amd_iommu_domain_alloc,
@@ -2288,6 +2294,7 @@ const struct iommu_ops amd_iommu_ops = {
 		.flush_iotlb_all = amd_iommu_flush_iotlb_all,
 		.iotlb_sync = amd_iommu_iotlb_sync,
 		.free = amd_iommu_domain_free,
+		.enforce_cache_coherency = amd_iommu_enforce_cache_coherency,
 	}
 };
...
@@ -4422,7 +4422,8 @@ static int intel_iommu_map(struct iommu_domain *domain,
 		prot |= DMA_PTE_READ;
 	if (iommu_prot & IOMMU_WRITE)
 		prot |= DMA_PTE_WRITE;
-	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
+	if (((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) ||
+	    dmar_domain->force_snooping)
 		prot |= DMA_PTE_SNP;
 
 	max_addr = iova + size;
@@ -4545,6 +4546,16 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 	return phys;
 }
 
+static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
+{
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+
+	if (!dmar_domain->iommu_snooping)
+		return false;
+	dmar_domain->force_snooping = true;
+	return true;
+}
+
 static bool intel_iommu_capable(enum iommu_cap cap)
 {
 	if (cap == IOMMU_CAP_CACHE_COHERENCY)
@@ -4900,6 +4911,7 @@ const struct iommu_ops intel_iommu_ops = {
 		.iotlb_sync = intel_iommu_tlb_sync,
 		.iova_to_phys = intel_iommu_iova_to_phys,
 		.free = intel_iommu_domain_free,
+		.enforce_cache_coherency = intel_iommu_enforce_cache_coherency,
 	}
 };
...
@@ -540,6 +540,7 @@ struct dmar_domain {
 	u8 has_iotlb_device: 1;
 	u8 iommu_coherency: 1;		/* indicate coherency of iommu access */
 	u8 iommu_snooping: 1;		/* indicate snooping control feature */
+	u8 force_snooping : 1;		/* Create IOPTEs with snoop control */
 
 	struct list_head devices;	/* all devices' list */
 	struct iova_domain iovad;	/* iova's that belong to this domain */
...
@@ -274,6 +274,9 @@ struct iommu_ops {
  * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
  *              queue
  * @iova_to_phys: translate iova to physical address
+ * @enforce_cache_coherency: Prevent any kind of DMA from bypassing IOMMU_CACHE,
+ *                           including no-snoop TLPs on PCIe or other platform
+ *                           specific mechanisms.
  * @enable_nesting: Enable nesting
  * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*)
  * @free: Release the domain after use.
@@ -302,6 +305,7 @@ struct iommu_domain_ops {
 	phys_addr_t (*iova_to_phys)(struct iommu_domain *domain,
 				    dma_addr_t iova);
 
+	bool (*enforce_cache_coherency)(struct iommu_domain *domain);
 	int (*enable_nesting)(struct iommu_domain *domain);
 	int (*set_pgtable_quirks)(struct iommu_domain *domain,
 				  unsigned long quirks);
...