Commit c76c067e authored by Niklas Schnelle, committed by Joerg Roedel

s390/pci: Use dma-iommu layer

While s390 already has a standard IOMMU driver and previous changes have
added I/O TLB flushing operations, this driver is currently only used for
user-space PCI access such as vfio-pci. For the DMA API, s390 instead
utilizes its own implementation in arch/s390/pci/pci_dma.c, which drives
the same hardware and shares some code but requires a complex and
fragile hand over between DMA API and IOMMU API use of a device and,
despite code sharing, still leads to significant duplication and
maintenance effort. Let's utilize the common code DMA API
implementation from drivers/iommu/dma-iommu.c instead, allowing us to
get rid of arch/s390/pci/pci_dma.c.
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Link: https://lore.kernel.org/r/20230928-dma_iommu-v13-3-9e5fc4dacc36@linux.ibm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
parent b6f88870
......@@ -2220,7 +2220,7 @@
forcing Dual Address Cycle for PCI cards supporting
greater than 32-bit addressing.
iommu.strict= [ARM64, X86] Configure TLB invalidation behaviour
iommu.strict= [ARM64, X86, S390] Configure TLB invalidation behaviour
Format: { "0" | "1" }
0 - Lazy mode.
Request that DMA unmap operations use deferred
......@@ -5611,9 +5611,10 @@
s390_iommu= [HW,S390]
Set s390 IOTLB flushing mode
strict
With strict flushing every unmap operation will result in
an IOTLB flush. Default is lazy flushing before reuse,
which is faster.
With strict flushing every unmap operation will result
in an IOTLB flush. Default is lazy flushing before
reuse, which is faster. Deprecated, equivalent to
iommu.strict=1.
s390_iommu_aperture= [KNL,S390]
Specifies the size of the per device DMA address space
......
......@@ -159,13 +159,6 @@ struct zpci_dev {
unsigned long *dma_table;
int tlb_refresh;
spinlock_t iommu_bitmap_lock;
unsigned long *iommu_bitmap;
unsigned long *lazy_bitmap;
unsigned long iommu_size;
unsigned long iommu_pages;
unsigned int next_bit;
struct iommu_device iommu_dev; /* IOMMU core handle */
char res_name[16];
......
......@@ -50,6 +50,9 @@ struct clp_fh_list_entry {
#define CLP_UTIL_STR_LEN 64
#define CLP_PFIP_NR_SEGMENTS 4
/* PCI function type numbers */
#define PCI_FUNC_TYPE_ISM 0x5 /* ISM device */
extern bool zpci_unique_uid;
struct clp_rsp_slpc_pci {
......
......@@ -82,117 +82,16 @@ enum zpci_ioat_dtype {
#define ZPCI_TABLE_VALID_MASK 0x20
#define ZPCI_TABLE_PROT_MASK 0x200
static inline unsigned int calc_rtx(dma_addr_t ptr)
{
return ((unsigned long) ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
}
static inline unsigned int calc_sx(dma_addr_t ptr)
{
return ((unsigned long) ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK;
}
static inline unsigned int calc_px(dma_addr_t ptr)
{
return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
}
static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
{
*entry &= ZPCI_PTE_FLAG_MASK;
*entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
}
static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
{
*entry &= ZPCI_RTE_FLAG_MASK;
*entry |= (sto & ZPCI_RTE_ADDR_MASK);
*entry |= ZPCI_TABLE_TYPE_RTX;
}
static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
{
*entry &= ZPCI_STE_FLAG_MASK;
*entry |= (pto & ZPCI_STE_ADDR_MASK);
*entry |= ZPCI_TABLE_TYPE_SX;
}
static inline void validate_rt_entry(unsigned long *entry)
{
*entry &= ~ZPCI_TABLE_VALID_MASK;
*entry &= ~ZPCI_TABLE_OFFSET_MASK;
*entry |= ZPCI_TABLE_VALID;
*entry |= ZPCI_TABLE_LEN_RTX;
}
static inline void validate_st_entry(unsigned long *entry)
{
*entry &= ~ZPCI_TABLE_VALID_MASK;
*entry |= ZPCI_TABLE_VALID;
}
static inline void invalidate_pt_entry(unsigned long *entry)
{
WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
*entry &= ~ZPCI_PTE_VALID_MASK;
*entry |= ZPCI_PTE_INVALID;
}
static inline void validate_pt_entry(unsigned long *entry)
{
WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID);
*entry &= ~ZPCI_PTE_VALID_MASK;
*entry |= ZPCI_PTE_VALID;
}
static inline void entry_set_protected(unsigned long *entry)
{
*entry &= ~ZPCI_TABLE_PROT_MASK;
*entry |= ZPCI_TABLE_PROTECTED;
}
static inline void entry_clr_protected(unsigned long *entry)
{
*entry &= ~ZPCI_TABLE_PROT_MASK;
*entry |= ZPCI_TABLE_UNPROTECTED;
}
static inline int reg_entry_isvalid(unsigned long entry)
{
return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
}
static inline int pt_entry_isvalid(unsigned long entry)
{
return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
}
static inline unsigned long *get_rt_sto(unsigned long entry)
{
if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
else
return NULL;
}
static inline unsigned long *get_st_pto(unsigned long entry)
{
if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
return phys_to_virt(entry & ZPCI_STE_ADDR_MASK);
else
return NULL;
}
/* Prototypes */
void dma_free_seg_table(unsigned long);
unsigned long *dma_alloc_cpu_table(gfp_t gfp);
void dma_cleanup_tables(unsigned long *);
unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr,
gfp_t gfp);
void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags);
extern const struct dma_map_ops s390_pci_dma_ops;
struct zpci_iommu_ctrs {
atomic64_t mapped_pages;
atomic64_t unmapped_pages;
atomic64_t global_rpcits;
atomic64_t sync_map_rpcits;
atomic64_t sync_rpcits;
};
struct zpci_dev;
struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev);
#endif
......@@ -3,7 +3,7 @@
# Makefile for the s390 PCI subsystem.
#
obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o pci_sysfs.o \
pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
pci_bus.o pci_kvm_hook.o
obj-$(CONFIG_PCI_IOV) += pci_iov.o
......@@ -124,6 +124,10 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
WARN_ON_ONCE(iota & 0x3fff);
fib.pba = base;
/* Work around off by one in ISM virt device */
if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base)
fib.pal = limit + (1 << 12);
else
fib.pal = limit;
fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
fib.gd = zdev->gisa;
......@@ -582,7 +586,6 @@ int pcibios_device_add(struct pci_dev *pdev)
pdev->no_vf_scan = 1;
pdev->dev.groups = zpci_attr_groups;
pdev->dev.dma_ops = &s390_pci_dma_ops;
zpci_map_resources(pdev);
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
......@@ -756,8 +759,6 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
if (zdev->dma_table)
rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
virt_to_phys(zdev->dma_table), &status);
else
rc = zpci_dma_init_device(zdev);
if (rc) {
zpci_disable_device(zdev);
return rc;
......@@ -865,11 +866,6 @@ int zpci_deconfigure_device(struct zpci_dev *zdev)
if (zdev->zbus->bus)
zpci_bus_remove_device(zdev, false);
if (zdev->dma_table) {
rc = zpci_dma_exit_device(zdev);
if (rc)
return rc;
}
if (zdev_enabled(zdev)) {
rc = zpci_disable_device(zdev);
if (rc)
......@@ -918,8 +914,6 @@ void zpci_release_device(struct kref *kref)
if (zdev->zbus->bus)
zpci_bus_remove_device(zdev, false);
if (zdev->dma_table)
zpci_dma_exit_device(zdev);
if (zdev_enabled(zdev))
zpci_disable_device(zdev);
......@@ -1109,10 +1103,6 @@ static int __init pci_base_init(void)
if (rc)
goto out_irq;
rc = zpci_dma_init();
if (rc)
goto out_dma;
rc = clp_scan_pci_devices();
if (rc)
goto out_find;
......@@ -1122,8 +1112,6 @@ static int __init pci_base_init(void)
return 0;
out_find:
zpci_dma_exit();
out_dma:
zpci_irq_exit();
out_irq:
zpci_mem_exit();
......
......@@ -47,11 +47,6 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev)
rc = zpci_enable_device(zdev);
if (rc)
return rc;
rc = zpci_dma_init_device(zdev);
if (rc) {
zpci_disable_device(zdev);
return rc;
}
}
if (!zdev->has_resources) {
......
......@@ -53,9 +53,11 @@ static char *pci_fmt3_names[] = {
};
static char *pci_sw_names[] = {
"Allocated pages",
"Mapped pages",
"Unmapped pages",
"Global RPCITs",
"Sync Map RPCITs",
"Sync RPCITs",
};
static void pci_fmb_show(struct seq_file *m, char *name[], int length,
......@@ -69,10 +71,14 @@ static void pci_fmb_show(struct seq_file *m, char *name[], int length,
static void pci_sw_counter_show(struct seq_file *m)
{
struct zpci_dev *zdev = m->private;
atomic64_t *counter = &zdev->allocated_pages;
struct zpci_iommu_ctrs *ctrs = zpci_get_iommu_ctrs(m->private);
atomic64_t *counter;
int i;
if (!ctrs)
return;
counter = &ctrs->mapped_pages;
for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
atomic64_read(counter));
......
This diff is collapsed.
......@@ -313,8 +313,6 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
/* Even though the device is already gone we still
* need to free zPCI resources as part of the disable.
*/
if (zdev->dma_table)
zpci_dma_exit_device(zdev);
if (zdev_enabled(zdev))
zpci_disable_device(zdev);
zdev->state = ZPCI_FN_STATE_STANDBY;
......
......@@ -56,6 +56,7 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
struct pci_dev *pdev = to_pci_dev(dev);
struct zpci_dev *zdev = to_zpci(pdev);
int ret = 0;
u8 status;
/* Can't use device_remove_self() here as that would lead us to lock
* the pci_rescan_remove_lock while holding the device' kernfs lock.
......@@ -82,12 +83,6 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
pci_lock_rescan_remove();
if (pci_dev_is_added(pdev)) {
pci_stop_and_remove_bus_device(pdev);
if (zdev->dma_table) {
ret = zpci_dma_exit_device(zdev);
if (ret)
goto out;
}
if (zdev_enabled(zdev)) {
ret = zpci_disable_device(zdev);
/*
......@@ -105,14 +100,16 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
ret = zpci_enable_device(zdev);
if (ret)
goto out;
ret = zpci_dma_init_device(zdev);
if (ret) {
if (zdev->dma_table) {
ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
virt_to_phys(zdev->dma_table), &status);
if (ret)
zpci_disable_device(zdev);
goto out;
}
pci_rescan_bus(zdev->zbus->bus);
}
out:
pci_rescan_bus(zdev->zbus->bus);
pci_unlock_rescan_remove();
if (kn)
sysfs_unbreak_active_protection(kn);
......
......@@ -91,7 +91,7 @@ config IOMMU_DEBUGFS
choice
prompt "IOMMU default domain type"
depends on IOMMU_API
default IOMMU_DEFAULT_DMA_LAZY if X86 || IA64
default IOMMU_DEFAULT_DMA_LAZY if X86 || IA64 || S390
default IOMMU_DEFAULT_DMA_STRICT
help
Choose the type of IOMMU domain used to manage DMA API usage by
......@@ -146,7 +146,7 @@ config OF_IOMMU
# IOMMU-agnostic DMA-mapping layer
config IOMMU_DMA
def_bool ARM64 || IA64 || X86
def_bool ARM64 || IA64 || X86 || S390
select DMA_OPS
select IOMMU_API
select IOMMU_IOVA
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment