Commit ff5163bb authored by Gaurav Batra, committed by Michael Ellerman

powerpc/pseries/iommu: Split Dynamic DMA Window to be used in Hybrid mode

Dynamic DMA Window (DDW) supports TCEs that are backed by a 2MB page
size. In most configurations, the DDW is big enough to pre-map all of
LPAR memory for IO. Pre-mapping memory for DMA improves IO performance.

Persistent memory (vPMEM) can be assigned to an LPAR as well. vPMEM is
not contiguous with LPAR memory and is usually assigned at high memory
addresses. This makes it impossible to pre-map both vPMEM and LPAR
memory in the same DDW.

For a dedicated adapter this limitation is not an issue. Dedicated
adapters can have both a default DMA window, backed by 4K page size
TCEs, and a DDW backed by 2MB page size TCEs. In this scenario, LPAR
memory is pre-mapped in the DDW, and any DMA going to the vPMEM is
routed via dynamically allocated TCEs in the default window.

The issue arises with SR-IOV adapters, which have only one DMA window:
either the default window or a DDW. If an LPAR has vPMEM assigned,
memory is not pre-mapped in the DDW since TCEs need to be allocated for
vPMEM as well. In this case, the DDW is created and TCEs are dynamically
allocated for both vPMEM and LPAR memory.

Today, a DDW is used in only a single mode: either direct-mapped TCEs or
dynamically mapped TCEs. This enhancement splits a single DDW into two
regions:

	1. First region to pre-map LPAR memory
	2. Second region to dynamically allocate TCEs for IO to vPMEM

The DDW is split only if it is big enough to pre-map the complete LPAR
memory and still have space left over to dynamically map vPMEM. The
maximum DDW size permitted by the hypervisor is created. The split
arithmetic is sketched in the example below.
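For illustration, here is a minimal standalone sketch of that split
arithmetic. Only the 2G MIN_DDW_VPMEM_DMA_WINDOW constant comes from
this patch; the struct, the helper name, and the numbers are
hypothetical and merely mirror the kernel logic:

    /* Hypothetical sketch (not part of the patch): can a DDW of 2^len
     * bytes be split into a direct-mapped RAM region plus a dynamic
     * region for vPMEM TCEs?
     */
    #include <stdbool.h>
    #include <stdint.h>

    #define SZ_2G                    (2ULL << 30)
    #define MIN_DDW_VPMEM_DMA_WINDOW SZ_2G  /* constant added by this patch */

    struct ddw_split_plan {
        bool     split;          /* window big enough for both regions? */
        uint64_t direct_len;     /* region 1: pre-mapped LPAR RAM       */
        uint64_t dynamic_offset; /* region 2: dynamic TCEs for vPMEM    */
    };

    static struct ddw_split_plan plan_ddw(unsigned int len, unsigned int max_ram_len)
    {
        struct ddw_split_plan p = { 0 };
        uint64_t ddw_size = 1ULL << len;
        uint64_t ram_size = 1ULL << max_ram_len;

        /* split only if all of RAM fits with at least 2G left for vPMEM */
        if (ddw_size >= ram_size + MIN_DDW_VPMEM_DMA_WINDOW) {
            p.split = true;
            p.direct_len = ram_size;
            p.dynamic_offset = ram_size; /* dynamic part begins after RAM */
        }
        return p;
    }

For example, with 64GB of LPAR memory (max_ram_len = 36) and a 128GB
window (len = 37), the first 64GB are pre-mapped and 64GB, well above
the 2G minimum, remain for dynamic vPMEM TCEs.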
Signed-off-by: Gaurav Batra <gbatra@linux.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/20240514014608.35537-1-gbatra@linux.ibm.com
parent 214f33fc
@@ -31,6 +31,8 @@
 #define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
 #define DMA64_PROPNAME "linux,dma64-ddr-window-info"
 
+#define MIN_DDW_VPMEM_DMA_WINDOW SZ_2G
+
 /* Boot time flags */
 extern int iommu_is_off;
 extern int iommu_force_on;
@@ -1304,7 +1304,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
     struct ddw_query_response query;
     struct ddw_create_response create;
     int page_shift;
-    u64 win_addr;
+    u64 win_addr, dynamic_offset = 0;
     const char *win_name;
     struct device_node *dn;
     u32 ddw_avail[DDW_APPLICABLE_SIZE];
@@ -1312,6 +1312,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
     struct property *win64;
     struct failed_ddw_pdn *fpdn;
     bool default_win_removed = false, direct_mapping = false;
+    bool dynamic_mapping = false;
     bool pmem_present;
     struct pci_dn *pci = PCI_DN(pdn);
     struct property *default_win = NULL;
@@ -1407,7 +1408,6 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
         goto out_failed;
     }
 
-
     /*
      * The "ibm,pmemory" can appear anywhere in the address space.
      * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS
@@ -1432,14 +1432,42 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
             1ULL << page_shift);
 
         len = order_base_2(query.largest_available_block << page_shift);
-        win_name = DMA64_PROPNAME;
+
+        dynamic_mapping = true;
     } else {
         direct_mapping = !default_win_removed ||
             (len == MAX_PHYSMEM_BITS) ||
             (!pmem_present && (len == max_ram_len));
-        win_name = direct_mapping ? DIRECT64_PROPNAME : DMA64_PROPNAME;
+
+        /* DDW is big enough to direct map RAM. If there is vPMEM, check
+         * if enough space is left in DDW where we can dynamically
+         * allocate TCEs for vPMEM. For now, this Hybrid sharing of DDW
+         * is only for SR-IOV devices.
+         */
+        if (default_win_removed && pmem_present && !direct_mapping) {
+            /* DDW is big enough to be split */
+            if ((query.largest_available_block << page_shift) >=
+                 MIN_DDW_VPMEM_DMA_WINDOW + (1ULL << max_ram_len)) {
+                direct_mapping = true;
+
+                /* offset of the Dynamic part of DDW */
+                dynamic_offset = 1ULL << max_ram_len;
+            }
+
+            /* DDW will at least have dynamic allocation */
+            dynamic_mapping = true;
+
+            /* create max size DDW possible */
+            len = order_base_2(query.largest_available_block
+                    << page_shift);
+        }
     }
 
+    /* Even if the DDW is split into both direct mapped RAM and dynamically
+     * mapped vPMEM, the DDW property in OF will be marked as Direct.
+     */
+    win_name = direct_mapping ? DIRECT64_PROPNAME : DMA64_PROPNAME;
+
     ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
     if (ret != 0)
         goto out_failed;
@@ -1467,9 +1495,9 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
     if (!window)
         goto out_del_prop;
 
-    if (direct_mapping) {
-        window->direct = true;
+    window->direct = direct_mapping;
 
+    if (direct_mapping) {
         /* DDW maps the whole partition, so enable direct DMA mapping */
         ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
                 win64->value, tce_setrange_multi_pSeriesLP_walk);
@@ -1481,12 +1509,13 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
             clean_dma_window(pdn, win64->value);
             goto out_del_list;
         }
-    } else {
+    }
+
+    if (dynamic_mapping) {
         struct iommu_table *newtbl;
         int i;
         unsigned long start = 0, end = 0;
-
-        window->direct = false;
+        u64 dynamic_addr, dynamic_len;
 
         for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) {
             const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM;
@@ -1506,8 +1535,15 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
             goto out_del_list;
         }
 
-        iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, win_addr,
-                        1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops);
+        /* If the DDW is split between directly mapped RAM and Dynamic
+         * mapped for TCES, offset into the DDW where the dynamic part
+         * begins.
+         */
+        dynamic_addr = win_addr + dynamic_offset;
+        dynamic_len = (1UL << len) - dynamic_offset;
+        iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn,
+                        dynamic_addr, dynamic_len, page_shift, NULL,
+                        &iommu_table_lpar_multi_ops);
 
         iommu_init_table(newtbl, pci->phb->node, start, end);
         pci->table_group->tables[1] = newtbl;
@@ -1559,13 +1595,12 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 out_unlock:
     mutex_unlock(&dma_win_init_mutex);
 
-    /*
-     * If we have persistent memory and the window size is only as big
-     * as RAM, then we failed to create a window to cover persistent
-     * memory and need to set the DMA limit.
+    /* If we have persistent memory and the window size is not big enough
+     * to directly map both RAM and vPMEM, then we need to set DMA limit.
      */
-    if (pmem_present && direct_mapping && len == max_ram_len)
-        dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len);
+    if (pmem_present && direct_mapping && len != MAX_PHYSMEM_BITS)
+        dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset +
+                     (1ULL << max_ram_len);
 
     return direct_mapping;
 }
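To make the new offset math above concrete, here is a small
hypothetical worked example; the base address and sizes are made up for
illustration, not taken from the patch:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t win_addr        = 0x0800000000000000ULL; /* example DDW base   */
        unsigned int len         = 37;                    /* 128GB total window */
        unsigned int max_ram_len = 36;                    /* 64GB of LPAR RAM   */

        /* as in the patch: the dynamic part of the DDW begins after RAM */
        uint64_t dynamic_offset = 1ULL << max_ram_len;
        uint64_t dynamic_addr   = win_addr + dynamic_offset;
        uint64_t dynamic_len    = (1ULL << len) - dynamic_offset;

        assert(dynamic_addr == win_addr + (64ULL << 30)); /* region 2 base  */
        assert(dynamic_len  == 64ULL << 30);              /* 64GB for vPMEM */
        return 0;
    }

The second iommu table (pci->table_group->tables[1]) then covers only
the dynamic region, while DMA below dynamic_offset hits the pre-mapped
direct TCEs.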