Commit 7296f230 authored by Michael Kelley, committed by Christoph Hellwig

swiotlb: reduce swiotlb pool lookups

With CONFIG_SWIOTLB_DYNAMIC enabled, each round-trip map/unmap pair
in the swiotlb results in 6 calls to swiotlb_find_pool(). In multiple
places, the pool is found and used in one function, and then must
be found again in the next function that is called because only the
tlb_addr is passed as an argument. These are the six call sites:

dma_direct_map_page:
 1. swiotlb_map -> swiotlb_tbl_map_single -> swiotlb_bounce

dma_direct_unmap_page:
 2. dma_direct_sync_single_for_cpu -> is_swiotlb_buffer
 3. dma_direct_sync_single_for_cpu -> swiotlb_sync_single_for_cpu ->
	swiotlb_bounce
 4. is_swiotlb_buffer
 5. swiotlb_tbl_unmap_single -> swiotlb_del_transient
 6. swiotlb_tbl_unmap_single -> swiotlb_release_slots

Reduce the number of calls by finding the pool at a higher level, and
passing it as an argument instead of searching again. A key change is
for is_swiotlb_buffer() to return a pool pointer instead of a boolean,
and then pass this pool pointer to subsequent swiotlb functions.

There are 9 occurrences of is_swiotlb_buffer() used to test if a buffer
is a swiotlb buffer before calling a swiotlb function. To reduce code
duplication in getting the pool pointer and passing it as an argument,
introduce inline wrappers for this pattern. The generated code is
essentially unchanged.

Since is_swiotlb_buffer() no longer returns a boolean, rename some
functions to reflect the change:

 * swiotlb_find_pool() becomes __swiotlb_find_pool()
 * is_swiotlb_buffer() becomes swiotlb_find_pool()
 * is_xen_swiotlb_buffer() becomes xen_swiotlb_find_pool()

With these changes, a round-trip map/unmap pair requires only 2 pool
lookups (listed using the new names and wrappers):

dma_direct_unmap_page:
 1. dma_direct_sync_single_for_cpu -> swiotlb_find_pool
 2. swiotlb_tbl_unmap_single -> swiotlb_find_pool

These changes come from noticing the inefficiencies in a code review,
not from performance measurements. With CONFIG_SWIOTLB_DYNAMIC,
__swiotlb_find_pool() is not trivial, and it uses an RCU read lock,
so avoiding the redundant calls helps performance in a hot path.
When CONFIG_SWIOTLB_DYNAMIC is *not* set, the code size reduction
is minimal and the perf benefits are likely negligible, but no
harm is done.

No functional change is intended.
Signed-off-by: Michael Kelley <mhklinux@outlook.com>
Reviewed-by: Petr Tesarik <petr@tesarici.cz>
Signed-off-by: Christoph Hellwig <hch@lst.de>
parent 54624acf
...@@ -1081,7 +1081,6 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev, ...@@ -1081,7 +1081,6 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
if (!dev_is_dma_coherent(dev)) if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_cpu(phys, size, dir); arch_sync_dma_for_cpu(phys, size, dir);
if (is_swiotlb_buffer(dev, phys))
swiotlb_sync_single_for_cpu(dev, phys, size, dir); swiotlb_sync_single_for_cpu(dev, phys, size, dir);
} }
...@@ -1094,7 +1093,6 @@ static void iommu_dma_sync_single_for_device(struct device *dev, ...@@ -1094,7 +1093,6 @@ static void iommu_dma_sync_single_for_device(struct device *dev,
return; return;
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
if (is_swiotlb_buffer(dev, phys))
swiotlb_sync_single_for_device(dev, phys, size, dir); swiotlb_sync_single_for_device(dev, phys, size, dir);
if (!dev_is_dma_coherent(dev)) if (!dev_is_dma_coherent(dev))
...@@ -1189,7 +1187,7 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, ...@@ -1189,7 +1187,7 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
arch_sync_dma_for_device(phys, size, dir); arch_sync_dma_for_device(phys, size, dir);
iova = __iommu_dma_map(dev, phys, size, prot, dma_mask); iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys)) if (iova == DMA_MAPPING_ERROR)
swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
return iova; return iova;
} }
...@@ -1209,7 +1207,6 @@ static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, ...@@ -1209,7 +1207,6 @@ static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
__iommu_dma_unmap(dev, dma_handle, size); __iommu_dma_unmap(dev, dma_handle, size);
if (unlikely(is_swiotlb_buffer(dev, phys)))
swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
} }
......
...@@ -88,7 +88,8 @@ static inline int range_straddles_page_boundary(phys_addr_t p, size_t size) ...@@ -88,7 +88,8 @@ static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
return 0; return 0;
} }
static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr) static struct io_tlb_pool *xen_swiotlb_find_pool(struct device *dev,
dma_addr_t dma_addr)
{ {
unsigned long bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr)); unsigned long bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr));
unsigned long xen_pfn = bfn_to_local_pfn(bfn); unsigned long xen_pfn = bfn_to_local_pfn(bfn);
...@@ -99,8 +100,8 @@ static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr) ...@@ -99,8 +100,8 @@ static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
* in our domain. Therefore _only_ check address within our domain. * in our domain. Therefore _only_ check address within our domain.
*/ */
if (pfn_valid(PFN_DOWN(paddr))) if (pfn_valid(PFN_DOWN(paddr)))
return is_swiotlb_buffer(dev, paddr); return swiotlb_find_pool(dev, paddr);
return 0; return NULL;
} }
#ifdef CONFIG_X86 #ifdef CONFIG_X86
...@@ -227,8 +228,9 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, ...@@ -227,8 +228,9 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
* Ensure that the address returned is DMA'ble * Ensure that the address returned is DMA'ble
*/ */
if (unlikely(!dma_capable(dev, dev_addr, size, true))) { if (unlikely(!dma_capable(dev, dev_addr, size, true))) {
swiotlb_tbl_unmap_single(dev, map, size, dir, __swiotlb_tbl_unmap_single(dev, map, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC); attrs | DMA_ATTR_SKIP_CPU_SYNC,
swiotlb_find_pool(dev, map));
return DMA_MAPPING_ERROR; return DMA_MAPPING_ERROR;
} }
...@@ -254,6 +256,7 @@ static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, ...@@ -254,6 +256,7 @@ static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir, unsigned long attrs) size_t size, enum dma_data_direction dir, unsigned long attrs)
{ {
phys_addr_t paddr = xen_dma_to_phys(hwdev, dev_addr); phys_addr_t paddr = xen_dma_to_phys(hwdev, dev_addr);
struct io_tlb_pool *pool;
BUG_ON(dir == DMA_NONE); BUG_ON(dir == DMA_NONE);
...@@ -265,8 +268,10 @@ static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, ...@@ -265,8 +268,10 @@ static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
} }
/* NOTE: We use dev_addr here, not paddr! */ /* NOTE: We use dev_addr here, not paddr! */
if (is_xen_swiotlb_buffer(hwdev, dev_addr)) pool = xen_swiotlb_find_pool(hwdev, dev_addr);
swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); if (pool)
__swiotlb_tbl_unmap_single(hwdev, paddr, size, dir,
attrs, pool);
} }
static void static void
...@@ -274,6 +279,7 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr, ...@@ -274,6 +279,7 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction dir) size_t size, enum dma_data_direction dir)
{ {
phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr); phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);
struct io_tlb_pool *pool;
if (!dev_is_dma_coherent(dev)) { if (!dev_is_dma_coherent(dev)) {
if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
...@@ -282,8 +288,9 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr, ...@@ -282,8 +288,9 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
xen_dma_sync_for_cpu(dev, dma_addr, size, dir); xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
} }
if (is_xen_swiotlb_buffer(dev, dma_addr)) pool = xen_swiotlb_find_pool(dev, dma_addr);
swiotlb_sync_single_for_cpu(dev, paddr, size, dir); if (pool)
__swiotlb_sync_single_for_cpu(dev, paddr, size, dir, pool);
} }
static void static void
...@@ -291,9 +298,11 @@ xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr, ...@@ -291,9 +298,11 @@ xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction dir) size_t size, enum dma_data_direction dir)
{ {
phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr); phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);
struct io_tlb_pool *pool;
if (is_xen_swiotlb_buffer(dev, dma_addr)) pool = xen_swiotlb_find_pool(dev, dma_addr);
swiotlb_sync_single_for_device(dev, paddr, size, dir); if (pool)
__swiotlb_sync_single_for_device(dev, paddr, size, dir, pool);
if (!dev_is_dma_coherent(dev)) { if (!dev_is_dma_coherent(dev)) {
if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
......
...@@ -332,7 +332,7 @@ static inline void sg_dma_unmark_bus_address(struct scatterlist *sg) ...@@ -332,7 +332,7 @@ static inline void sg_dma_unmark_bus_address(struct scatterlist *sg)
* Description: * Description:
* Returns true if the scatterlist was marked for SWIOTLB bouncing. Not all * Returns true if the scatterlist was marked for SWIOTLB bouncing. Not all
* elements may have been bounced, so the caller would have to check * elements may have been bounced, so the caller would have to check
* individual SG entries with is_swiotlb_buffer(). * individual SG entries with swiotlb_find_pool().
*/ */
static inline bool sg_dma_is_swiotlb(struct scatterlist *sg) static inline bool sg_dma_is_swiotlb(struct scatterlist *sg)
{ {
......
...@@ -42,24 +42,6 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, ...@@ -42,24 +42,6 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
int (*remap)(void *tlb, unsigned long nslabs)); int (*remap)(void *tlb, unsigned long nslabs));
extern void __init swiotlb_update_mem_attributes(void); extern void __init swiotlb_update_mem_attributes(void);
phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
size_t mapping_size,
unsigned int alloc_aligned_mask, enum dma_data_direction dir,
unsigned long attrs);
extern void swiotlb_tbl_unmap_single(struct device *hwdev,
phys_addr_t tlb_addr,
size_t mapping_size,
enum dma_data_direction dir,
unsigned long attrs);
void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir);
void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir);
dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
size_t size, enum dma_data_direction dir, unsigned long attrs);
#ifdef CONFIG_SWIOTLB #ifdef CONFIG_SWIOTLB
/** /**
...@@ -143,37 +125,27 @@ struct io_tlb_mem { ...@@ -143,37 +125,27 @@ struct io_tlb_mem {
#endif #endif
}; };
#ifdef CONFIG_SWIOTLB_DYNAMIC struct io_tlb_pool *__swiotlb_find_pool(struct device *dev, phys_addr_t paddr);
struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr);
#else
static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev,
phys_addr_t paddr)
{
return &dev->dma_io_tlb_mem->defpool;
}
#endif
/** /**
* is_swiotlb_buffer() - check if a physical address belongs to a swiotlb * swiotlb_find_pool() - find swiotlb pool to which a physical address belongs
* @dev: Device which has mapped the buffer. * @dev: Device which has mapped the buffer.
* @paddr: Physical address within the DMA buffer. * @paddr: Physical address within the DMA buffer.
* *
* Check if @paddr points into a bounce buffer. * Find the swiotlb pool that @paddr points into.
* *
* Return: * Return:
* * %true if @paddr points into a bounce buffer * * pool address if @paddr points into a bounce buffer
* * %false otherwise * * NULL if @paddr does not point into a bounce buffer. As such, this function
* can be used to determine if @paddr denotes a swiotlb bounce buffer.
*/ */
static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev,
phys_addr_t paddr)
{ {
struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
if (!mem) if (!mem)
return false; return NULL;
#ifdef CONFIG_SWIOTLB_DYNAMIC #ifdef CONFIG_SWIOTLB_DYNAMIC
/* /*
...@@ -182,16 +154,19 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) ...@@ -182,16 +154,19 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
* If a SWIOTLB address is checked on another CPU, then it was * If a SWIOTLB address is checked on another CPU, then it was
* presumably loaded by the device driver from an unspecified private * presumably loaded by the device driver from an unspecified private
* data structure. Make sure that this load is ordered before reading * data structure. Make sure that this load is ordered before reading
* dev->dma_uses_io_tlb here and mem->pools in swiotlb_find_pool(). * dev->dma_uses_io_tlb here and mem->pools in __swiotlb_find_pool().
* *
* This barrier pairs with smp_mb() in swiotlb_find_slots(). * This barrier pairs with smp_mb() in swiotlb_find_slots().
*/ */
smp_rmb(); smp_rmb();
return READ_ONCE(dev->dma_uses_io_tlb) && if (READ_ONCE(dev->dma_uses_io_tlb))
swiotlb_find_pool(dev, paddr); return __swiotlb_find_pool(dev, paddr);
#else #else
return paddr >= mem->defpool.start && paddr < mem->defpool.end; if (paddr >= mem->defpool.start && paddr < mem->defpool.end)
return &mem->defpool;
#endif #endif
return NULL;
} }
static inline bool is_swiotlb_force_bounce(struct device *dev) static inline bool is_swiotlb_force_bounce(struct device *dev)
...@@ -219,9 +194,10 @@ static inline void swiotlb_dev_init(struct device *dev) ...@@ -219,9 +194,10 @@ static inline void swiotlb_dev_init(struct device *dev)
{ {
} }
static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev,
phys_addr_t paddr)
{ {
return false; return NULL;
} }
static inline bool is_swiotlb_force_bounce(struct device *dev) static inline bool is_swiotlb_force_bounce(struct device *dev)
{ {
...@@ -260,6 +236,49 @@ static inline phys_addr_t default_swiotlb_limit(void) ...@@ -260,6 +236,49 @@ static inline phys_addr_t default_swiotlb_limit(void)
} }
#endif /* CONFIG_SWIOTLB */ #endif /* CONFIG_SWIOTLB */
phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
size_t mapping_size, unsigned int alloc_aligned_mask,
enum dma_data_direction dir, unsigned long attrs);
dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
size_t size, enum dma_data_direction dir, unsigned long attrs);
void __swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
size_t mapping_size, enum dma_data_direction dir,
unsigned long attrs, struct io_tlb_pool *pool);
static inline void swiotlb_tbl_unmap_single(struct device *dev,
phys_addr_t addr, size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr);
if (unlikely(pool))
__swiotlb_tbl_unmap_single(dev, addr, size, dir, attrs, pool);
}
void __swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir,
struct io_tlb_pool *pool);
static inline void swiotlb_sync_single_for_device(struct device *dev,
phys_addr_t addr, size_t size, enum dma_data_direction dir)
{
struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr);
if (unlikely(pool))
__swiotlb_sync_single_for_device(dev, addr, size, dir, pool);
}
void __swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir,
struct io_tlb_pool *pool);
static inline void swiotlb_sync_single_for_cpu(struct device *dev,
phys_addr_t addr, size_t size, enum dma_data_direction dir)
{
struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr);
if (unlikely(pool))
__swiotlb_sync_single_for_cpu(dev, addr, size, dir, pool);
}
extern void swiotlb_print_info(void); extern void swiotlb_print_info(void);
#ifdef CONFIG_DMA_RESTRICTED_POOL #ifdef CONFIG_DMA_RESTRICTED_POOL
......
...@@ -404,9 +404,7 @@ void dma_direct_sync_sg_for_device(struct device *dev, ...@@ -404,9 +404,7 @@ void dma_direct_sync_sg_for_device(struct device *dev,
for_each_sg(sgl, sg, nents, i) { for_each_sg(sgl, sg, nents, i) {
phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg)); phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
if (unlikely(is_swiotlb_buffer(dev, paddr))) swiotlb_sync_single_for_device(dev, paddr, sg->length, dir);
swiotlb_sync_single_for_device(dev, paddr, sg->length,
dir);
if (!dev_is_dma_coherent(dev)) if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_device(paddr, sg->length, arch_sync_dma_for_device(paddr, sg->length,
...@@ -430,9 +428,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev, ...@@ -430,9 +428,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
if (!dev_is_dma_coherent(dev)) if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_cpu(paddr, sg->length, dir); arch_sync_dma_for_cpu(paddr, sg->length, dir);
if (unlikely(is_swiotlb_buffer(dev, paddr))) swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir);
swiotlb_sync_single_for_cpu(dev, paddr, sg->length,
dir);
if (dir == DMA_FROM_DEVICE) if (dir == DMA_FROM_DEVICE)
arch_dma_mark_clean(paddr, sg->length); arch_dma_mark_clean(paddr, sg->length);
...@@ -640,7 +636,7 @@ size_t dma_direct_max_mapping_size(struct device *dev) ...@@ -640,7 +636,7 @@ size_t dma_direct_max_mapping_size(struct device *dev)
bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr) bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr)
{ {
return !dev_is_dma_coherent(dev) || return !dev_is_dma_coherent(dev) ||
is_swiotlb_buffer(dev, dma_to_phys(dev, dma_addr)); swiotlb_find_pool(dev, dma_to_phys(dev, dma_addr));
} }
/** /**
......
...@@ -58,7 +58,6 @@ static inline void dma_direct_sync_single_for_device(struct device *dev, ...@@ -58,7 +58,6 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
{ {
phys_addr_t paddr = dma_to_phys(dev, addr); phys_addr_t paddr = dma_to_phys(dev, addr);
if (unlikely(is_swiotlb_buffer(dev, paddr)))
swiotlb_sync_single_for_device(dev, paddr, size, dir); swiotlb_sync_single_for_device(dev, paddr, size, dir);
if (!dev_is_dma_coherent(dev)) if (!dev_is_dma_coherent(dev))
...@@ -75,7 +74,6 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev, ...@@ -75,7 +74,6 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
arch_sync_dma_for_cpu_all(); arch_sync_dma_for_cpu_all();
} }
if (unlikely(is_swiotlb_buffer(dev, paddr)))
swiotlb_sync_single_for_cpu(dev, paddr, size, dir); swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
if (dir == DMA_FROM_DEVICE) if (dir == DMA_FROM_DEVICE)
...@@ -121,7 +119,6 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, ...@@ -121,7 +119,6 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
dma_direct_sync_single_for_cpu(dev, addr, size, dir); dma_direct_sync_single_for_cpu(dev, addr, size, dir);
if (unlikely(is_swiotlb_buffer(dev, phys)))
swiotlb_tbl_unmap_single(dev, phys, size, dir, swiotlb_tbl_unmap_single(dev, phys, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC); attrs | DMA_ATTR_SKIP_CPU_SYNC);
} }
......
...@@ -763,16 +763,18 @@ static void swiotlb_dyn_free(struct rcu_head *rcu) ...@@ -763,16 +763,18 @@ static void swiotlb_dyn_free(struct rcu_head *rcu)
} }
/** /**
* swiotlb_find_pool() - find the IO TLB pool for a physical address * __swiotlb_find_pool() - find the IO TLB pool for a physical address
* @dev: Device which has mapped the DMA buffer. * @dev: Device which has mapped the DMA buffer.
* @paddr: Physical address within the DMA buffer. * @paddr: Physical address within the DMA buffer.
* *
* Find the IO TLB memory pool descriptor which contains the given physical * Find the IO TLB memory pool descriptor which contains the given physical
* address, if any. * address, if any. This function is for use only when the dev is known to
* be using swiotlb. Use swiotlb_find_pool() for the more general case
* when this condition is not met.
* *
* Return: Memory pool which contains @paddr, or %NULL if none. * Return: Memory pool which contains @paddr, or %NULL if none.
*/ */
struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr) struct io_tlb_pool *__swiotlb_find_pool(struct device *dev, phys_addr_t paddr)
{ {
struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
struct io_tlb_pool *pool; struct io_tlb_pool *pool;
...@@ -855,9 +857,8 @@ static unsigned int swiotlb_align_offset(struct device *dev, ...@@ -855,9 +857,8 @@ static unsigned int swiotlb_align_offset(struct device *dev,
* Bounce: copy the swiotlb buffer from or back to the original dma location * Bounce: copy the swiotlb buffer from or back to the original dma location
*/ */
static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size, static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size,
enum dma_data_direction dir) enum dma_data_direction dir, struct io_tlb_pool *mem)
{ {
struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT; int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
phys_addr_t orig_addr = mem->slots[index].orig_addr; phys_addr_t orig_addr = mem->slots[index].orig_addr;
size_t alloc_size = mem->slots[index].alloc_size; size_t alloc_size = mem->slots[index].alloc_size;
...@@ -1243,7 +1244,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, ...@@ -1243,7 +1244,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
* that was made by swiotlb_dyn_alloc() on a third CPU (cf. multicopy * that was made by swiotlb_dyn_alloc() on a third CPU (cf. multicopy
* atomicity). * atomicity).
* *
* See also the comment in is_swiotlb_buffer(). * See also the comment in swiotlb_find_pool().
*/ */
smp_mb(); smp_mb();
...@@ -1435,13 +1436,13 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, ...@@ -1435,13 +1436,13 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
* hardware behavior. Use of swiotlb is supposed to be transparent, * hardware behavior. Use of swiotlb is supposed to be transparent,
* i.e. swiotlb must not corrupt memory by clobbering unwritten bytes. * i.e. swiotlb must not corrupt memory by clobbering unwritten bytes.
*/ */
swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE); swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE, pool);
return tlb_addr; return tlb_addr;
} }
static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr,
struct io_tlb_pool *mem)
{ {
struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
unsigned long flags; unsigned long flags;
unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr); unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr);
int index, nslots, aindex; int index, nslots, aindex;
...@@ -1505,11 +1506,9 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) ...@@ -1505,11 +1506,9 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
* *
* Return: %true if @tlb_addr belonged to a transient pool that was released. * Return: %true if @tlb_addr belonged to a transient pool that was released.
*/ */
static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr) static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr,
struct io_tlb_pool *pool)
{ {
struct io_tlb_pool *pool;
pool = swiotlb_find_pool(dev, tlb_addr);
if (!pool->transient) if (!pool->transient)
return false; return false;
...@@ -1522,7 +1521,7 @@ static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr) ...@@ -1522,7 +1521,7 @@ static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr)
#else /* !CONFIG_SWIOTLB_DYNAMIC */ #else /* !CONFIG_SWIOTLB_DYNAMIC */
static inline bool swiotlb_del_transient(struct device *dev, static inline bool swiotlb_del_transient(struct device *dev,
phys_addr_t tlb_addr) phys_addr_t tlb_addr, struct io_tlb_pool *pool)
{ {
return false; return false;
} }
...@@ -1532,36 +1531,39 @@ static inline bool swiotlb_del_transient(struct device *dev, ...@@ -1532,36 +1531,39 @@ static inline bool swiotlb_del_transient(struct device *dev,
/* /*
* tlb_addr is the physical address of the bounce buffer to unmap. * tlb_addr is the physical address of the bounce buffer to unmap.
*/ */
void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr, void __swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr,
size_t mapping_size, enum dma_data_direction dir, size_t mapping_size, enum dma_data_direction dir,
unsigned long attrs) unsigned long attrs, struct io_tlb_pool *pool)
{ {
/* /*
* First, sync the memory before unmapping the entry * First, sync the memory before unmapping the entry
*/ */
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE); swiotlb_bounce(dev, tlb_addr, mapping_size,
DMA_FROM_DEVICE, pool);
if (swiotlb_del_transient(dev, tlb_addr)) if (swiotlb_del_transient(dev, tlb_addr, pool))
return; return;
swiotlb_release_slots(dev, tlb_addr); swiotlb_release_slots(dev, tlb_addr, pool);
} }
void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, void __swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir) size_t size, enum dma_data_direction dir,
struct io_tlb_pool *pool)
{ {
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE); swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE, pool);
else else
BUG_ON(dir != DMA_FROM_DEVICE); BUG_ON(dir != DMA_FROM_DEVICE);
} }
void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, void __swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir) size_t size, enum dma_data_direction dir,
struct io_tlb_pool *pool)
{ {
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE); swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE, pool);
else else
BUG_ON(dir != DMA_TO_DEVICE); BUG_ON(dir != DMA_TO_DEVICE);
} }
...@@ -1585,8 +1587,9 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, ...@@ -1585,8 +1587,9 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
/* Ensure that the address returned is DMA'ble */ /* Ensure that the address returned is DMA'ble */
dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr); dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr);
if (unlikely(!dma_capable(dev, dma_addr, size, true))) { if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir, __swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC); attrs | DMA_ATTR_SKIP_CPU_SYNC,
swiotlb_find_pool(dev, swiotlb_addr));
dev_WARN_ONCE(dev, 1, dev_WARN_ONCE(dev, 1,
"swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", "swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
&dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
...@@ -1764,7 +1767,7 @@ struct page *swiotlb_alloc(struct device *dev, size_t size) ...@@ -1764,7 +1767,7 @@ struct page *swiotlb_alloc(struct device *dev, size_t size)
if (unlikely(!PAGE_ALIGNED(tlb_addr))) { if (unlikely(!PAGE_ALIGNED(tlb_addr))) {
dev_WARN_ONCE(dev, 1, "Cannot allocate pages from non page-aligned swiotlb addr 0x%pa.\n", dev_WARN_ONCE(dev, 1, "Cannot allocate pages from non page-aligned swiotlb addr 0x%pa.\n",
&tlb_addr); &tlb_addr);
swiotlb_release_slots(dev, tlb_addr); swiotlb_release_slots(dev, tlb_addr, pool);
return NULL; return NULL;
} }
...@@ -1774,11 +1777,13 @@ struct page *swiotlb_alloc(struct device *dev, size_t size) ...@@ -1774,11 +1777,13 @@ struct page *swiotlb_alloc(struct device *dev, size_t size)
bool swiotlb_free(struct device *dev, struct page *page, size_t size) bool swiotlb_free(struct device *dev, struct page *page, size_t size)
{ {
phys_addr_t tlb_addr = page_to_phys(page); phys_addr_t tlb_addr = page_to_phys(page);
struct io_tlb_pool *pool;
if (!is_swiotlb_buffer(dev, tlb_addr)) pool = swiotlb_find_pool(dev, tlb_addr);
if (!pool)
return false; return false;
swiotlb_release_slots(dev, tlb_addr); swiotlb_release_slots(dev, tlb_addr, pool);
return true; return true;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment