Commit 893e2f9e authored by Linus Torvalds

Merge tag 'dma-mapping-6.8-2024-01-08' of git://git.infradead.org/users/hch/dma-mapping

Pull dma-mapping updates from Christoph Hellwig:

 - reduce area lock contention for non-primary IO TLB pools (Petr
   Tesarik)

 - don't store redundant offsets in the dma_ranges structures (Robin
   Murphy; see the sketch below)

 - clear dev->dma_mem when freeing per-device pools (Joakim Zhang)

* tag 'dma-mapping-6.8-2024-01-08' of git://git.infradead.org/users/hch/dma-mapping:
  dma-mapping: clear dev->dma_mem to NULL after freeing it
  swiotlb: reduce area lock contention for non-primary IO TLB pools
  dma-mapping: don't store redundant offsets
parents 457e4f99 b07bc234
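
On the second item: struct bus_dma_region stored an offset field that is fully
determined by the other two bounds (offset == cpu_start - dma_start), so the
translation helpers can derive it per lookup instead of carrying it around. A
trimmed-down user-space sketch of the arithmetic, using a hypothetical
struct region and made-up addresses rather than kernel code:

	#include <stdint.h>
	#include <stdio.h>

	/* Analogue of struct bus_dma_region after this merge: no offset
	 * field, because it always equals cpu_start - dma_start.
	 */
	struct region {
		uint64_t cpu_start;	/* CPU physical base of the range */
		uint64_t dma_start;	/* device-visible (bus) base */
		uint64_t size;
	};

	int main(void)
	{
		struct region r = { 0x80000000, 0x0, 0x40000000 };
		uint64_t paddr = 0x80001000;

		/* old scheme: dma = paddr - r.offset, offset precomputed */
		/* new scheme: derive the offset from the bounds on the fly */
		uint64_t dma = r.dma_start + (paddr - r.cpu_start);

		printf("paddr 0x%llx -> dma 0x%llx\n",
		       (unsigned long long)paddr, (unsigned long long)dma);
		return 0;
	}

Dropping the field also removes the need to keep two representations of the
same mapping in sync.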
@@ -1532,7 +1532,6 @@ int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map)
 			r->cpu_start = rentry->res->start;
 			r->dma_start = rentry->res->start - rentry->offset;
 			r->size = resource_size(rentry->res);
-			r->offset = rentry->offset;
 			r++;
 		}
 	}
...
@@ -955,7 +955,6 @@ int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map)
 		r->cpu_start = range.cpu_addr;
 		r->dma_start = range.bus_addr;
 		r->size = range.size;
-		r->offset = range.cpu_addr - range.bus_addr;
 		r++;
 	}
 out:
...
@@ -21,7 +21,6 @@ struct bus_dma_region {
 	phys_addr_t	cpu_start;
 	dma_addr_t	dma_start;
 	u64		size;
-	u64		offset;
 };
 
 static inline dma_addr_t translate_phys_to_dma(struct device *dev,
...
@@ -29,9 +28,12 @@ static inline dma_addr_t translate_phys_to_dma(struct device *dev,
 {
 	const struct bus_dma_region *m;
 
-	for (m = dev->dma_range_map; m->size; m++)
-		if (paddr >= m->cpu_start && paddr - m->cpu_start < m->size)
-			return (dma_addr_t)paddr - m->offset;
+	for (m = dev->dma_range_map; m->size; m++) {
+		u64 offset = paddr - m->cpu_start;
+
+		if (paddr >= m->cpu_start && offset < m->size)
+			return m->dma_start + offset;
+	}
 
 	/* make sure dma_capable fails when no translation is available */
 	return DMA_MAPPING_ERROR;
...
@@ -42,9 +44,12 @@ static inline phys_addr_t translate_dma_to_phys(struct device *dev,
 {
 	const struct bus_dma_region *m;
 
-	for (m = dev->dma_range_map; m->size; m++)
-		if (dma_addr >= m->dma_start && dma_addr - m->dma_start < m->size)
-			return (phys_addr_t)dma_addr + m->offset;
+	for (m = dev->dma_range_map; m->size; m++) {
+		u64 offset = dma_addr - m->dma_start;
+
+		if (dma_addr >= m->dma_start && offset < m->size)
+			return m->cpu_start + offset;
+	}
 
 	return (phys_addr_t)-1;
 }
...
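
A note on the two rewritten helpers above: the new code computes the offset
with an unsigned subtraction before the range test, and keeps the explicit
lower-bound guard (paddr >= m->cpu_start, resp. dma_addr >= m->dma_start) so
that an address below the region base, whose subtraction wraps around, is
still rejected. A stand-alone sketch of that bounds-check shape, with a
hypothetical in_region() helper and made-up values:

	#include <stdint.h>
	#include <stdio.h>

	/* Same shape as the new translate_phys_to_dma() and
	 * translate_dma_to_phys(): derive the offset once, then test
	 * the lower bound and the size.
	 */
	static int in_region(uint64_t paddr, uint64_t cpu_start, uint64_t size)
	{
		uint64_t offset = paddr - cpu_start;	/* unsigned; may wrap */

		return paddr >= cpu_start && offset < size;
	}

	int main(void)
	{
		printf("%d\n", in_region(0x80001000, 0x80000000, 0x4000)); /* 1 */
		printf("%d\n", in_region(0x00001000, 0x80000000, 0x4000)); /* 0 */
		return 0;
	}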
@@ -132,8 +132,10 @@ int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
 
 void dma_release_coherent_memory(struct device *dev)
 {
-	if (dev)
+	if (dev) {
 		_dma_release_coherent_memory(dev->dma_mem);
+		dev->dma_mem = NULL;
+	}
 }
 
 static void *__dma_alloc_from_coherent(struct device *dev,
...
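
On the coherent-memory fix above: dma_release_coherent_memory() used to free
the per-device pool while leaving dev->dma_mem pointing at the freed
structure, so a later release could act on a stale pointer. A minimal
user-space analogue of the hazard and of the fix, with hypothetical
device_like and release() names rather than the kernel API:

	#include <stdlib.h>

	struct dma_coherent_mem { int dummy; };

	struct device_like {
		struct dma_coherent_mem *dma_mem;
	};

	/* Mirrors the fixed dma_release_coherent_memory(): free the pool,
	 * then clear the pointer so a repeated call is a harmless no-op
	 * instead of a double free.
	 */
	static void release(struct device_like *dev)
	{
		if (dev) {
			free(dev->dma_mem);
			dev->dma_mem = NULL;	/* the line this merge adds */
		}
	}

	int main(void)
	{
		struct device_like dev = {
			malloc(sizeof(struct dma_coherent_mem))
		};

		release(&dev);
		release(&dev);	/* safe now; a double free before the fix */
		return 0;
	}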
@@ -677,7 +677,6 @@ int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start,
 		return -ENOMEM;
 	map[0].cpu_start = cpu_start;
 	map[0].dma_start = dma_start;
-	map[0].offset = offset;
 	map[0].size = size;
 	dev->dma_range_map = map;
 	return 0;
...
@@ -957,7 +957,7 @@ static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
 #endif /* CONFIG_DEBUG_FS */
 
 /**
- * swiotlb_area_find_slots() - search for slots in one IO TLB memory area
+ * swiotlb_search_pool_area() - search one memory area in one pool
  * @dev:	Device which maps the buffer.
  * @pool:	Memory pool to be searched.
  * @area_index:	Index of the IO TLB memory area to be searched.
...
@@ -972,7 +972,7 @@ static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
  *
  * Return: Index of the first allocated slot, or -1 on error.
  */
-static int swiotlb_area_find_slots(struct device *dev, struct io_tlb_pool *pool,
+static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool,
 		int area_index, phys_addr_t orig_addr, size_t alloc_size,
 		unsigned int alloc_align_mask)
 {
...
@@ -1066,41 +1066,50 @@ static int swiotlb_area_find_slots(struct device *dev, struct io_tlb_pool *pool,
 	return slot_index;
 }
 
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+
 /**
- * swiotlb_pool_find_slots() - search for slots in one memory pool
+ * swiotlb_search_area() - search one memory area in all pools
  * @dev:	Device which maps the buffer.
- * @pool:	Memory pool to be searched.
+ * @start_cpu:	Start CPU number.
+ * @cpu_offset:	Offset from @start_cpu.
  * @orig_addr:	Original (non-bounced) IO buffer address.
  * @alloc_size: Total requested size of the bounce buffer,
  *		including initial alignment padding.
  * @alloc_align_mask:	Required alignment of the allocated buffer.
+ * @retpool:	Used memory pool, updated on return.
  *
- * Search through one memory pool to find a sequence of slots that match the
+ * Search one memory area in all pools for a sequence of slots that match the
  * allocation constraints.
  *
 * Return: Index of the first allocated slot, or -1 on error.
 */
-static int swiotlb_pool_find_slots(struct device *dev, struct io_tlb_pool *pool,
-		phys_addr_t orig_addr, size_t alloc_size,
-		unsigned int alloc_align_mask)
+static int swiotlb_search_area(struct device *dev, int start_cpu,
+		int cpu_offset, phys_addr_t orig_addr, size_t alloc_size,
+		unsigned int alloc_align_mask, struct io_tlb_pool **retpool)
 {
-	int start = raw_smp_processor_id() & (pool->nareas - 1);
-	int i = start, index;
+	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+	struct io_tlb_pool *pool;
+	int area_index;
+	int index = -1;
 
-	do {
-		index = swiotlb_area_find_slots(dev, pool, i, orig_addr,
-						alloc_size, alloc_align_mask);
-		if (index >= 0)
-			return index;
-		if (++i >= pool->nareas)
-			i = 0;
-	} while (i != start);
-
-	return -1;
+	rcu_read_lock();
+	list_for_each_entry_rcu(pool, &mem->pools, node) {
+		if (cpu_offset >= pool->nareas)
+			continue;
+		area_index = (start_cpu + cpu_offset) & (pool->nareas - 1);
+		index = swiotlb_search_pool_area(dev, pool, area_index,
+						 orig_addr, alloc_size,
+						 alloc_align_mask);
+		if (index >= 0) {
+			*retpool = pool;
+			break;
+		}
+	}
+	rcu_read_unlock();
+	return index;
 }
 
-#ifdef CONFIG_SWIOTLB_DYNAMIC
-
 /**
  * swiotlb_find_slots() - search for slots in the whole swiotlb
  * @dev:	Device which maps the buffer.
...
@@ -1124,18 +1133,17 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 	unsigned long nslabs;
 	unsigned long flags;
 	u64 phys_limit;
+	int cpu, i;
 	int index;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(pool, &mem->pools, node) {
-		index = swiotlb_pool_find_slots(dev, pool, orig_addr,
-						alloc_size, alloc_align_mask);
-		if (index >= 0) {
-			rcu_read_unlock();
+	cpu = raw_smp_processor_id();
+	for (i = 0; i < default_nareas; ++i) {
+		index = swiotlb_search_area(dev, cpu, i, orig_addr, alloc_size,
+					    alloc_align_mask, &pool);
+		if (index >= 0)
 			goto found;
-		}
 	}
-	rcu_read_unlock();
+
 	if (!mem->can_grow)
 		return -1;
...
@@ -1148,8 +1156,8 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 	if (!pool)
 		return -1;
 
-	index = swiotlb_pool_find_slots(dev, pool, orig_addr,
-					alloc_size, alloc_align_mask);
+	index = swiotlb_search_pool_area(dev, pool, 0, orig_addr,
+					 alloc_size, alloc_align_mask);
 	if (index < 0) {
 		swiotlb_dyn_free(&pool->rcu);
 		return -1;
...
@@ -1192,9 +1200,21 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 		size_t alloc_size, unsigned int alloc_align_mask,
 		struct io_tlb_pool **retpool)
 {
-	*retpool = &dev->dma_io_tlb_mem->defpool;
-	return swiotlb_pool_find_slots(dev, *retpool,
-			orig_addr, alloc_size, alloc_align_mask);
+	struct io_tlb_pool *pool;
+	int start, i;
+	int index;
+
+	*retpool = pool = &dev->dma_io_tlb_mem->defpool;
+	i = start = raw_smp_processor_id() & (pool->nareas - 1);
+	do {
+		index = swiotlb_search_pool_area(dev, pool, i, orig_addr,
+						 alloc_size, alloc_align_mask);
+		if (index >= 0)
+			return index;
+		if (++i >= pool->nareas)
+			i = 0;
+	} while (i != start);
+	return -1;
 }
 
 #endif /* CONFIG_SWIOTLB_DYNAMIC */
...
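
Taken together, the swiotlb hunks above invert the search order: instead of
exhausting every area of one pool before trying the next pool,
swiotlb_find_slots() now probes one area per pool at a time, starting from
the area that corresponds to the calling CPU, so concurrent CPUs begin on
different area locks. A stand-alone sketch of the index arithmetic, with a
hypothetical area_index() helper and made-up sizes rather than kernel code:

	#include <stdio.h>

	/* Area selection as in swiotlb_search_area(): nareas is a power of
	 * two, so the mask wraps (start_cpu + cpu_offset) into a valid index.
	 */
	static int area_index(int start_cpu, int cpu_offset, int nareas)
	{
		return (start_cpu + cpu_offset) & (nareas - 1);
	}

	int main(void)
	{
		int nareas = 4;	/* areas per pool; always a power of two */

		/* Two CPUs probing in lockstep: on every round they land on
		 * different areas, so their attempts take different locks.
		 */
		for (int off = 0; off < nareas; off++)
			printf("off %d: cpu0 -> area %d, cpu1 -> area %d\n",
			       off, area_index(0, off, nareas),
			       area_index(1, off, nareas));
		return 0;
	}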