Commit 1ef0c327 authored by Ohad Sharabi, committed by Oded Gabbay

habanalabs: refactor dma asic-specific functions

This is a prerequisite patch for adding tracepoints to the DMA memory
operations (allocation/free) in the driver.

The main purpose is to be able to cross-reference this data with the map
operations and determine whether a memory violation occurred, for example
freeing a DMA allocation before unmapping it from device memory.

To achieve this, the DMA alloc/free code flows were refactored so that a
single DMA tracepoint will catch many flows.
Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent c37d50e8
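
The shape of the refactor: all six exported helpers funnel into a pair of
common dispatchers, hl_dma_alloc_common() and hl_asic_dma_free_common(),
keyed by the new enum dma_alloc_type. A single tracepoint placed in each
dispatcher therefore covers every ASIC callback and every allocation
flavor. A minimal sketch of where such a hook would sit; the
trace_habanalabs_dma_alloc() event is hypothetical here, since this patch
deliberately adds no tracing yet:

static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size,
				 dma_addr_t *dma_handle, gfp_t flag,
				 enum dma_alloc_type alloc_type)
{
	void *ptr = NULL;

	switch (alloc_type) {
	case DMA_ALLOC_COHERENT:
		ptr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, size, dma_handle, flag);
		break;
	case DMA_ALLOC_CPU_ACCESSIBLE:
		ptr = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
		break;
	case DMA_ALLOC_POOL:
		ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, size, flag, dma_handle);
		break;
	}

	/* Hypothetical hook: one call site now observes every DMA allocation. */
	if (ptr)
		trace_habanalabs_dma_alloc(hdev->dev, (u64) (uintptr_t) ptr, *dma_handle, size);

	return ptr;
}
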
@@ -143,8 +143,7 @@ static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
 		gen_pool_free(hdev->internal_cb_pool,
 				(uintptr_t)cb->kernel_address, cb->size);
 	else
-		hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
-				cb->kernel_address, cb->bus_address);
+		hl_asic_dma_free_coherent(hdev, cb->size, cb->kernel_address, cb->bus_address);
 	kfree(cb);
 }
@@ -195,14 +194,11 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
 		cb->is_internal = true;
 		cb->bus_address = hdev->internal_cb_va_base + cb_offset;
 	} else if (ctx_id == HL_KERNEL_ASID_ID) {
-		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
-						&cb->bus_address, GFP_ATOMIC);
+		p = hl_asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, GFP_ATOMIC);
 		if (!p)
-			p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
-					cb_size, &cb->bus_address, GFP_KERNEL);
+			p = hl_asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, GFP_KERNEL);
 	} else {
-		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
-						&cb->bus_address,
+		p = hl_asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address,
 						GFP_USER | __GFP_ZERO);
 	}
...
@@ -17,6 +17,12 @@
 
 #define MEM_SCRUB_DEFAULT_VAL 0x1122334455667788
 
+enum dma_alloc_type {
+	DMA_ALLOC_COHERENT,
+	DMA_ALLOC_CPU_ACCESSIBLE,
+	DMA_ALLOC_POOL,
+};
+
 /*
  * hl_set_dram_bar- sets the bar to allow later access to address
  *
@@ -90,6 +96,75 @@ static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val
 	return 0;
 }
 
+static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
+					gfp_t flag, enum dma_alloc_type alloc_type)
+{
+	void *ptr;
+
+	switch (alloc_type) {
+	case DMA_ALLOC_COHERENT:
+		ptr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, size, dma_handle, flag);
+		break;
+	case DMA_ALLOC_CPU_ACCESSIBLE:
+		ptr = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
+		break;
+	case DMA_ALLOC_POOL:
+		ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, size, flag, dma_handle);
+		break;
+	}
+
+	return ptr;
+}
+
+static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *cpu_addr,
+					dma_addr_t dma_handle, enum dma_alloc_type alloc_type)
+{
+	switch (alloc_type) {
+	case DMA_ALLOC_COHERENT:
+		hdev->asic_funcs->asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle);
+		break;
+	case DMA_ALLOC_CPU_ACCESSIBLE:
+		hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, size, cpu_addr);
+		break;
+	case DMA_ALLOC_POOL:
+		hdev->asic_funcs->asic_dma_pool_free(hdev, cpu_addr, dma_handle);
+		break;
+	}
+}
+
+void *hl_asic_dma_alloc_coherent(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
+					gfp_t flag)
+{
+	return hl_dma_alloc_common(hdev, size, dma_handle, flag, DMA_ALLOC_COHERENT);
+}
+
+void hl_asic_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr,
+					dma_addr_t dma_handle)
+{
+	hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT);
+}
+
+void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle)
+{
+	return hl_dma_alloc_common(hdev, size, dma_handle, 0, DMA_ALLOC_CPU_ACCESSIBLE);
+}
+
+void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
+{
+	hl_asic_dma_free_common(hdev, size, vaddr, 0, DMA_ALLOC_CPU_ACCESSIBLE);
+}
+
+void *hl_asic_dma_pool_zalloc(struct hl_device *hdev, size_t size, gfp_t mem_flags,
+					dma_addr_t *dma_handle)
+{
+	return hl_dma_alloc_common(hdev, size, dma_handle, mem_flags, DMA_ALLOC_POOL);
+}
+
+void hl_asic_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
+{
+	hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL);
+}
+
 int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
...
@@ -218,8 +218,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 	u32 tmp, expected_ack_val, pi;
 	int rc;
 
-	pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
-								&pkt_dma_addr);
+	pkt = hl_cpu_accessible_dma_pool_alloc(hdev, len, &pkt_dma_addr);
 	if (!pkt) {
 		dev_err(hdev->dev,
 			"Failed to allocate DMA memory for packet to CPU\n");
@@ -303,7 +302,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 out:
 	mutex_unlock(&hdev->send_cpu_message_lock);
 
-	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt);
+	hl_cpu_accessible_dma_pool_free(hdev, len, pkt);
 
 	return rc;
 }
@@ -644,9 +643,7 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev,
 	u64 result;
 	int rc;
 
-	cpucp_info_cpu_addr =
-			hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
-					sizeof(struct cpucp_info),
+	cpucp_info_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, sizeof(struct cpucp_info),
 					&cpucp_info_dma_addr);
 	if (!cpucp_info_cpu_addr) {
 		dev_err(hdev->dev,
@@ -708,8 +705,7 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev,
 	prop->fw_app_cpu_boot_dev_sts1 = RREG32(sts_boot_dev_sts1_reg);
 
 out:
-	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
-			sizeof(struct cpucp_info), cpucp_info_cpu_addr);
+	hl_cpu_accessible_dma_pool_free(hdev, sizeof(struct cpucp_info), cpucp_info_cpu_addr);
 
 	return rc;
 }
@@ -792,9 +788,8 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
 	u64 result;
 	int rc;
 
-	eeprom_info_cpu_addr =
-			hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
-					max_size, &eeprom_info_dma_addr);
+	eeprom_info_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, max_size,
+								&eeprom_info_dma_addr);
 	if (!eeprom_info_cpu_addr) {
 		dev_err(hdev->dev,
 			"Failed to allocate DMA memory for CPU-CP EEPROM packet\n");
@@ -822,8 +817,7 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
 	memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size));
 
 out:
-	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size,
-			eeprom_info_cpu_addr);
+	hl_cpu_accessible_dma_pool_free(hdev, max_size, eeprom_info_cpu_addr);
 
 	return rc;
 }
@@ -840,8 +834,7 @@ int hl_fw_get_monitor_dump(struct hl_device *hdev, void *data)
 	int i, rc;
 
 	data_size = sizeof(struct cpucp_monitor_dump);
-	mon_dump_cpu_addr = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, data_size,
-										&mon_dump_dma_addr);
+	mon_dump_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, data_size, &mon_dump_dma_addr);
 	if (!mon_dump_cpu_addr) {
 		dev_err(hdev->dev,
 			"Failed to allocate DMA memory for CPU-CP monitor-dump packet\n");
@@ -871,7 +864,7 @@ int hl_fw_get_monitor_dump(struct hl_device *hdev, void *data)
 	}
 
 out:
-	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, data_size, mon_dump_cpu_addr);
+	hl_cpu_accessible_dma_pool_free(hdev, data_size, mon_dump_cpu_addr);
 
 	return rc;
 }
@@ -1064,8 +1057,7 @@ int hl_fw_dram_replaced_row_get(struct hl_device *hdev,
 	u64 result;
 	int rc;
 
-	cpucp_repl_rows_info_cpu_addr =
-			hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
+	cpucp_repl_rows_info_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev,
 					sizeof(struct cpucp_hbm_row_info),
 					&cpucp_repl_rows_info_dma_addr);
 	if (!cpucp_repl_rows_info_cpu_addr) {
@@ -1092,8 +1084,7 @@ int hl_fw_dram_replaced_row_get(struct hl_device *hdev,
 	memcpy(info, cpucp_repl_rows_info_cpu_addr, sizeof(*info));
 
 out:
-	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
-			sizeof(struct cpucp_hbm_row_info),
+	hl_cpu_accessible_dma_pool_free(hdev, sizeof(struct cpucp_hbm_row_info),
 					cpucp_repl_rows_info_cpu_addr);
 
 	return rc;
...
@@ -3121,6 +3121,15 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
 }
 
 uint64_t hl_set_dram_bar_default(struct hl_device *hdev, u64 addr);
+void *hl_asic_dma_alloc_coherent(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
+					gfp_t flag);
+void hl_asic_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr,
+					dma_addr_t dma_handle);
+void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle);
+void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr);
+void *hl_asic_dma_pool_zalloc(struct hl_device *hdev, size_t size, gfp_t mem_flags,
+					dma_addr_t *dma_handle);
+void hl_asic_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr);
 int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir);
 void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
 				enum dma_data_direction dir);
...
@@ -807,13 +807,9 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
 	int rc;
 
 	if (is_cpu_queue)
-		p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
-							HL_QUEUE_SIZE_IN_BYTES,
-							&q->bus_address);
+		p = hl_cpu_accessible_dma_pool_alloc(hdev, HL_QUEUE_SIZE_IN_BYTES, &q->bus_address);
 	else
-		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
-						HL_QUEUE_SIZE_IN_BYTES,
-						&q->bus_address,
+		p = hl_asic_dma_alloc_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, &q->bus_address,
 						GFP_KERNEL | __GFP_ZERO);
 	if (!p)
 		return -ENOMEM;
@@ -839,13 +835,9 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
 
 free_queue:
 	if (is_cpu_queue)
-		hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
-					HL_QUEUE_SIZE_IN_BYTES,
-					q->kernel_address);
+		hl_cpu_accessible_dma_pool_free(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address);
 	else
-		hdev->asic_funcs->asic_dma_free_coherent(hdev,
-					HL_QUEUE_SIZE_IN_BYTES,
-					q->kernel_address,
+		hl_asic_dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address,
 					q->bus_address);
 
 	return rc;
@@ -885,9 +877,7 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
 {
 	void *p;
 
-	p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
-						HL_QUEUE_SIZE_IN_BYTES,
-						&q->bus_address,
+	p = hl_asic_dma_alloc_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, &q->bus_address,
 						GFP_KERNEL | __GFP_ZERO);
 	if (!p)
 		return -ENOMEM;
@@ -1061,13 +1051,9 @@ static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
 	kfree(q->shadow_queue);
 
 	if (q->queue_type == QUEUE_TYPE_CPU)
-		hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
-					HL_QUEUE_SIZE_IN_BYTES,
-					q->kernel_address);
+		hl_cpu_accessible_dma_pool_free(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address);
 	else
-		hdev->asic_funcs->asic_dma_free_coherent(hdev,
-					HL_QUEUE_SIZE_IN_BYTES,
-					q->kernel_address,
+		hl_asic_dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address,
 					q->bus_address);
 }
...
@@ -403,8 +403,8 @@ int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
 {
 	void *p;
 
-	p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
-				&q->bus_address, GFP_KERNEL | __GFP_ZERO);
+	p = hl_asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES, &q->bus_address,
+					GFP_KERNEL | __GFP_ZERO);
 	if (!p)
 		return -ENOMEM;
@@ -429,9 +429,7 @@ int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
  */
 void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q)
 {
-	hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
-						q->kernel_address,
-						q->bus_address);
+	hl_asic_dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES, q->kernel_address, q->bus_address);
 }
 
 void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q)
@@ -464,9 +462,7 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
 {
 	void *p;
 
-	p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
-						HL_EQ_SIZE_IN_BYTES,
-						&q->bus_address);
+	p = hl_cpu_accessible_dma_pool_alloc(hdev, HL_EQ_SIZE_IN_BYTES, &q->bus_address);
 	if (!p)
 		return -ENOMEM;
@@ -490,9 +486,7 @@ void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q)
 {
 	flush_workqueue(hdev->eq_wq);
 
-	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
-					HL_EQ_SIZE_IN_BYTES,
-					q->kernel_address);
+	hl_cpu_accessible_dma_pool_free(hdev, HL_EQ_SIZE_IN_BYTES, q->kernel_address);
 }
 
 void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q)
...
@@ -1070,8 +1070,7 @@ static int gaudi_init_tpc_mem(struct hl_device *hdev)
 	}
 
 	fw_size = fw->size;
-	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
-			&dma_handle, GFP_KERNEL | __GFP_ZERO);
+	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
 	if (!cpu_addr) {
 		dev_err(hdev->dev,
 			"Failed to allocate %zu of dma memory for TPC kernel\n",
@@ -1084,8 +1083,7 @@ static int gaudi_init_tpc_mem(struct hl_device *hdev)
 
 	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
 
-	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
-			dma_handle);
+	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
 
 out:
 	release_firmware(fw);
@@ -1729,9 +1727,7 @@ static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
 	 */
 
 	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
-		virt_addr_arr[i] =
-			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
-						HL_CPU_ACCESSIBLE_MEM_SIZE,
+		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
 						&dma_addr_arr[i],
 						GFP_KERNEL | __GFP_ZERO);
 		if (!virt_addr_arr[i]) {
@@ -1762,9 +1758,7 @@ static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
 
 free_dma_mem_arr:
 	for (j = 0 ; j < i ; j++)
-		hdev->asic_funcs->asic_dma_free_coherent(hdev,
-						HL_CPU_ACCESSIBLE_MEM_SIZE,
-						virt_addr_arr[j],
+		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
 						dma_addr_arr[j]);
 
 	return rc;
@@ -1780,9 +1774,7 @@ static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
 		q = &gaudi->internal_qmans[i];
 		if (!q->pq_kernel_addr)
 			continue;
-		hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
-							q->pq_kernel_addr,
-							q->pq_dma_addr);
+		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
 	}
 }
@@ -1817,9 +1809,7 @@ static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
 			goto free_internal_qmans_pq_mem;
 		}
 
-		q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
-						hdev, q->pq_size,
-						&q->pq_dma_addr,
+		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
 						GFP_KERNEL | __GFP_ZERO);
 		if (!q->pq_kernel_addr) {
 			rc = -ENOMEM;
@@ -1961,9 +1951,7 @@ static int gaudi_sw_init(struct hl_device *hdev)
 	if (!hdev->asic_prop.fw_security_enabled)
 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
 					hdev->cpu_pci_msb_addr);
-	hdev->asic_funcs->asic_dma_free_coherent(hdev,
-			HL_CPU_ACCESSIBLE_MEM_SIZE,
-			hdev->cpu_accessible_dma_mem,
+	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
 			hdev->cpu_accessible_dma_address);
 free_dma_pool:
 	dma_pool_destroy(hdev->dma_pool);
@@ -1984,9 +1972,7 @@ static int gaudi_sw_fini(struct hl_device *hdev)
 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
 					hdev->cpu_pci_msb_addr);
 
-	hdev->asic_funcs->asic_dma_free_coherent(hdev,
-			HL_CPU_ACCESSIBLE_MEM_SIZE,
-			hdev->cpu_accessible_dma_mem,
+	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
 			hdev->cpu_accessible_dma_address);
 
 	dma_pool_destroy(hdev->dma_pool);
@@ -4936,8 +4922,7 @@ static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
 
 	fence_val = GAUDI_QMAN0_FENCE_VAL;
 
-	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
-							&fence_dma_addr);
+	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
 	if (!fence_ptr) {
 		dev_err(hdev->dev,
 			"Failed to allocate memory for H/W queue %d testing\n",
@@ -4947,9 +4932,8 @@ static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
 
 	*fence_ptr = 0;
 
-	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
-					sizeof(struct packet_msg_prot),
-					GFP_KERNEL, &pkt_dma_addr);
+	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
+						&pkt_dma_addr);
 	if (!fence_pkt) {
 		dev_err(hdev->dev,
 			"Failed to allocate packet for H/W queue %d testing\n",
@@ -4989,11 +4973,9 @@ static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
 	}
 
 free_pkt:
-	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
-					pkt_dma_addr);
+	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
 free_fence_ptr:
-	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
-					fence_dma_addr);
+	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
 
 	return rc;
 }
@@ -6164,10 +6146,7 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
 	bool is_eng_idle;
 	int rc = 0, dma_id;
 
-	kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
-						hdev, SZ_2M,
-						&dma_addr,
-						GFP_KERNEL | __GFP_ZERO);
+	kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
 
 	if (!kernel_addr)
 		return -ENOMEM;
@@ -6256,8 +6235,7 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
 out:
 	hdev->asic_funcs->hw_queues_unlock(hdev);
 
-	hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
-						dma_addr);
+	hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
 
 	return rc;
 }
@@ -6603,8 +6581,7 @@ static int gaudi_send_job_on_qman0(struct hl_device *hdev,
 		return -EBUSY;
 	}
 
-	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
-							&fence_dma_addr);
+	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
 	if (!fence_ptr) {
 		dev_err(hdev->dev,
 			"Failed to allocate fence memory for QMAN0\n");
@@ -6650,8 +6627,7 @@ static int gaudi_send_job_on_qman0(struct hl_device *hdev,
 
 free_fence_ptr:
 	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
-	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
-					fence_dma_addr);
+	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
 
 	return rc;
 }
@@ -8504,8 +8480,7 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
 		return 0;
 
-	hdev->internal_cb_pool_virt_addr =
-			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
+	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
 					HOST_SPACE_INTERNAL_CB_SZ,
 					&hdev->internal_cb_pool_dma_addr,
 					GFP_KERNEL | __GFP_ZERO);
@@ -8563,9 +8538,7 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
 destroy_internal_cb_pool:
 	gen_pool_destroy(hdev->internal_cb_pool);
 free_internal_cb_pool:
-	hdev->asic_funcs->asic_dma_free_coherent(hdev,
-			HOST_SPACE_INTERNAL_CB_SZ,
-			hdev->internal_cb_pool_virt_addr,
+	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
 			hdev->internal_cb_pool_dma_addr);
 
 	return rc;
@@ -8589,9 +8562,7 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
 
 	gen_pool_destroy(hdev->internal_cb_pool);
 
-	hdev->asic_funcs->asic_dma_free_coherent(hdev,
-			HOST_SPACE_INTERNAL_CB_SZ,
-			hdev->internal_cb_pool_virt_addr,
+	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
 			hdev->internal_cb_pool_dma_addr);
 }
...
@@ -1012,9 +1012,7 @@ static int goya_sw_init(struct hl_device *hdev)
 		goto free_goya_device;
 	}
 
-	hdev->cpu_accessible_dma_mem =
-			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
-					HL_CPU_ACCESSIBLE_MEM_SIZE,
+	hdev->cpu_accessible_dma_mem = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
 					&hdev->cpu_accessible_dma_address,
 					GFP_KERNEL | __GFP_ZERO);
@@ -1066,9 +1064,7 @@ static int goya_sw_init(struct hl_device *hdev)
 free_cpu_accessible_dma_pool:
 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
 free_cpu_dma_mem:
-	hdev->asic_funcs->asic_dma_free_coherent(hdev,
-			HL_CPU_ACCESSIBLE_MEM_SIZE,
-			hdev->cpu_accessible_dma_mem,
+	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
 			hdev->cpu_accessible_dma_address);
 free_dma_pool:
 	dma_pool_destroy(hdev->dma_pool);
@@ -1090,9 +1086,7 @@ static int goya_sw_fini(struct hl_device *hdev)
 
 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
 
-	hdev->asic_funcs->asic_dma_free_coherent(hdev,
-			HL_CPU_ACCESSIBLE_MEM_SIZE,
-			hdev->cpu_accessible_dma_mem,
+	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
 			hdev->cpu_accessible_dma_address);
 
 	dma_pool_destroy(hdev->dma_pool);
@@ -3102,8 +3096,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
 		return -EBUSY;
 	}
 
-	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
-							&fence_dma_addr);
+	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
 	if (!fence_ptr) {
 		dev_err(hdev->dev,
 			"Failed to allocate fence memory for QMAN0\n");
@@ -3143,8 +3136,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
 	}
 
 free_fence_ptr:
-	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
-					fence_dma_addr);
+	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
 
 	goya_qman0_set_security(hdev, false);
@@ -3180,8 +3172,7 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
 
 	fence_val = GOYA_QMAN0_FENCE_VAL;
 
-	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
-							&fence_dma_addr);
+	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
 	if (!fence_ptr) {
 		dev_err(hdev->dev,
 			"Failed to allocate memory for H/W queue %d testing\n",
@@ -3191,9 +3182,8 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
 
 	*fence_ptr = 0;
 
-	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
-					sizeof(struct packet_msg_prot),
-					GFP_KERNEL, &pkt_dma_addr);
+	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
+						&pkt_dma_addr);
 	if (!fence_pkt) {
 		dev_err(hdev->dev,
 			"Failed to allocate packet for H/W queue %d testing\n",
@@ -3232,11 +3222,9 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
 	}
 
 free_pkt:
-	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
-					pkt_dma_addr);
+	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
 free_fence_ptr:
-	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
-					fence_dma_addr);
+	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
 
 	return rc;
 }
...
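
On the caller side, each multi-line dispatch through the per-ASIC function
table collapses into one wrapper call, so alloc and free sites pair up
symmetrically and can later be cross-referenced with map/unmap events. An
illustrative before/after with generic names (p, size, bus_addr are
placeholders, not lines from this patch):

	/* before: direct dispatch through the per-ASIC function table */
	p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, size, &bus_addr,
							GFP_KERNEL | __GFP_ZERO);
	hdev->asic_funcs->asic_dma_free_coherent(hdev, size, p, bus_addr);

	/* after: common entry points that a single tracepoint can observe */
	p = hl_asic_dma_alloc_coherent(hdev, size, &bus_addr, GFP_KERNEL | __GFP_ZERO);
	hl_asic_dma_free_coherent(hdev, size, p, bus_addr);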