Commit ab4ea587 authored by Ohad Sharabi's avatar Ohad Sharabi Committed by Greg Kroah-Hartman

habanalabs: use for_each_sgtable_dma_sg for dma sgt

Instead of using for_each_sg when iterating sgt that contains dma
entries, use the more proper for_each_sgtable_dma_sg macro.

In addition, both Goya and Gaudi have the exact same implementation
of the asic function that encapsulates the usage of this macro, so
it is better to move that implementation to the common code.
Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent b8d852ad
...@@ -370,8 +370,7 @@ static int userptr_lookup_show(struct seq_file *s, void *data) ...@@ -370,8 +370,7 @@ static int userptr_lookup_show(struct seq_file *s, void *data)
if (dev_entry->userptr_lookup >= userptr->addr && if (dev_entry->userptr_lookup >= userptr->addr &&
dev_entry->userptr_lookup < userptr->addr + userptr->size) { dev_entry->userptr_lookup < userptr->addr + userptr->size) {
total_npages = 0; total_npages = 0;
for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, for_each_sgtable_dma_sg(userptr->sgt, sg, i) {
i) {
npages = hl_get_sg_info(sg, &dma_addr); npages = hl_get_sg_info(sg, &dma_addr);
sg_start = userptr->addr + sg_start = userptr->addr +
total_npages * PAGE_SIZE; total_npages * PAGE_SIZE;
......
...@@ -80,6 +80,38 @@ static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val ...@@ -80,6 +80,38 @@ static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val
return 0; return 0;
} }
int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct scatterlist *sg;
int rc, i;
rc = dma_map_sgtable(&hdev->pdev->dev, sgt, dir, 0);
if (rc)
return rc;
/* Shift to the device's base physical address of host memory if necessary */
if (prop->device_dma_offset_for_host_access)
for_each_sgtable_dma_sg(sgt, sg, i)
sg->dma_address += prop->device_dma_offset_for_host_access;
return 0;
}
void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct scatterlist *sg;
int i;
/* Cancel the device's base physical address of host memory if necessary */
if (prop->device_dma_offset_for_host_access)
for_each_sgtable_dma_sg(sgt, sg, i)
sg->dma_address -= prop->device_dma_offset_for_host_access;
dma_unmap_sgtable(&hdev->pdev->dev, sgt, dir, 0);
}
/* /*
* hl_access_cfg_region - access the config region * hl_access_cfg_region - access the config region
* *
......
...@@ -1274,9 +1274,9 @@ struct fw_load_mgr { ...@@ -1274,9 +1274,9 @@ struct fw_load_mgr {
* @asic_dma_pool_free: free small DMA allocation from pool. * @asic_dma_pool_free: free small DMA allocation from pool.
* @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool. * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
* @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool. * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
* @hl_dma_unmap_sg: DMA unmap scatter-gather list. * @hl_dma_unmap_sgtable: DMA unmap scatter-gather table.
* @cs_parser: parse Command Submission. * @cs_parser: parse Command Submission.
* @asic_dma_map_sg: DMA map scatter-gather list. * @asic_dma_map_sgtable: DMA map scatter-gather table.
* @get_dma_desc_list_size: get number of LIN_DMA packets required for CB. * @get_dma_desc_list_size: get number of LIN_DMA packets required for CB.
* @add_end_of_cb_packets: Add packets to the end of CB, if device requires it. * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it.
* @update_eq_ci: update event queue CI. * @update_eq_ci: update event queue CI.
...@@ -1389,12 +1389,11 @@ struct hl_asic_funcs { ...@@ -1389,12 +1389,11 @@ struct hl_asic_funcs {
size_t size, dma_addr_t *dma_handle); size_t size, dma_addr_t *dma_handle);
void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev, void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
size_t size, void *vaddr); size_t size, void *vaddr);
void (*hl_dma_unmap_sg)(struct hl_device *hdev, void (*hl_dma_unmap_sgtable)(struct hl_device *hdev,
struct scatterlist *sgl, int nents, struct sg_table *sgt,
enum dma_data_direction dir); enum dma_data_direction dir);
int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser); int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser);
int (*asic_dma_map_sg)(struct hl_device *hdev, int (*asic_dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt,
struct scatterlist *sgl, int nents,
enum dma_data_direction dir); enum dma_data_direction dir);
u32 (*get_dma_desc_list_size)(struct hl_device *hdev, u32 (*get_dma_desc_list_size)(struct hl_device *hdev,
struct sg_table *sgt); struct sg_table *sgt);
...@@ -3011,6 +3010,9 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size, ...@@ -3011,6 +3010,9 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
} }
uint64_t hl_set_dram_bar_default(struct hl_device *hdev, u64 addr); uint64_t hl_set_dram_bar_default(struct hl_device *hdev, u64 addr);
int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir);
void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir);
int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val, int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val,
enum debugfs_access_type acc_type); enum debugfs_access_type acc_type);
int hl_access_dev_mem(struct hl_device *hdev, struct pci_mem_region *region, int hl_access_dev_mem(struct hl_device *hdev, struct pci_mem_region *region,
......
...@@ -238,19 +238,18 @@ static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size, ...@@ -238,19 +238,18 @@ static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
goto pin_err; goto pin_err;
} }
rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
userptr->sgt->nents, DMA_BIDIRECTIONAL);
if (rc) {
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
goto dma_map_err;
}
userptr->dma_mapped = true; userptr->dma_mapped = true;
userptr->dir = DMA_BIDIRECTIONAL; userptr->dir = DMA_BIDIRECTIONAL;
userptr->vm_type = VM_TYPE_USERPTR; userptr->vm_type = VM_TYPE_USERPTR;
*p_userptr = userptr; *p_userptr = userptr;
rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
if (rc) {
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
goto dma_map_err;
}
return 0; return 0;
dma_map_err: dma_map_err:
...@@ -901,7 +900,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, ...@@ -901,7 +900,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
* consecutive block. * consecutive block.
*/ */
total_npages = 0; total_npages = 0;
for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) { for_each_sgtable_dma_sg(userptr->sgt, sg, i) {
npages = hl_get_sg_info(sg, &dma_addr); npages = hl_get_sg_info(sg, &dma_addr);
total_npages += npages; total_npages += npages;
...@@ -930,7 +929,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, ...@@ -930,7 +929,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
phys_pg_pack->total_size = total_npages * page_size; phys_pg_pack->total_size = total_npages * page_size;
j = 0; j = 0;
for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) { for_each_sgtable_dma_sg(userptr->sgt, sg, i) {
npages = hl_get_sg_info(sg, &dma_addr); npages = hl_get_sg_info(sg, &dma_addr);
/* align down to physical page size and save the offset */ /* align down to physical page size and save the offset */
...@@ -2444,9 +2443,7 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr) ...@@ -2444,9 +2443,7 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
hl_debugfs_remove_userptr(hdev, userptr); hl_debugfs_remove_userptr(hdev, userptr);
if (userptr->dma_mapped) if (userptr->dma_mapped)
hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl, hdev->asic_funcs->hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
userptr->sgt->nents,
userptr->dir);
unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true); unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
kvfree(userptr->pages); kvfree(userptr->pages);
......
...@@ -5038,37 +5038,7 @@ static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev, ...@@ -5038,37 +5038,7 @@ static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
} }
static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl, static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
int nents, enum dma_data_direction dir)
{
struct scatterlist *sg;
int i;
if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
return -ENOMEM;
/* Shift to the device's base physical address of host memory */
for_each_sg(sgl, sg, nents, i)
sg->dma_address += HOST_PHYS_BASE;
return 0;
}
static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir)
{
struct scatterlist *sg;
int i;
/* Cancel the device's base physical address of host memory */
for_each_sg(sgl, sg, nents, i)
sg->dma_address -= HOST_PHYS_BASE;
dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
}
static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
struct sg_table *sgt)
{ {
struct scatterlist *sg, *sg_next_iter; struct scatterlist *sg, *sg_next_iter;
u32 count, dma_desc_cnt; u32 count, dma_desc_cnt;
...@@ -5077,8 +5047,7 @@ static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, ...@@ -5077,8 +5047,7 @@ static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
dma_desc_cnt = 0; dma_desc_cnt = 0;
for_each_sg(sgt->sgl, sg, sgt->nents, count) { for_each_sgtable_dma_sg(sgt, sg, count) {
len = sg_dma_len(sg); len = sg_dma_len(sg);
addr = sg_dma_address(sg); addr = sg_dma_address(sg);
...@@ -5132,8 +5101,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev, ...@@ -5132,8 +5101,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
list_add_tail(&userptr->job_node, parser->job_userptr_list); list_add_tail(&userptr->job_node, parser->job_userptr_list);
rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl, rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
userptr->sgt->nents, dir);
if (rc) { if (rc) {
dev_err(hdev->dev, "failed to map sgt with DMA region\n"); dev_err(hdev->dev, "failed to map sgt with DMA region\n");
goto unpin_memory; goto unpin_memory;
...@@ -5408,7 +5376,7 @@ static int gaudi_patch_dma_packet(struct hl_device *hdev, ...@@ -5408,7 +5376,7 @@ static int gaudi_patch_dma_packet(struct hl_device *hdev,
sgt = userptr->sgt; sgt = userptr->sgt;
dma_desc_cnt = 0; dma_desc_cnt = 0;
for_each_sg(sgt->sgl, sg, sgt->nents, count) { for_each_sgtable_dma_sg(sgt, sg, count) {
len = sg_dma_len(sg); len = sg_dma_len(sg);
dma_addr = sg_dma_address(sg); dma_addr = sg_dma_address(sg);
...@@ -9261,9 +9229,9 @@ static const struct hl_asic_funcs gaudi_funcs = { ...@@ -9261,9 +9229,9 @@ static const struct hl_asic_funcs gaudi_funcs = {
.asic_dma_pool_free = gaudi_dma_pool_free, .asic_dma_pool_free = gaudi_dma_pool_free,
.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc, .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free, .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
.hl_dma_unmap_sg = gaudi_dma_unmap_sg, .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
.cs_parser = gaudi_cs_parser, .cs_parser = gaudi_cs_parser,
.asic_dma_map_sg = gaudi_dma_map_sg, .asic_dma_map_sgtable = hl_dma_map_sgtable,
.get_dma_desc_list_size = gaudi_get_dma_desc_list_size, .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
.add_end_of_cb_packets = gaudi_add_end_of_cb_packets, .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
.update_eq_ci = gaudi_update_eq_ci, .update_eq_ci = gaudi_update_eq_ci,
......
...@@ -3311,35 +3311,6 @@ void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, ...@@ -3311,35 +3311,6 @@ void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
} }
static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir)
{
struct scatterlist *sg;
int i;
if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
return -ENOMEM;
/* Shift to the device's base physical address of host memory */
for_each_sg(sgl, sg, nents, i)
sg->dma_address += HOST_PHYS_BASE;
return 0;
}
static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir)
{
struct scatterlist *sg;
int i;
/* Cancel the device's base physical address of host memory */
for_each_sg(sgl, sg, nents, i)
sg->dma_address -= HOST_PHYS_BASE;
dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
}
u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt) u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
{ {
struct scatterlist *sg, *sg_next_iter; struct scatterlist *sg, *sg_next_iter;
...@@ -3349,8 +3320,7 @@ u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt) ...@@ -3349,8 +3320,7 @@ u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
dma_desc_cnt = 0; dma_desc_cnt = 0;
for_each_sg(sgt->sgl, sg, sgt->nents, count) { for_each_sgtable_dma_sg(sgt, sg, count) {
len = sg_dma_len(sg); len = sg_dma_len(sg);
addr = sg_dma_address(sg); addr = sg_dma_address(sg);
...@@ -3404,8 +3374,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev, ...@@ -3404,8 +3374,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
list_add_tail(&userptr->job_node, parser->job_userptr_list); list_add_tail(&userptr->job_node, parser->job_userptr_list);
rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl, rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
userptr->sgt->nents, dir);
if (rc) { if (rc) {
dev_err(hdev->dev, "failed to map sgt with DMA region\n"); dev_err(hdev->dev, "failed to map sgt with DMA region\n");
goto unpin_memory; goto unpin_memory;
...@@ -3869,7 +3838,7 @@ static int goya_patch_dma_packet(struct hl_device *hdev, ...@@ -3869,7 +3838,7 @@ static int goya_patch_dma_packet(struct hl_device *hdev,
sgt = userptr->sgt; sgt = userptr->sgt;
dma_desc_cnt = 0; dma_desc_cnt = 0;
for_each_sg(sgt->sgl, sg, sgt->nents, count) { for_each_sgtable_dma_sg(sgt, sg, count) {
len = sg_dma_len(sg); len = sg_dma_len(sg);
dma_addr = sg_dma_address(sg); dma_addr = sg_dma_address(sg);
...@@ -5497,9 +5466,9 @@ static const struct hl_asic_funcs goya_funcs = { ...@@ -5497,9 +5466,9 @@ static const struct hl_asic_funcs goya_funcs = {
.asic_dma_pool_free = goya_dma_pool_free, .asic_dma_pool_free = goya_dma_pool_free,
.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc, .cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free, .cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
.hl_dma_unmap_sg = goya_dma_unmap_sg, .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
.cs_parser = goya_cs_parser, .cs_parser = goya_cs_parser,
.asic_dma_map_sg = goya_dma_map_sg, .asic_dma_map_sgtable = hl_dma_map_sgtable,
.get_dma_desc_list_size = goya_get_dma_desc_list_size, .get_dma_desc_list_size = goya_get_dma_desc_list_size,
.add_end_of_cb_packets = goya_add_end_of_cb_packets, .add_end_of_cb_packets = goya_add_end_of_cb_packets,
.update_eq_ci = goya_update_eq_ci, .update_eq_ci = goya_update_eq_ci,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment