Commit 5f8ee3c9 authored by Ohad Sharabi, committed by Oded Gabbay

habanalabs: fix dmabuf to export only required size

This patch fixes a bug that was found in the dmabuf flow.
Bug description as found on Gaudi2 device:
1. User allocates 4MB of device memory
    - Note that although the requested allocation size was 4MB, the HMMU
      allocated a full 768MB page to back the request.
    - The user gets a memory handle that points to a single page (768MB).
    - When mapping the handle, the user gets a virtual address pointing
      to the start of the page.
2. User exports the buffer
3. User registers the exported buffer in the importer. This flow has
   a callback to the exporter, which in turn converts the phys_pg_pack
   to an SG list for the importer. This SG list has a single entry of
   size 768MB, although the size that was passed to the importer was
   only 4MB.

The solution is to make sure the importer is exposed only to the
exported size.

This is done by limiting the SG list created by the exporter so that
its total size equals the size of the memory the user actually asked
to export.
Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent d7088580
...@@ -2120,6 +2120,7 @@ struct hl_vm_hw_block_list_node { ...@@ -2120,6 +2120,7 @@ struct hl_vm_hw_block_list_node {
* @pages: the physical page array. * @pages: the physical page array.
* @npages: num physical pages in the pack. * @npages: num physical pages in the pack.
* @total_size: total size of all the pages in this list. * @total_size: total size of all the pages in this list.
* @exported_size: buffer exported size.
* @node: used to attach to deletion list that is used when all the allocations are cleared * @node: used to attach to deletion list that is used when all the allocations are cleared
* at the teardown of the context. * at the teardown of the context.
* @mapping_cnt: number of shared mappings. * @mapping_cnt: number of shared mappings.
...@@ -2136,6 +2137,7 @@ struct hl_vm_phys_pg_pack { ...@@ -2136,6 +2137,7 @@ struct hl_vm_phys_pg_pack {
u64 *pages; u64 *pages;
u64 npages; u64 npages;
u64 total_size; u64 total_size;
u64 exported_size;
struct list_head node; struct list_head node;
atomic_t mapping_cnt; atomic_t mapping_cnt;
u32 asid; u32 asid;
......
...@@ -1548,10 +1548,10 @@ static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size, ...@@ -1548,10 +1548,10 @@ static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size,
} }
static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages, static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages,
u64 page_size, struct device *dev, u64 page_size, u64 exported_size,
enum dma_data_direction dir) struct device *dev, enum dma_data_direction dir)
{ {
u64 chunk_size, bar_address, dma_max_seg_size; u64 chunk_size, bar_address, dma_max_seg_size, cur_size_to_export, cur_npages;
struct asic_fixed_properties *prop; struct asic_fixed_properties *prop;
int rc, i, j, nents, cur_page; int rc, i, j, nents, cur_page;
struct scatterlist *sg; struct scatterlist *sg;
...@@ -1577,16 +1577,23 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 ...@@ -1577,16 +1577,23 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
if (!sgt) if (!sgt)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
/* remove export size restrictions in case not explicitly defined */
cur_size_to_export = exported_size ? exported_size : (npages * page_size);
/* If the size of each page is larger than the dma max segment size, /* If the size of each page is larger than the dma max segment size,
* then we can't combine pages and the number of entries in the SGL * then we can't combine pages and the number of entries in the SGL
* will just be the * will just be the
* <number of pages> * <chunks of max segment size in each page> * <number of pages> * <chunks of max segment size in each page>
*/ */
if (page_size > dma_max_seg_size) if (page_size > dma_max_seg_size) {
nents = npages * DIV_ROUND_UP_ULL(page_size, dma_max_seg_size); /* we should limit number of pages according to the exported size */
else cur_npages = DIV_ROUND_UP_SECTOR_T(cur_size_to_export, page_size);
nents = cur_npages * DIV_ROUND_UP_SECTOR_T(page_size, dma_max_seg_size);
} else {
cur_npages = npages;
/* Get number of non-contiguous chunks */ /* Get number of non-contiguous chunks */
for (i = 1, nents = 1, chunk_size = page_size ; i < npages ; i++) { for (i = 1, nents = 1, chunk_size = page_size ; i < cur_npages ; i++) {
if (pages[i - 1] + page_size != pages[i] || if (pages[i - 1] + page_size != pages[i] ||
chunk_size + page_size > dma_max_seg_size) { chunk_size + page_size > dma_max_seg_size) {
nents++; nents++;
...@@ -1596,6 +1603,7 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 ...@@ -1596,6 +1603,7 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
chunk_size += page_size; chunk_size += page_size;
} }
}
rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO); rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
if (rc) if (rc)
...@@ -1618,7 +1626,8 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 ...@@ -1618,7 +1626,8 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
else else
cur_device_address += dma_max_seg_size; cur_device_address += dma_max_seg_size;
chunk_size = min(size_left, dma_max_seg_size); /* make sure not to export over exported size */
chunk_size = min3(size_left, dma_max_seg_size, cur_size_to_export);
bar_address = hdev->dram_pci_bar_start + cur_device_address; bar_address = hdev->dram_pci_bar_start + cur_device_address;
...@@ -1626,6 +1635,8 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 ...@@ -1626,6 +1635,8 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
if (rc) if (rc)
goto error_unmap; goto error_unmap;
cur_size_to_export -= chunk_size;
if (size_left > dma_max_seg_size) { if (size_left > dma_max_seg_size) {
size_left -= dma_max_seg_size; size_left -= dma_max_seg_size;
} else { } else {
...@@ -1637,7 +1648,7 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 ...@@ -1637,7 +1648,7 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
/* Merge pages and put them into the scatterlist */ /* Merge pages and put them into the scatterlist */
for_each_sgtable_dma_sg(sgt, sg, i) { for_each_sgtable_dma_sg(sgt, sg, i) {
chunk_size = page_size; chunk_size = page_size;
for (j = cur_page + 1 ; j < npages ; j++) { for (j = cur_page + 1 ; j < cur_npages ; j++) {
if (pages[j - 1] + page_size != pages[j] || if (pages[j - 1] + page_size != pages[j] ||
chunk_size + page_size > dma_max_seg_size) chunk_size + page_size > dma_max_seg_size)
break; break;
...@@ -1648,10 +1659,13 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 ...@@ -1648,10 +1659,13 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
bar_address = hdev->dram_pci_bar_start + bar_address = hdev->dram_pci_bar_start +
(pages[cur_page] - prop->dram_base_address); (pages[cur_page] - prop->dram_base_address);
/* make sure not to export over exported size */
chunk_size = min(chunk_size, cur_size_to_export);
rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir); rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
if (rc) if (rc)
goto error_unmap; goto error_unmap;
cur_size_to_export -= chunk_size;
cur_page = j; cur_page = j;
} }
} }
...@@ -1722,6 +1736,7 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment, ...@@ -1722,6 +1736,7 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
phys_pg_pack->pages, phys_pg_pack->pages,
phys_pg_pack->npages, phys_pg_pack->npages,
phys_pg_pack->page_size, phys_pg_pack->page_size,
phys_pg_pack->exported_size,
attachment->dev, attachment->dev,
dir); dir);
else else
...@@ -1729,6 +1744,7 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment, ...@@ -1729,6 +1744,7 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
&hl_dmabuf->device_address, &hl_dmabuf->device_address,
1, 1,
hl_dmabuf->dmabuf->size, hl_dmabuf->dmabuf->size,
0,
attachment->dev, attachment->dev,
dir); dir);
...@@ -2033,6 +2049,7 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o ...@@ -2033,6 +2049,7 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
if (rc) if (rc)
goto dec_memhash_export_cnt; goto dec_memhash_export_cnt;
phys_pg_pack->exported_size = size;
hl_dmabuf->phys_pg_pack = phys_pg_pack; hl_dmabuf->phys_pg_pack = phys_pg_pack;
hl_dmabuf->memhash_hnode = hnode; hl_dmabuf->memhash_hnode = hnode;
} else { } else {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment