Commit 7fc40bca authored by Pawel Piskorski, committed by Oded Gabbay

habanalabs: flush only at the end of the map/unmap

Optimize hl_mmu_map() and hl_mmu_unmap() by not calling flush(ctx)
inside the per-page loop; the flush is now done once, at the end of
the whole map/unmap operation, via the new flush_pte parameter.
Signed-off-by: Pawel Piskorski <ppiskorski@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent bbde5709
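The new flush_pte argument lets a caller that maps or unmaps a range page by page defer the PCI flush to the last page instead of flushing on every iteration. Below is a minimal sketch of the intended calling pattern; the helper name map_range_sketch and its local variables are illustrative only and do not appear in this patch, while the hl_mmu_map() signature matches the one introduced here.

    /* Sketch only: map n_pages pages, requesting the PCI flush solely on
     * the final iteration, mirroring the callers updated by this commit.
     */
    static int map_range_sketch(struct hl_ctx *ctx, u64 va, u64 pa,
                                u32 page_size, u64 n_pages)
    {
            u64 i;
            int rc;

            for (i = 0 ; i < n_pages ; i++) {
                    /* flush_pte is true only for the last page */
                    rc = hl_mmu_map(ctx, va, pa, page_size,
                                    (i + 1) == n_pages);
                    if (rc)
                            return rc;

                    va += page_size;
                    pa += page_size;
            }

            return 0;
    }

On failure, a real call site also has to unmap whatever was mapped so far and flush once at the end, as map_phys_pg_pack() does in the memory.c hunks below.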
@@ -4776,7 +4776,8 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
 		rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
-				prop->dram_base_address + off, PAGE_SIZE_2MB);
+				prop->dram_base_address + off, PAGE_SIZE_2MB,
+				(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
 		if (rc) {
 			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
 				prop->dram_base_address + off);
@@ -4786,7 +4787,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
 		rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
-			hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB);
+			hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true);
 		if (rc) {
 			dev_err(hdev->dev,
@@ -4799,7 +4800,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 			rc = hl_mmu_map(hdev->kernel_ctx,
 				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
 				hdev->cpu_accessible_dma_address + cpu_off,
-				PAGE_SIZE_4KB);
+				PAGE_SIZE_4KB, true);
 			if (rc) {
 				dev_err(hdev->dev,
 					"Map failed for CPU accessible memory\n");
@@ -4825,14 +4826,15 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
 		if (hl_mmu_unmap(hdev->kernel_ctx,
 				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
-				PAGE_SIZE_4KB))
+				PAGE_SIZE_4KB, true))
 			dev_warn_ratelimited(hdev->dev,
 				"failed to unmap address 0x%llx\n",
 				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
 unmap:
 	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
 		if (hl_mmu_unmap(hdev->kernel_ctx,
-				prop->dram_base_address + off, PAGE_SIZE_2MB))
+				prop->dram_base_address + off, PAGE_SIZE_2MB,
+				true))
 			dev_warn_ratelimited(hdev->dev,
 				"failed to unmap address 0x%llx\n",
 				prop->dram_base_address + off);
@@ -4857,14 +4859,15 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
 	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
 		if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
-				PAGE_SIZE_2MB))
+				PAGE_SIZE_2MB, true))
 			dev_warn(hdev->dev,
 				"Failed to unmap CPU accessible memory\n");
 	} else {
 		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
 			if (hl_mmu_unmap(hdev->kernel_ctx,
 					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
-					PAGE_SIZE_4KB))
+					PAGE_SIZE_4KB,
+					(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
 				dev_warn_ratelimited(hdev->dev,
 					"failed to unmap address 0x%llx\n",
 					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
@@ -4872,7 +4875,8 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
 	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
 		if (hl_mmu_unmap(hdev->kernel_ctx,
-				prop->dram_base_address + off, PAGE_SIZE_2MB))
+				prop->dram_base_address + off, PAGE_SIZE_2MB,
+				(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
 			dev_warn_ratelimited(hdev->dev,
 				"Failed to unmap address 0x%llx\n",
 				prop->dram_base_address + off);
...
@@ -1573,8 +1573,10 @@ int hl_mmu_init(struct hl_device *hdev);
 void hl_mmu_fini(struct hl_device *hdev);
 int hl_mmu_ctx_init(struct hl_ctx *ctx);
 void hl_mmu_ctx_fini(struct hl_ctx *ctx);
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size);
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size);
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+		u32 page_size, bool flush_pte);
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+		bool flush_pte);
 void hl_mmu_swap_out(struct hl_ctx *ctx);
 void hl_mmu_swap_in(struct hl_ctx *ctx);
...
@@ -747,7 +747,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
 		paddr = phys_pg_pack->pages[i];
-		rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size);
+		rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
+				(i + 1) == phys_pg_pack->npages);
 		if (rc) {
 			dev_err(hdev->dev,
 				"map failed for handle %u, npages: %llu, mapped: %llu",
@@ -765,7 +766,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 err:
 	next_vaddr = vaddr;
 	for (i = 0 ; i < mapped_pg_cnt ; i++) {
-		if (hl_mmu_unmap(ctx, next_vaddr, page_size))
+		if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+				(i + 1) == mapped_pg_cnt))
 			dev_warn_ratelimited(hdev->dev,
 				"failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
 				phys_pg_pack->handle, next_vaddr,
@@ -794,7 +796,8 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 	next_vaddr = vaddr;
 	for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
-		if (hl_mmu_unmap(ctx, next_vaddr, page_size))
+		if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+				(i + 1) == phys_pg_pack->npages))
 			dev_warn_ratelimited(hdev->dev,
 				"unmap failed for vaddr: 0x%llx\n", next_vaddr);
...
@@ -637,29 +637,27 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
 			clear_hop3 = true;

 		if (!clear_hop3)
-			goto flush;
+			goto mapped;

 		clear_pte(ctx, hop3_pte_addr);

 		if (put_pte(ctx, hop3_addr))
-			goto flush;
+			goto mapped;

 		clear_pte(ctx, hop2_pte_addr);

 		if (put_pte(ctx, hop2_addr))
-			goto flush;
+			goto mapped;

 		clear_pte(ctx, hop1_pte_addr);

 		if (put_pte(ctx, hop1_addr))
-			goto flush;
+			goto mapped;

 		clear_pte(ctx, hop0_pte_addr);
 	}

-flush:
-	flush(ctx);
-
+mapped:
 	return 0;

 not_mapped:
@@ -675,6 +673,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
  * @ctx: pointer to the context structure
  * @virt_addr: virt addr to map from
  * @page_size: size of the page to unmap
+ * @flush_pte: whether to do a PCI flush
  *
  * This function does the following:
  * - Check that the virt addr is mapped
@@ -685,15 +684,19 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
  * changes the MMU hash, it must be protected by a lock.
  * However, because it maps only a single page, the lock should be implemented
  * in a higher level in order to protect the entire mapping of the memory area
+ *
+ * For optimization reasons PCI flush may be requested once after unmapping of
+ * large area.
  */
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+		bool flush_pte)
 {
 	struct hl_device *hdev = ctx->hdev;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct hl_mmu_properties *mmu_prop;
 	u64 real_virt_addr;
 	u32 real_page_size, npages;
-	int i, rc;
+	int i, rc = 0;
 	bool is_dram_addr;

 	if (!hdev->mmu_enable)
@@ -729,12 +732,15 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
 	for (i = 0 ; i < npages ; i++) {
 		rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr);
 		if (rc)
-			return rc;
+			break;

 		real_virt_addr += real_page_size;
 	}

-	return 0;
+	if (flush_pte)
+		flush(ctx);
+
+	return rc;
 }

 static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
@@ -885,8 +891,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 		get_pte(ctx, hop3_addr);
 	}

-	flush(ctx);
-
 	return 0;

 err:
@@ -909,6 +913,7 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
  * @virt_addr: virt addr to map from
  * @phys_addr: phys addr to map to
  * @page_size: physical page size
+ * @flush_pte: whether to do a PCI flush
  *
  * This function does the following:
  * - Check that the virt addr is not mapped
@@ -919,8 +924,12 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
  * changes the MMU hash, it must be protected by a lock.
  * However, because it maps only a single page, the lock should be implemented
  * in a higher level in order to protect the entire mapping of the memory area
+ *
+ * For optimization reasons PCI flush may be requested once after mapping of
+ * large area.
  */
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
+		bool flush_pte)
 {
 	struct hl_device *hdev = ctx->hdev;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -976,6 +985,9 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
 		mapped_cnt++;
 	}

+	if (flush_pte)
+		flush(ctx);
+
 	return 0;

 err:
@@ -988,6 +1000,8 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
 		real_virt_addr += real_page_size;
 	}

+	flush(ctx);
+
 	return rc;
 }
...