Commit 08c03a19, authored by Alon Mizrahi, committed by Oded Gabbay

habanalabs: use mmu cache range invalidation

Use mmu cache range invalidation instead of entire cache invalidation
because it yields better performance.

In GOYA and GAUDI, always use entire cache invalidation because these
ASICs don't support range invalidation.
Signed-off-by: Alon Mizrahi <amizrahi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent a22f0ec0
......@@ -1035,7 +1035,7 @@ struct hl_asic_funcs {
int (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard,
u32 flags);
int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
u32 asid, u64 va, u64 size);
u32 flags, u32 asid, u64 va, u64 size);
int (*send_heartbeat)(struct hl_device *hdev);
void (*set_clock_gating)(struct hl_device *hdev);
void (*disable_clock_gating)(struct hl_device *hdev);
......
......@@ -1117,7 +1117,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
goto map_err;
}
rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type);
rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, false,
*vm_type, ctx->asid, ret_vaddr, phys_pg_pack->total_size);
mutex_unlock(&ctx->mmu_lock);
......@@ -1261,8 +1262,9 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
* at the loop end rather than for each iteration
*/
if (!ctx_free)
rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
*vm_type);
rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, true,
*vm_type, ctx->asid, vaddr,
phys_pg_pack->total_size);
mutex_unlock(&ctx->mmu_lock);
......
......@@ -7862,52 +7862,13 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
}
static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
bool is_hard, u32 asid, u64 va, u64 size)
bool is_hard, u32 flags,
u32 asid, u64 va, u64 size)
{
struct gaudi_device *gaudi = hdev->asic_specific;
u32 status, timeout_usec;
u32 inv_data;
u32 pi;
int rc;
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
hdev->hard_reset_pending)
return 0;
if (hdev->pldm)
timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
else
timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
/*
* TODO: currently invalidate entire L0 & L1 as in regular hard
* invalidation. Need to apply invalidation of specific cache
* lines with mask of ASID & VA & size.
* Note that L1 with be flushed entirely in any case.
/* Treat as invalidate all because there is no range invalidation
* in Gaudi
*/
/* L0 & L1 invalidation */
inv_data = RREG32(mmSTLB_CACHE_INV);
/* PI is 8 bit */
pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
WREG32(mmSTLB_CACHE_INV,
(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
rc = hl_poll_timeout(
hdev,
mmSTLB_INV_CONSUMER_INDEX,
status,
status == pi,
1000,
timeout_usec);
if (rc) {
dev_err_ratelimited(hdev->dev,
"MMU cache invalidation timeout\n");
hl_device_reset(hdev, HL_RESET_HARD);
}
return rc;
return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
}
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
......
......@@ -5178,54 +5178,13 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
}
static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
bool is_hard, u32 asid, u64 va, u64 size)
bool is_hard, u32 flags,
u32 asid, u64 va, u64 size)
{
struct goya_device *goya = hdev->asic_specific;
u32 status, timeout_usec, inv_data, pi;
int rc;
if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
hdev->hard_reset_pending)
return 0;
/* no need in L1 only invalidation in Goya */
if (!is_hard)
return 0;
if (hdev->pldm)
timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
else
timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
/*
* TODO: currently invalidate entire L0 & L1 as in regular hard
* invalidation. Need to apply invalidation of specific cache lines with
* mask of ASID & VA & size.
* Note that L1 with be flushed entirely in any case.
/* Treat as invalidate all because there is no range invalidation
* in Goya
*/
/* L0 & L1 invalidation */
inv_data = RREG32(mmSTLB_CACHE_INV);
/* PI is 8 bit */
pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
WREG32(mmSTLB_CACHE_INV,
(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
rc = hl_poll_timeout(
hdev,
mmSTLB_INV_CONSUMER_INDEX,
status,
status == pi,
1000,
timeout_usec);
if (rc) {
dev_err_ratelimited(hdev->dev,
"MMU cache invalidation timeout\n");
hl_device_reset(hdev, HL_RESET_HARD);
}
return rc;
return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
}
int goya_send_heartbeat(struct hl_device *hdev)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment