Commit 077bd800 authored by Dave Airlie

Merge tag 'drm-msm-next-2022-11-28' of https://gitlab.freedesktop.org/drm/msm into drm-next

msm-next for v6.2 (the gpu/gem bits)

- Remove exclusive-fence hack that caused over-synchronization
- Fix speed-bin detection vs. probe-defer
- Enable clamp_to_idle on 7c3
- Improved hangcheck detection
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Rob Clark <robdclark@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/CAF6AEGvT1h_S4d=YRgphgR8i7aMaxQaNW8mru7QaoUo9uiUk2A@mail.gmail.com
parents 92e11ddb d73b1d02
drivers/gpu/drm/msm/adreno/a4xx_gpu.c

@@ -606,8 +606,7 @@ static int a4xx_pm_suspend(struct msm_gpu *gpu)

 static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 {
-    *value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
-        REG_A4XX_RBBM_PERFCTR_CP_0_HI);
+    *value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO);

     return 0;
 }
drivers/gpu/drm/msm/adreno/a5xx_gpu.c

@@ -605,11 +605,9 @@ static int a5xx_ucode_init(struct msm_gpu *gpu)
         a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
     }

-    gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
-        REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
+    gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO, a5xx_gpu->pm4_iova);

-    gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
-        REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
+    gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO, a5xx_gpu->pfp_iova);

     return 0;
 }

@@ -868,8 +866,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
     * memory rendering at this point in time and we don't want to block off
     * part of the virtual memory space.
     */
-    gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
-        REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
+    gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
     gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

     /* Put the GPU into 64 bit by default */

@@ -908,8 +905,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
         return ret;

     /* Set the ringbuffer address */
-    gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
-        gpu->rb[0]->iova);
+    gpu_write64(gpu, REG_A5XX_CP_RB_BASE, gpu->rb[0]->iova);

     /*
      * If the microcode supports the WHERE_AM_I opcode then we can use that

@@ -936,7 +932,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
         }

         gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
-            REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
+            shadowptr(a5xx_gpu, gpu->rb[0]));
     } else if (gpu->nr_rings > 1) {
         /* Disable preemption if WHERE_AM_I isn't available */
         a5xx_preempt_fini(gpu);

@@ -1239,9 +1235,9 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
         gpu_read(gpu, REG_A5XX_RBBM_STATUS),
         gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
         gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
-        gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
+        gpu_read64(gpu, REG_A5XX_CP_IB1_BASE),
         gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
-        gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
+        gpu_read64(gpu, REG_A5XX_CP_IB2_BASE),
         gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));

     /* Turn off the hangcheck timer to keep it from bothering us */

@@ -1427,8 +1423,7 @@ static int a5xx_pm_suspend(struct msm_gpu *gpu)

 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 {
-    *value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
-        REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
+    *value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO);

     return 0;
 }

@@ -1465,8 +1460,7 @@ static int a5xx_crashdumper_run(struct msm_gpu *gpu,
     if (IS_ERR_OR_NULL(dumper->ptr))
         return -EINVAL;

-    gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
-        REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
+    gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova);

     gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);

@@ -1666,8 +1660,7 @@ static u64 a5xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
 {
     u64 busy_cycles;

-    busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
-        REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
+    busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO);
     *out_sample_rate = clk_get_rate(gpu->core_clk);

     return busy_cycles;
drivers/gpu/drm/msm/adreno/a5xx_preempt.c

@@ -137,7 +137,6 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu)

     /* Set the address of the incoming preemption record */
     gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
-        REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI,
         a5xx_gpu->preempt_iova[ring->id]);

     a5xx_gpu->next_ring = ring;

@@ -211,8 +210,7 @@ void a5xx_preempt_hw_init(struct msm_gpu *gpu)
     }

     /* Write a 0 to signal that we aren't switching pagetables */
-    gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
-        REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, 0);
+    gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO, 0);

     /* Reset the preemption state */
     set_preempt_state(a5xx_gpu, PREEMPT_NONE);
drivers/gpu/drm/msm/adreno/a6xx_gpu.c

@@ -247,8 +247,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
     OUT_RING(ring, submit->seqno);

     trace_msm_gpu_submit_flush(submit,
-        gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
-            REG_A6XX_CP_ALWAYS_ON_COUNTER_HI));
+        gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO));

     a6xx_flush(gpu, ring);
 }

@@ -947,8 +946,7 @@ static int a6xx_ucode_init(struct msm_gpu *gpu)
         }
     }

-    gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE,
-        REG_A6XX_CP_SQE_INSTR_BASE+1, a6xx_gpu->sqe_iova);
+    gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);

     return 0;
 }

@@ -999,8 +997,7 @@ static int hw_init(struct msm_gpu *gpu)
     * memory rendering at this point in time and we don't want to block off
     * part of the virtual memory space.
     */
-    gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
-        REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
+    gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
     gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

     /* Turn on 64 bit addressing for all blocks */

@@ -1049,11 +1046,9 @@ static int hw_init(struct msm_gpu *gpu)
     if (!adreno_is_a650_family(adreno_gpu)) {
         /* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
-        gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO,
-            REG_A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x00100000);
+        gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);

         gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX_LO,
-            REG_A6XX_UCHE_GMEM_RANGE_MAX_HI,
             0x00100000 + adreno_gpu->gmem - 1);
     }

@@ -1145,8 +1140,7 @@ static int hw_init(struct msm_gpu *gpu)
         goto out;

     /* Set the ringbuffer address */
-    gpu_write64(gpu, REG_A6XX_CP_RB_BASE, REG_A6XX_CP_RB_BASE_HI,
-        gpu->rb[0]->iova);
+    gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);

     /* Targets that support extended APRIV can use the RPTR shadow from
      * hardware but all the other ones need to disable the feature. Targets

@@ -1178,7 +1172,6 @@ static int hw_init(struct msm_gpu *gpu)
         }

         gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR_LO,
-            REG_A6XX_CP_RB_RPTR_ADDR_HI,
             shadowptr(a6xx_gpu, gpu->rb[0]));
     }

@@ -1499,9 +1492,9 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
         gpu_read(gpu, REG_A6XX_RBBM_STATUS),
         gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
         gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
-        gpu_read64(gpu, REG_A6XX_CP_IB1_BASE, REG_A6XX_CP_IB1_BASE_HI),
+        gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
         gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
-        gpu_read64(gpu, REG_A6XX_CP_IB2_BASE, REG_A6XX_CP_IB2_BASE_HI),
+        gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
         gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));

     /* Turn off the hangcheck timer to keep it from bothering us */

@@ -1712,8 +1705,7 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
     /* Force the GPU power on so we can read this register */
     a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);

-    *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
-        REG_A6XX_CP_ALWAYS_ON_COUNTER_HI);
+    *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO);

     a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);

@@ -1824,6 +1816,39 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
     return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
 }

+static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+    struct msm_cp_state cp_state = {
+        .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
+        .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
+        .ib1_rem  = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
+        .ib2_rem  = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
+    };
+    bool progress;
+
+    /*
+     * Adjust the remaining data to account for what has already been
+     * fetched from memory, but not yet consumed by the SQE.
+     *
+     * This is not *technically* correct, the amount buffered could
+     * exceed the IB size due to hw prefetching ahead, but:
+     *
+     * (1) We aren't trying to find the exact position, just whether
+     *     progress has been made
+     * (2) The CP_REG_TO_MEM at the end of a submit should be enough
+     *     to prevent prefetching into an unrelated submit.  (And
+     *     either way, at some point the ROQ will be full.)
+     */
+    cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB1_STAT) >> 16;
+    cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB2_STAT) >> 16;
+
+    progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
+
+    ring->last_cp_state = cp_state;
+
+    return progress;
+}
+
 static u32 a618_get_speed_bin(u32 fuse)
 {
     if (fuse == 0)
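The new a6xx_progress() boils down to snapshot-and-compare: capture where the CP currently is (IB base addresses plus remaining dwords, adjusted for what is already buffered in the ROQ), and treat any change since the previous hangcheck tick as forward progress. A standalone sketch of that idea follows; read_snapshot() is a hypothetical stand-in for the gpu_read()/gpu_read64() register reads above, not driver code:

    #include <stdbool.h>
    #include <stdint.h>
    #include <string.h>

    struct cp_snapshot {
        uint64_t ib1_base, ib2_base;
        uint32_t ib1_rem, ib2_rem;
    };

    /* Hypothetical stand-in for the MMIO register reads; here it just
     * simulates a CP that drains one dword per sample. */
    static struct cp_snapshot read_snapshot(void)
    {
        static uint32_t rem = 100;
        struct cp_snapshot s = { .ib1_base = 0x100000, .ib1_rem = rem };
        if (rem)
            rem--;
        return s;
    }

    static bool progressed(struct cp_snapshot *last)
    {
        struct cp_snapshot now = read_snapshot();
        /* 8+8+4+4 bytes leaves no padding, so a raw memcmp is safe. */
        bool changed = memcmp(&now, last, sizeof(now)) != 0;

        *last = now;  /* remember for the next hangcheck tick */
        return changed;
    }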
@@ -1879,7 +1904,7 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse)
     if (val == UINT_MAX) {
         DRM_DEV_ERROR(dev,
-            "missing support for speed-bin: %u. Some OPPs may not be supported by hardware",
+            "missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n",
             fuse);
         return UINT_MAX;
     }

@@ -1889,7 +1914,7 @@ static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev)
 {
-    u32 supp_hw = UINT_MAX;
+    u32 supp_hw;
     u32 speedbin;
     int ret;

@@ -1901,15 +1926,13 @@ static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev)
     if (ret == -ENOENT) {
         return 0;
     } else if (ret) {
-        DRM_DEV_ERROR(dev,
-            "failed to read speed-bin (%d). Some OPPs may not be supported by hardware",
-            ret);
-        goto done;
+        dev_err_probe(dev, ret,
+                      "failed to read speed-bin. Some OPPs may not be supported by hardware\n");
+        return ret;
     }

     supp_hw = fuse_to_supp_hw(dev, rev, speedbin);

-done:
     ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
     if (ret)
         return ret;
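The dev_err_probe() conversion is what fixes the speed-bin vs. probe-defer interaction from the merge summary: the fuse read can return -EPROBE_DEFER, and dev_err_probe() stays quiet for that code (recording the reason for debugfs instead of spamming the log) while still logging real failures. A minimal sketch of the pattern, assuming a "speed_bin" nvmem cell; the helper name is illustrative, not the driver's:

    #include <linux/device.h>
    #include <linux/nvmem-consumer.h>

    static int example_read_speedbin(struct device *dev, u32 *speedbin)
    {
        int ret = nvmem_cell_read_variable_le_u32(dev, "speed_bin", speedbin);

        if (ret == -ENOENT)
            return 0;  /* no fuse described in DT: leave all OPPs enabled */
        if (ret)
            return dev_err_probe(dev, ret, "failed to read speed-bin\n");

        return 0;
    }

Note also why supp_hw loses its UINT_MAX initializer: every path that now reaches devm_pm_opp_set_supported_hw() assigns it (fuse_to_supp_hw() itself returns UINT_MAX for an unknown fuse value), and the error path returns early instead of jumping past the assignment.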
@@ -1942,6 +1965,7 @@ static const struct adreno_gpu_funcs funcs = {
         .create_address_space = a6xx_create_address_space,
         .create_private_address_space = a6xx_create_private_address_space,
         .get_rptr = a6xx_get_rptr,
+        .progress = a6xx_progress,
     },
     .get_timestamp = a6xx_get_timestamp,
 };

@@ -1978,13 +2002,6 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
             adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), info->rev)))
         adreno_gpu->base.hw_apriv = true;

-    /*
-     * For now only clamp to idle freq for devices where this is known not
-     * to cause power supply issues:
-     */
-    if (info && (info->revn == 618))
-        gpu->clamp_to_idle = true;
-
     a6xx_llc_slices_init(pdev, a6xx_gpu);

     ret = a6xx_set_supported_hw(&pdev->dev, config->rev);

@@ -1999,6 +2016,13 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
         return ERR_PTR(ret);
     }

+    /*
+     * For now only clamp to idle freq for devices where this is known not
+     * to cause power supply issues:
+     */
+    if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
+        gpu->clamp_to_idle = true;
+
     /* Check if there is a GMU phandle and set it up */
     node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c

@@ -147,8 +147,7 @@ static int a6xx_crashdumper_run(struct msm_gpu *gpu,
     /* Make sure all pending memory writes are posted */
     wmb();

-    gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
-        REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
+    gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova);

     gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
drivers/gpu/drm/msm/msm_drv.c

@@ -418,7 +418,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
     priv->dev = ddev;

     priv->wq = alloc_ordered_workqueue("msm", 0);
-    priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD;

     INIT_LIST_HEAD(&priv->objects);
     mutex_init(&priv->obj_lock);
drivers/gpu/drm/msm/msm_drv.h

@@ -224,7 +224,13 @@ struct msm_drm_private {
     struct drm_atomic_state *pm_state;

-    /* For hang detection, in ms */
+    /**
+     * hangcheck_period: For hang detection, in ms
+     *
+     * Note that in practice, a submit/job will get at least two hangcheck
+     * periods, due to checking for progress being implemented as simply
+     * "have the CP position registers changed since last time?"
+     */
     unsigned int hangcheck_period;

     /**
drivers/gpu/drm/msm/msm_gem_shrinker.c

@@ -15,7 +15,7 @@
 /* Default disabled for now until it has some more testing on the different
  * iommu combinations that can be paired with the driver:
  */
-static bool enable_eviction = false;
+static bool enable_eviction = true;
 MODULE_PARM_DESC(enable_eviction, "Enable swappable GEM buffers");
 module_param(enable_eviction, bool, 0600);
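With eviction now on by default, it can still be turned off at load time with msm.enable_eviction=0, and the 0600 permission means module_param() exposes it at /sys/module/msm/parameters/enable_eviction for root to toggle at runtime.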
drivers/gpu/drm/msm/msm_gem_submit.c

@@ -334,8 +334,7 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit)
         if (ret)
             return ret;

-        /* exclusive fences must be ordered */
-        if (no_implicit && !write)
+        if (no_implicit)
             continue;

         ret = drm_sched_job_add_implicit_dependencies(&submit->base,
drivers/gpu/drm/msm/msm_gpu.c

@@ -492,6 +492,21 @@ static void hangcheck_timer_reset(struct msm_gpu *gpu)
         round_jiffies_up(jiffies + msecs_to_jiffies(priv->hangcheck_period)));
 }

+static bool made_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+    if (ring->hangcheck_progress_retries >= DRM_MSM_HANGCHECK_PROGRESS_RETRIES)
+        return false;
+
+    if (!gpu->funcs->progress)
+        return false;
+
+    if (!gpu->funcs->progress(gpu, ring))
+        return false;
+
+    ring->hangcheck_progress_retries++;
+
+    return true;
+}
+
 static void hangcheck_handler(struct timer_list *t)
 {
     struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer);

@@ -502,9 +517,12 @@ static void hangcheck_handler(struct timer_list *t)
     if (fence != ring->hangcheck_fence) {
         /* some progress has been made.. ya! */
         ring->hangcheck_fence = fence;
-    } else if (fence_before(fence, ring->fctx->last_fence)) {
+        ring->hangcheck_progress_retries = 0;
+    } else if (fence_before(fence, ring->fctx->last_fence) &&
+            !made_progress(gpu, ring)) {
         /* no progress and not done.. hung! */
         ring->hangcheck_fence = fence;
+        ring->hangcheck_progress_retries = 0;
         DRM_DEV_ERROR(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
                 gpu->name, ring->id);
         DRM_DEV_ERROR(dev->dev, "%s: completed fence: %u\n",

@@ -830,6 +848,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
         struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
         const char *name, struct msm_gpu_config *config)
 {
+    struct msm_drm_private *priv = drm->dev_private;
     int i, ret, nr_rings = config->nr_rings;
     void *memptrs;
     uint64_t memptrs_iova;

@@ -857,6 +876,16 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
     kthread_init_work(&gpu->recover_work, recover_worker);
     kthread_init_work(&gpu->fault_work, fault_worker);

+    priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD;
+
+    /*
+     * If progress detection is supported, halve the hangcheck timer
+     * duration, as it takes two iterations of the hangcheck handler
+     * to detect a hang.
+     */
+    if (funcs->progress)
+        priv->hangcheck_period /= 2;
+
     timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0);

     spin_lock_init(&gpu->perf_lock);
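Taken together, the policy is: a fence advancing resets the grace count; a timer tick where only the CP position moved consumes one of DRM_MSM_HANGCHECK_PROGRESS_RETRIES grace periods; a tick with neither is a hang. A compilable toy model of just that policy (a sketch, not driver code):

    #include <stdbool.h>
    #include <stdio.h>

    #define PROGRESS_RETRIES 3

    enum tick { FENCE_ADVANCED, CP_MOVED, STUCK };

    static bool is_hung(enum tick t, int *retries)
    {
        if (t == FENCE_ADVANCED) {
            *retries = 0;   /* real progress: start over */
            return false;
        }
        if (t == CP_MOVED && *retries < PROGRESS_RETRIES) {
            (*retries)++;   /* apparent progress: grant one more period */
            return false;
        }
        return true;        /* no progress, retries exhausted: hung */
    }

    int main(void)
    {
        enum tick trace[] = { CP_MOVED, CP_MOVED, CP_MOVED, CP_MOVED };
        int retries = 0;

        for (int i = 0; i < 4; i++)
            printf("tick %d: %s\n", i, is_hung(trace[i], &retries) ? "hung" : "ok");

        /* Prints ok, ok, ok, hung: the fourth period with only CP
         * movement and no fence progress finally kills the job. */
        return 0;
    }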
drivers/gpu/drm/msm/msm_gpu.h

@@ -78,6 +78,15 @@ struct msm_gpu_funcs {
     struct msm_gem_address_space *(*create_private_address_space)
         (struct msm_gpu *gpu);
     uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
+
+    /**
+     * progress: Has the GPU made progress?
+     *
+     * Return true if GPU position in cmdstream has advanced (or changed)
+     * since the last call.  To avoid false negatives, this should account
+     * for cmdstream that is buffered in a FIFO upstream of the CP fw.
+     */
+    bool (*progress)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
 };

@@ -237,6 +246,7 @@ struct msm_gpu {
 #define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */

 #define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */
+#define DRM_MSM_HANGCHECK_PROGRESS_RETRIES 3

 struct timer_list hangcheck_timer;

@@ -540,7 +550,7 @@ static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
     msm_rmw(gpu->mmio + (reg << 2), mask, or);
 }

-static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
+static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg)
 {
     u64 val;

@@ -558,17 +568,17 @@ static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg)
     * when the lo is read, so make sure to read the lo first to trigger
     * that
     */
-    val = (u64) msm_readl(gpu->mmio + (lo << 2));
-    val |= ((u64) msm_readl(gpu->mmio + (hi << 2)) << 32);
+    val = (u64) msm_readl(gpu->mmio + (reg << 2));
+    val |= ((u64) msm_readl(gpu->mmio + ((reg + 1) << 2)) << 32);

     return val;
 }

-static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
+static inline void gpu_write64(struct msm_gpu *gpu, u32 reg, u64 val)
 {
     /* Why not a writeq here? Read the screed above */
-    msm_writel(lower_32_bits(val), gpu->mmio + (lo << 2));
-    msm_writel(upper_32_bits(val), gpu->mmio + (hi << 2));
+    msm_writel(lower_32_bits(val), gpu->mmio + (reg << 2));
+    msm_writel(upper_32_bits(val), gpu->mmio + ((reg + 1) << 2));
 }

 int msm_gpu_pm_suspend(struct msm_gpu *gpu);
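The gpu_read64()/gpu_write64() simplification, which drives most of the call-site churn above, encodes a convention: every 64-bit register these helpers touch keeps its HI half at the next 32-bit register offset, so passing the LO register alone is enough. A freestanding sketch of the access pattern (plain C, not the driver's msm_readl()/msm_writel() wrappers):

    #include <stdint.h>

    /* Assumes hi == reg + 1, i.e. the two halves are adjacent dwords. */
    static inline uint64_t read64_pair(volatile uint32_t *mmio, uint32_t reg)
    {
        /* Read LO first: some counters latch the HI half when LO is
         * read, as the comment in the hunk above notes. */
        uint64_t lo = mmio[reg];
        uint64_t hi = mmio[reg + 1];

        return lo | (hi << 32);
    }

    static inline void write64_pair(volatile uint32_t *mmio, uint32_t reg,
                                    uint64_t val)
    {
        mmio[reg]     = (uint32_t)val;
        mmio[reg + 1] = (uint32_t)(val >> 32);
    }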
drivers/gpu/drm/msm/msm_ringbuffer.h

@@ -35,6 +35,11 @@ struct msm_rbmemptrs {
     volatile u64 ttbr0;
 };

+struct msm_cp_state {
+    uint64_t ib1_base, ib2_base;
+    uint32_t ib1_rem, ib2_rem;
+};
+
 struct msm_ringbuffer {
     struct msm_gpu *gpu;
     int id;

@@ -64,6 +69,29 @@ struct msm_ringbuffer {
     uint64_t memptrs_iova;
     struct msm_fence_context *fctx;

+    /**
+     * hangcheck_progress_retries:
+     *
+     * The number of extra hangcheck duration cycles that we have given
+     * due to it appearing that the GPU is making forward progress.
+     *
+     * For GPU generations which support progress detection (see
+     * msm_gpu_funcs::progress()), if the GPU appears to be making progress
+     * (ie. the CP has advanced in the command stream), we'll allow up to
+     * DRM_MSM_HANGCHECK_PROGRESS_RETRIES expirations of the hangcheck timer
+     * before killing the job.  But to detect progress we need two sample
+     * points, so the duration of the hangcheck timer is halved.  In other
+     * words we'll let the submit run for up to:
+     *
+     *   (DRM_MSM_HANGCHECK_DEFAULT_PERIOD / 2) * (DRM_MSM_HANGCHECK_PROGRESS_RETRIES + 1)
+     */
+    int hangcheck_progress_retries;
+
+    /**
+     * last_cp_state: The state of the CP at the last call to gpu->progress()
+     */
+    struct msm_cp_state last_cp_state;
+
     /*
      * preempt_lock protects preemption and serializes wptr updates against
      * preemption. Can be acquired from irq context.
      */
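Plugging in the defaults makes that formula concrete: with DRM_MSM_HANGCHECK_DEFAULT_PERIOD = 500 ms halved to 250 ms and DRM_MSM_HANGCHECK_PROGRESS_RETRIES = 3, a submit whose CP keeps moving gets (500 / 2) * (3 + 1) = 1000 ms of wall-clock time before it is declared hung, while a GPU without a .progress callback keeps the old behavior: the full 500 ms period and no grace retries.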