Commit bff77e86 authored by Le Ma, committed by Alex Deucher

drm/amdgpu: bypass some cleanup work after err_event_athub (v2)

The PSP loses its connection when err_event_athub occurs. This cleanup work
can be skipped in BACO reset.

v2: squash in missing include (Alex)
Signed-off-by: Le Ma <le.ma@amd.com>
Reviewed-by: Hawking Zhang <hawking.zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 8baaadba
...@@ -2271,6 +2271,12 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) ...@@ -2271,6 +2271,12 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
/* displays are handled in phase1 */ /* displays are handled in phase1 */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
continue; continue;
/* PSP lost connection when err_event_athub occurs */
if (amdgpu_ras_intr_triggered() &&
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
adev->ip_blocks[i].status.hw = false;
continue;
}
/* XXX handle errors */ /* XXX handle errors */
r = adev->ip_blocks[i].version->funcs->suspend(adev); r = adev->ip_blocks[i].version->funcs->suspend(adev);
/* XXX handle errors */ /* XXX handle errors */
......
...@@ -34,6 +34,8 @@ ...@@ -34,6 +34,8 @@
#include "psp_v11_0.h" #include "psp_v11_0.h"
#include "psp_v12_0.h" #include "psp_v12_0.h"
#include "amdgpu_ras.h"
static void psp_set_funcs(struct amdgpu_device *adev); static void psp_set_funcs(struct amdgpu_device *adev);
static int psp_early_init(void *handle) static int psp_early_init(void *handle)
...@@ -167,6 +169,13 @@ psp_cmd_submit_buf(struct psp_context *psp, ...@@ -167,6 +169,13 @@ psp_cmd_submit_buf(struct psp_context *psp,
while (*((unsigned int *)psp->fence_buf) != index) { while (*((unsigned int *)psp->fence_buf) != index) {
if (--timeout == 0) if (--timeout == 0)
break; break;
/*
* Shouldn't wait for timeout when err_event_athub occurs,
* because gpu reset thread triggered and lock resource should
* be released for psp resume sequence.
*/
if (amdgpu_ras_intr_triggered())
break;
msleep(1); msleep(1);
amdgpu_asic_invalidate_hdp(psp->adev, NULL); amdgpu_asic_invalidate_hdp(psp->adev, NULL);
} }
......
...@@ -558,15 +558,17 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, ...@@ -558,15 +558,17 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head)))
return 0; return 0;
ret = psp_ras_enable_features(&adev->psp, &info, enable); if (!amdgpu_ras_intr_triggered()) {
if (ret) { ret = psp_ras_enable_features(&adev->psp, &info, enable);
DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n", if (ret) {
enable ? "enable":"disable", DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n",
ras_block_str(head->block), enable ? "enable":"disable",
ret); ras_block_str(head->block),
if (ret == TA_RAS_STATUS__RESET_NEEDED) ret);
return -EAGAIN; if (ret == TA_RAS_STATUS__RESET_NEEDED)
return -EINVAL; return -EAGAIN;
return -EINVAL;
}
} }
/* setup the obj */ /* setup the obj */
......
...@@ -3736,8 +3736,10 @@ static int gfx_v9_0_hw_fini(void *handle) ...@@ -3736,8 +3736,10 @@ static int gfx_v9_0_hw_fini(void *handle)
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
/* disable KCQ to avoid CPC touch memory not valid anymore */ /* DF freeze and kcq disable will fail */
gfx_v9_0_kcq_disable(adev); if (!amdgpu_ras_intr_triggered())
/* disable KCQ to avoid CPC touch memory not valid anymore */
gfx_v9_0_kcq_disable(adev);
if (amdgpu_sriov_vf(adev)) { if (amdgpu_sriov_vf(adev)) {
gfx_v9_0_cp_gfx_enable(adev, false); gfx_v9_0_cp_gfx_enable(adev, false);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment