Commit 17b2e332 authored by Monk Liu, committed by Alex Deucher

drm/amdgpu: need some changes on vega10 mailbox

If an SR-IOV GPU reset is invoked by a job timeout, it runs in a
global work-queue, which is very slow, so it is better not to call
msleep there; otherwise it takes a long time to get the CPU back.

so make below changes:

1: Change msleep(1) to mdelay(5).
2: Ignore an ack failure from the PF after the timeout: a VF FLR
   clears the ack, and sometimes the VF FLR has already completed
   before poll_ack begins, so the missing ack can be ignored.

TODO:
Put job_timedout (and the following gpu reset) in a driver thread,
instead of the global work_struct.
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 3af906f0
...@@ -124,8 +124,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev) ...@@ -124,8 +124,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
r = -ETIME; r = -ETIME;
break; break;
} }
msleep(1); mdelay(5);
timeout -= 1; timeout -= 5;
reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
mmBIF_BX_PF0_MAILBOX_CONTROL)); mmBIF_BX_PF0_MAILBOX_CONTROL));
...@@ -141,12 +141,12 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event) ...@@ -141,12 +141,12 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
r = xgpu_ai_mailbox_rcv_msg(adev, event); r = xgpu_ai_mailbox_rcv_msg(adev, event);
while (r) { while (r) {
if (timeout <= 0) { if (timeout <= 0) {
pr_err("Doesn't get ack from pf.\n"); pr_err("Doesn't get msg:%d from pf.\n", event);
r = -ETIME; r = -ETIME;
break; break;
} }
msleep(1); mdelay(5);
timeout -= 1; timeout -= 5;
r = xgpu_ai_mailbox_rcv_msg(adev, event); r = xgpu_ai_mailbox_rcv_msg(adev, event);
} }
...@@ -165,7 +165,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, ...@@ -165,7 +165,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
/* start to poll ack */ /* start to poll ack */
r = xgpu_ai_poll_ack(adev); r = xgpu_ai_poll_ack(adev);
if (r) if (r)
return r; pr_err("Doesn't get ack from pf, continue\n");
xgpu_ai_mailbox_set_valid(adev, false); xgpu_ai_mailbox_set_valid(adev, false);
...@@ -174,9 +174,11 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, ...@@ -174,9 +174,11 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
req == IDH_REQ_GPU_FINI_ACCESS || req == IDH_REQ_GPU_FINI_ACCESS ||
req == IDH_REQ_GPU_RESET_ACCESS) { req == IDH_REQ_GPU_RESET_ACCESS) {
r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
if (r) if (r) {
pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
return r; return r;
} }
}
return 0; return 0;
} }
...@@ -211,7 +213,7 @@ static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev, ...@@ -211,7 +213,7 @@ static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source, struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
{ {
DRM_DEBUG("get ack intr and do nothing.\n"); printk("get ack intr and do nothing.\n");
return 0; return 0;
} }
......
...@@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev) ...@@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev)
r = -ETIME; r = -ETIME;
break; break;
} }
msleep(1); mdelay(5);
timeout -= 1; timeout -= 5;
reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL); reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
} }
...@@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device *adev, enum idh_event event) ...@@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device *adev, enum idh_event event)
r = -ETIME; r = -ETIME;
break; break;
} }
msleep(1); mdelay(5);
timeout -= 1; timeout -= 5;
r = xgpu_vi_mailbox_rcv_msg(adev, event); r = xgpu_vi_mailbox_rcv_msg(adev, event);
} }
...@@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev, ...@@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev,
request == IDH_REQ_GPU_RESET_ACCESS) { request == IDH_REQ_GPU_RESET_ACCESS) {
r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
if (r) if (r)
return r; pr_err("Doesn't get ack from pf, continue\n");
} }
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment