Commit 7225f873 authored by Monk Liu's avatar Monk Liu Committed by Alex Deucher

drm/amdgpu:use job* to replace voluntary

That way we can know which job caused the hang and
can do a per-scheduler reset/recovery instead of
resetting all schedulers.
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 4fbf87e2
...@@ -2609,14 +2609,13 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev, ...@@ -2609,14 +2609,13 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
* amdgpu_sriov_gpu_reset - reset the asic * amdgpu_sriov_gpu_reset - reset the asic
* *
* @adev: amdgpu device pointer * @adev: amdgpu device pointer
* @voluntary: if this reset is requested by guest. * @job: which job trigger hang
* (true means by guest and false means by HYPERVISOR )
* *
* Attempt the reset the GPU if it has hung (all asics). * Attempt the reset the GPU if it has hung (all asics).
* for SRIOV case. * for SRIOV case.
* Returns 0 for success or an error on failure. * Returns 0 for success or an error on failure.
*/ */
int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job)
{ {
int i, r = 0; int i, r = 0;
int resched; int resched;
...@@ -2646,7 +2645,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) ...@@ -2646,7 +2645,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
amdgpu_fence_driver_force_completion(adev); amdgpu_fence_driver_force_completion(adev);
/* request to take full control of GPU before re-initialization */ /* request to take full control of GPU before re-initialization */
if (voluntary) if (job)
amdgpu_virt_reset_gpu(adev); amdgpu_virt_reset_gpu(adev);
else else
amdgpu_virt_request_full_gpu(adev, true); amdgpu_virt_request_full_gpu(adev, true);
......
...@@ -38,7 +38,7 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job) ...@@ -38,7 +38,7 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job)
job->ring->fence_drv.sync_seq); job->ring->fence_drv.sync_seq);
if (amdgpu_sriov_vf(job->adev)) if (amdgpu_sriov_vf(job->adev))
amdgpu_sriov_gpu_reset(job->adev, true); amdgpu_sriov_gpu_reset(job->adev, job);
else else
amdgpu_gpu_reset(job->adev); amdgpu_gpu_reset(job->adev);
} }
......
...@@ -96,7 +96,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); ...@@ -96,7 +96,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary); int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job);
int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
......
...@@ -243,7 +243,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) ...@@ -243,7 +243,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
} }
/* Trigger recovery due to world switch failure */ /* Trigger recovery due to world switch failure */
amdgpu_sriov_gpu_reset(adev, false); amdgpu_sriov_gpu_reset(adev, NULL);
} }
static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
......
...@@ -514,7 +514,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) ...@@ -514,7 +514,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
} }
/* Trigger recovery due to world switch failure */ /* Trigger recovery due to world switch failure */
amdgpu_sriov_gpu_reset(adev, false); amdgpu_sriov_gpu_reset(adev, NULL);
} }
static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment