Commit 681260df authored by Andrey Grodzovsky

drm/amdgpu: Drop hive->in_reset

Since we now serialize all resets, there is no need to protect
against concurrent resets.
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://www.spinics.net/lists/amd-gfx/msg74115.html
parent 02599bc7
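
The serialization the message refers to comes from earlier patches in this series, which push all reset work for a device or XGMI hive onto one single-threaded queue so that reset handlers run strictly one after another. Below is a minimal sketch of that idea only, not the amdgpu implementation; every name in it (reset_sketch_wq, reset_sketch_work, do_gpu_reset, reset_sketch_request) is hypothetical. With an ordered workqueue the handler can never race with itself, so a per-hive in_reset flag taken with atomic_cmpxchg() no longer protects anything; a second request simply waits its turn instead of bailing out and bumping the job's karma.

/*
 * Sketch only -- not amdgpu code. Illustrates why serializing resets on
 * an ordered (single-threaded) workqueue makes an "already in reset"
 * flag redundant. All identifiers here are hypothetical.
 */
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

static struct workqueue_struct *reset_sketch_wq;	/* ordered => serial */

struct reset_sketch_work {
	struct work_struct base;
	int reason;			/* stand-in for real reset context */
};

static void do_gpu_reset(struct work_struct *work)
{
	struct reset_sketch_work *r =
		container_of(work, struct reset_sketch_work, base);

	/*
	 * Because reset_sketch_wq is ordered, this handler never runs
	 * concurrently with itself, so no atomic_cmpxchg() "trylock"
	 * on a shared flag is needed before doing the reset.
	 */
	pr_info("reset sketch: handling reason %d\n", r->reason);
	kfree(r);
}

static int reset_sketch_request(int reason)
{
	struct reset_sketch_work *r = kzalloc(sizeof(*r), GFP_KERNEL);

	if (!r)
		return -ENOMEM;
	r->reason = reason;
	INIT_WORK(&r->base, do_gpu_reset);
	queue_work(reset_sketch_wq, &r->base);	/* requests run one by one */
	return 0;
}

static int __init reset_sketch_init(void)
{
	reset_sketch_wq = alloc_ordered_workqueue("reset_sketch", 0);
	if (!reset_sketch_wq)
		return -ENOMEM;
	return reset_sketch_request(0);
}

static void __exit reset_sketch_exit(void)
{
	destroy_workqueue(reset_sketch_wq);	/* drains pending requests */
}

module_init(reset_sketch_init);
module_exit(reset_sketch_exit);
MODULE_LICENSE("GPL");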
@@ -5067,26 +5067,10 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
 	dev_info(adev->dev, "GPU %s begin!\n",
 		need_emergency_restart ? "jobs stop":"reset");

-	/*
-	 * Here we trylock to avoid chain of resets executing from
-	 * either trigger by jobs on different adevs in XGMI hive or jobs on
-	 * different schedulers for same device while this TO handler is running.
-	 * We always reset all schedulers for device and all devices for XGMI
-	 * hive so that should take care of them too.
-	 */
 	if (!amdgpu_sriov_vf(adev))
 		hive = amdgpu_get_xgmi_hive(adev);
-	if (hive) {
-		if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
-			DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
-				job ? job->base.id : -1, hive->hive_id);
-			amdgpu_put_xgmi_hive(hive);
-			if (job && job->vm)
-				drm_sched_increase_karma(&job->base);
-			return 0;
-		}
+	if (hive)
 		mutex_lock(&hive->hive_lock);
-	}

 	reset_context.method = AMD_RESET_METHOD_NONE;
 	reset_context.reset_req_dev = adev;
@@ -5282,7 +5266,6 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,

 skip_recovery:
 	if (hive) {
-		atomic_set(&hive->in_reset, 0);
 		mutex_unlock(&hive->hive_lock);
 		amdgpu_put_xgmi_hive(hive);
 	}
@@ -410,7 +410,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
 	INIT_LIST_HEAD(&hive->device_list);
 	INIT_LIST_HEAD(&hive->node);
 	mutex_init(&hive->hive_lock);
-	atomic_set(&hive->in_reset, 0);
 	atomic_set(&hive->number_devices, 0);
 	task_barrier_init(&hive->tb);
 	hive->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
@@ -33,7 +33,6 @@ struct amdgpu_hive_info {
 	struct list_head node;
 	atomic_t number_devices;
 	struct mutex hive_lock;
-	atomic_t in_reset;
 	int hi_req_count;
 	struct amdgpu_device *hi_req_gpu;
 	struct task_barrier tb;