Commit 9a1c1339 authored by Felix Kuehling, committed by Alex Deucher

drm/amdkfd: Run restore_workers on freezable WQs

Make restore workers freezable so we don't have to explicitly flush them
in suspend and GPU reset code paths, and we don't accidentally try to
restore BOs while the GPU is suspended. Not having to flush restore_work
also helps avoid lock/fence dependencies in the GPU reset case where we're
not allowed to wait for fences.
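
As an illustration of the mechanisms this relies on, here is a minimal sketch
of a freezable restore path. Everything prefixed with demo_ is made up for the
example; the patch itself only adds WQ_FREEZABLE to kfd_restore_wq and moves
the userptr/SVM restore work onto system_freezable_wq.

  #include <linux/jiffies.h>
  #include <linux/workqueue.h>

  static struct workqueue_struct *demo_restore_wq; /* stands in for kfd_restore_wq */
  static struct delayed_work demo_restore_work;    /* stands in for restore_work */

  static void demo_restore_fn(struct work_struct *work)
  {
          /* A freezable workqueue is frozen along with user space during
           * suspend/hibernate, so this callback never runs while the GPU
           * is suspended and the suspend path has nothing to flush.
           */
  }

  static int demo_setup(void)
  {
          /* Dedicated ordered, freezable queue, like kfd_restore_wq below */
          demo_restore_wq = alloc_ordered_workqueue("demo_restore_wq",
                                                    WQ_FREEZABLE);
          if (!demo_restore_wq)
                  return -ENOMEM;

          INIT_DELAYED_WORK(&demo_restore_work, demo_restore_fn);

          /* Work items without a dedicated queue (userptr and SVM restore
           * in this patch) would go to system_freezable_wq instead of
           * schedule_delayed_work()'s system_wq.
           */
          queue_delayed_work(demo_restore_wq, &demo_restore_work,
                             msecs_to_jiffies(100));
          return 0;
  }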

A side effect of this is that multiple concurrent threads can now try to
signal the same eviction fence. Rework eviction fence signaling and
replacement to account for that.
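
The pattern is roughly the following (condensed from the signal_eviction_fence()
and replace_eviction_fence() helpers added below; demo_process is a stand-in
for the real kfd_process/process_info structures):

  #include <linux/dma-fence.h>
  #include <linux/rcupdate.h>

  struct demo_process {
          struct dma_fence __rcu *ef;     /* eviction fence, RCU-protected */
  };

  /* Any number of threads may race to signal the fence. dma_fence_signal()
   * returns -EINVAL if the fence was already signaled, so the caller that
   * gets 0 back knows it was first and owns scheduling the restore work.
   */
  static int demo_signal_eviction_fence(struct demo_process *p)
  {
          struct dma_fence *ef;
          int ret;

          rcu_read_lock();
          ef = dma_fence_get_rcu_safe(&p->ef);
          rcu_read_unlock();

          ret = dma_fence_signal(ef);
          dma_fence_put(ef);

          return ret;
  }

The restore side swaps a fresh fence into the RCU-protected pointer with
rcu_replace_pointer() under the process lock, and only after the old fence has
signaled (see replace_eviction_fence() below).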

The GPU reset path can no longer rely on restore_process_worker to resume
queues because evict/restore workers can run independently of it. Instead
call a new restore_process_helper directly.
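
Condensed, the two call sites end up looking like this (demo_ names are
illustrative and error reporting is trimmed; the real versions are
restore_process_worker() and kfd_resume_all_processes() in the diff below,
assuming the kfd_priv.h definitions):

  /* Worker path: retry with a back-off delay if the restore failed. */
  static void demo_restore_worker(struct work_struct *work)
  {
          struct kfd_process *p = container_of(to_delayed_work(work),
                                               struct kfd_process, restore_work);

          if (restore_process_helper(p))
                  queue_delayed_work(kfd_restore_wq, &p->restore_work,
                                     msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
  }

  /* GPU reset / resume path: restore synchronously, no worker round-trip. */
  static int demo_resume_one_process(struct kfd_process *p)
  {
          return restore_process_helper(p);
  }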

This is an RFC and request for testing.

v2:
- Reworked eviction fence signaling
- Introduced restore_process_helper

v3:
- Handle unsignaled eviction fences in restore_process_bos

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Tested-by: Emily Deng <Emily.Deng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 4fc26c2f
@@ -1384,7 +1384,6 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 				  amdgpu_amdkfd_restore_userptr_worker);
 
 		*process_info = info;
-		*ef = dma_fence_get(&info->eviction_fence->base);
 	}
 
 	vm->process_info = *process_info;
@@ -1415,6 +1414,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 	list_add_tail(&vm->vm_list_node,
 			&(vm->process_info->vm_list_head));
 	vm->process_info->n_vms++;
+
+	*ef = dma_fence_get(&vm->process_info->eviction_fence->base);
 	mutex_unlock(&vm->process_info->lock);
 
 	return 0;
@@ -1426,10 +1427,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 reserve_pd_fail:
 	vm->process_info = NULL;
 	if (info) {
-		/* Two fence references: one in info and one in *ef */
 		dma_fence_put(&info->eviction_fence->base);
-		dma_fence_put(*ef);
-		*ef = NULL;
 		*process_info = NULL;
 		put_pid(info->pid);
 create_evict_fence_fail:
@@ -1623,7 +1621,8 @@ int amdgpu_amdkfd_criu_resume(void *p)
 		goto out_unlock;
 	}
 	WRITE_ONCE(pinfo->block_mmu_notifications, false);
-	schedule_delayed_work(&pinfo->restore_userptr_work, 0);
+	queue_delayed_work(system_freezable_wq,
+			   &pinfo->restore_userptr_work, 0);
 
 out_unlock:
 	mutex_unlock(&pinfo->lock);
@@ -2426,7 +2425,8 @@ int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
 					     KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
 		if (r)
 			pr_err("Failed to quiesce KFD\n");
-		schedule_delayed_work(&process_info->restore_userptr_work,
+		queue_delayed_work(system_freezable_wq,
+			&process_info->restore_userptr_work,
 			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
 	}
 	mutex_unlock(&process_info->notifier_lock);
@@ -2749,7 +2749,8 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
 
 	/* If validation failed, reschedule another attempt */
 	if (evicted_bos) {
-		schedule_delayed_work(&process_info->restore_userptr_work,
+		queue_delayed_work(system_freezable_wq,
+			&process_info->restore_userptr_work,
 			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
 
 		kfd_smi_event_queue_restore_rescheduled(mm);
@@ -2758,6 +2759,23 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
 	put_task_struct(usertask);
 }
 
+static void replace_eviction_fence(struct dma_fence **ef,
+				   struct dma_fence *new_ef)
+{
+	struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true
+		/* protected by process_info->lock */);
+
+	/* If we're replacing an unsignaled eviction fence, that fence will
+	 * never be signaled, and if anyone is still waiting on that fence,
+	 * they will hang forever. This should never happen. We should only
+	 * replace the fence in restore_work that only gets scheduled after
+	 * eviction work signaled the fence.
+	 */
+	WARN_ONCE(!dma_fence_is_signaled(old_ef),
+		  "Replacing unsignaled eviction fence");
+	dma_fence_put(old_ef);
+}
+
 /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
  * KFD process identified by process_info
  *
@@ -2781,7 +2799,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	struct amdkfd_process_info *process_info = info;
 	struct amdgpu_vm *peer_vm;
 	struct kgd_mem *mem;
-	struct amdgpu_amdkfd_fence *new_fence;
 	struct list_head duplicate_save;
 	struct amdgpu_sync sync_obj;
 	unsigned long failed_size = 0;
@@ -2907,22 +2924,35 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	/* Wait for validate and PT updates to finish */
 	amdgpu_sync_wait(&sync_obj, false);
 
-	/* Release old eviction fence and create new one, because fence only
-	 * goes from unsignaled to signaled, fence cannot be reused.
-	 * Use context and mm from the old fence.
+	/* The old eviction fence may be unsignaled if restore happens
+	 * after a GPU reset or suspend/resume. Keep the old fence in that
+	 * case. Otherwise release the old eviction fence and create new
+	 * one, because fence only goes from unsignaled to signaled once
+	 * and cannot be reused. Use context and mm from the old fence.
+	 *
+	 * If an old eviction fence signals after this check, that's OK.
+	 * Anyone signaling an eviction fence must stop the queues first
+	 * and schedule another restore worker.
 	 */
-	new_fence = amdgpu_amdkfd_fence_create(
+	if (dma_fence_is_signaled(&process_info->eviction_fence->base)) {
+		struct amdgpu_amdkfd_fence *new_fence =
+			amdgpu_amdkfd_fence_create(
 				process_info->eviction_fence->base.context,
 				process_info->eviction_fence->mm,
 				NULL);
-	if (!new_fence) {
-		pr_err("Failed to create eviction fence\n");
-		ret = -ENOMEM;
-		goto validate_map_fail;
+
+		if (!new_fence) {
+			pr_err("Failed to create eviction fence\n");
+			ret = -ENOMEM;
+			goto validate_map_fail;
+		}
+		dma_fence_put(&process_info->eviction_fence->base);
+		process_info->eviction_fence = new_fence;
+		replace_eviction_fence(ef, dma_fence_get(&new_fence->base));
+	} else {
+		WARN_ONCE(*ef != &process_info->eviction_fence->base,
+			  "KFD eviction fence doesn't match KGD process_info");
 	}
-	dma_fence_put(&process_info->eviction_fence->base);
-	process_info->eviction_fence = new_fence;
-	*ef = dma_fence_get(&new_fence->base);
 
 	/* Attach new eviction fence to all BOs except pinned ones */
 	list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) {
...
@@ -664,7 +664,8 @@ int kfd_process_create_wq(void)
 	if (!kfd_process_wq)
 		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
 	if (!kfd_restore_wq)
-		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);
+		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq",
+							 WQ_FREEZABLE);
 
 	if (!kfd_process_wq || !kfd_restore_wq) {
 		kfd_process_destroy_wq();
@@ -1642,6 +1643,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 	struct amdgpu_fpriv *drv_priv;
 	struct amdgpu_vm *avm;
 	struct kfd_process *p;
+	struct dma_fence *ef;
 	struct kfd_node *dev;
 	int ret;
 
@@ -1661,11 +1663,12 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 
 	ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm,
 						     &p->kgd_process_info,
-						     &p->ef);
+						     &ef);
 	if (ret) {
 		pr_err("Failed to create process VM object\n");
 		return ret;
 	}
+	RCU_INIT_POINTER(p->ef, ef);
 	pdd->drm_priv = drm_file->private_data;
 
 	ret = kfd_process_device_reserve_ib_mem(pdd);
@@ -1908,6 +1911,21 @@ kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node,
 	return -EINVAL;
 }
 
+static int signal_eviction_fence(struct kfd_process *p)
+{
+	struct dma_fence *ef;
+	int ret;
+
+	rcu_read_lock();
+	ef = dma_fence_get_rcu_safe(&p->ef);
+	rcu_read_unlock();
+
+	ret = dma_fence_signal(ef);
+	dma_fence_put(ef);
+
+	return ret;
+}
+
 static void evict_process_worker(struct work_struct *work)
 {
 	int ret;
@@ -1920,31 +1938,46 @@ static void evict_process_worker(struct work_struct *work)
 	 * lifetime of this thread, kfd_process p will be valid
 	 */
 	p = container_of(dwork, struct kfd_process, eviction_work);
-	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
-		  "Eviction fence mismatch\n");
-
-	/* Narrow window of overlap between restore and evict work
-	 * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
-	 * unreserves KFD BOs, it is possible to evicted again. But
-	 * restore has few more steps of finish. So lets wait for any
-	 * previous restore work to complete
-	 */
-	flush_delayed_work(&p->restore_work);
 
 	pr_debug("Started evicting pasid 0x%x\n", p->pasid);
 	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM);
 	if (!ret) {
-		dma_fence_signal(p->ef);
-		dma_fence_put(p->ef);
-		p->ef = NULL;
-		queue_delayed_work(kfd_restore_wq, &p->restore_work,
+		/* If another thread already signaled the eviction fence,
+		 * they are responsible stopping the queues and scheduling
+		 * the restore work.
+		 */
+		if (!signal_eviction_fence(p))
+			queue_delayed_work(kfd_restore_wq, &p->restore_work,
 				msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
+		else
+			kfd_process_restore_queues(p);
 
 		pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
 	} else
 		pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
 }
 
+static int restore_process_helper(struct kfd_process *p)
+{
+	int ret = 0;
+
+	/* VMs may not have been acquired yet during debugging. */
+	if (p->kgd_process_info) {
+		ret = amdgpu_amdkfd_gpuvm_restore_process_bos(
+			p->kgd_process_info, &p->ef);
+		if (ret)
+			return ret;
+	}
+
+	ret = kfd_process_restore_queues(p);
+	if (!ret)
+		pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
+	else
+		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
+
+	return ret;
+}
+
 static void restore_process_worker(struct work_struct *work)
 {
 	struct delayed_work *dwork;
@@ -1970,24 +2003,15 @@ static void restore_process_worker(struct work_struct *work)
 	 */
 
 	p->last_restore_timestamp = get_jiffies_64();
-	/* VMs may not have been acquired yet during debugging. */
-	if (p->kgd_process_info)
-		ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
-							     &p->ef);
+
+	ret = restore_process_helper(p);
 	if (ret) {
 		pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
 			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
 		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
 				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
 		WARN(!ret, "reschedule restore work failed\n");
-		return;
 	}
-
-	ret = kfd_process_restore_queues(p);
-	if (!ret)
-		pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
-	else
-		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
 }
 
 void kfd_suspend_all_processes(void)
@@ -1998,14 +2022,9 @@ void kfd_suspend_all_processes(void)
 
 	WARN(debug_evictions, "Evicting all processes");
 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
-		cancel_delayed_work_sync(&p->eviction_work);
-		flush_delayed_work(&p->restore_work);
-
 		if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND))
 			pr_err("Failed to suspend process 0x%x\n", p->pasid);
-		dma_fence_signal(p->ef);
-		dma_fence_put(p->ef);
-		p->ef = NULL;
+		signal_eviction_fence(p);
 	}
 	srcu_read_unlock(&kfd_processes_srcu, idx);
 }
@@ -2017,7 +2036,7 @@ int kfd_resume_all_processes(void)
 	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
 
 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
-		if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
+		if (restore_process_helper(p)) {
 			pr_err("Restore process %d failed during resume\n",
 			       p->pasid);
 			ret = -EFAULT;
...
@@ -1870,7 +1870,7 @@ static void svm_range_restore_work(struct work_struct *work)
 	/* If validation failed, reschedule another attempt */
 	if (evicted_ranges) {
 		pr_debug("reschedule to restore svm range\n");
-		schedule_delayed_work(&svms->restore_work,
+		queue_delayed_work(system_freezable_wq, &svms->restore_work,
 			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
 
 		kfd_smi_event_queue_restore_rescheduled(mm);
@@ -1946,7 +1946,7 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 			pr_debug("failed to quiesce KFD\n");
 
 		pr_debug("schedule to restore svm %p ranges\n", svms);
-		schedule_delayed_work(&svms->restore_work,
+		queue_delayed_work(system_freezable_wq, &svms->restore_work,
 			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
 	} else {
 		unsigned long s, l;
...