Commit 9527b9ca authored by Philip Yang, committed by Alex Deucher

drm/amdkfd: evict svm bo worker handle error

Migrating VRAM to RAM may fail to find the vma if the process is exiting
and the vma has been removed. Make the evict svm_bo worker set
prange->svm_bo to NULL, and warn when the svm_bo ref count != 1, only if
the VRAM-to-RAM migration succeeded.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 4e2f50e2
...@@ -638,6 +638,22 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, ...@@ -638,6 +638,22 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
return r; return r;
} }
/**
* svm_migrate_vma_to_ram - migrate range inside one vma from device to system
*
* @adev: amdgpu device to migrate from
* @prange: svm range structure
* @vma: vm_area_struct that range [start, end] belongs to
* @start: range start virtual address in pages
* @end: range end virtual address in pages
*
* Context: Process context, caller hold mmap read lock, prange->migrate_mutex
*
* Return:
* 0 - success with all pages migrated
* negative values - indicate error
* positive values - partial migration, number of pages not migrated
*/
static long static long
svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start, uint64_t end) struct vm_area_struct *vma, uint64_t start, uint64_t end)
...@@ -709,8 +725,6 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, ...@@ -709,8 +725,6 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
pdd = svm_range_get_pdd_by_adev(prange, adev); pdd = svm_range_get_pdd_by_adev(prange, adev);
if (pdd) if (pdd)
WRITE_ONCE(pdd->page_out, pdd->page_out + cpages); WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);
return upages;
} }
return r ? r : upages; return r ? r : upages;
} }
...@@ -720,7 +734,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, ...@@ -720,7 +734,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
* @prange: range structure * @prange: range structure
* @mm: process mm, use current->mm if NULL * @mm: process mm, use current->mm if NULL
* *
* Context: Process context, caller hold mmap read lock, svms lock, prange lock * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
* *
* Return: * Return:
* 0 - OK, otherwise error code * 0 - OK, otherwise error code
...@@ -759,13 +773,16 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) ...@@ -759,13 +773,16 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
unsigned long next; unsigned long next;
vma = find_vma(mm, addr); vma = find_vma(mm, addr);
if (!vma || addr < vma->vm_start) if (!vma || addr < vma->vm_start) {
pr_debug("failed to find vma for prange %p\n", prange);
r = -EFAULT;
break; break;
}
next = min(vma->vm_end, end); next = min(vma->vm_end, end);
r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next); r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
if (r < 0) { if (r < 0) {
pr_debug("failed %ld to migrate\n", r); pr_debug("failed %ld to migrate prange %p\n", r, prange);
break; break;
} else { } else {
upages += r; upages += r;
...@@ -773,7 +790,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) ...@@ -773,7 +790,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
addr = next; addr = next;
} }
if (!upages) { if (r >= 0 && !upages) {
svm_range_vram_node_free(prange); svm_range_vram_node_free(prange);
prange->actual_loc = 0; prange->actual_loc = 0;
} }
......
...@@ -3155,6 +3155,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) ...@@ -3155,6 +3155,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
struct svm_range_bo *svm_bo; struct svm_range_bo *svm_bo;
struct kfd_process *p; struct kfd_process *p;
struct mm_struct *mm; struct mm_struct *mm;
int r = 0;
svm_bo = container_of(work, struct svm_range_bo, eviction_work); svm_bo = container_of(work, struct svm_range_bo, eviction_work);
if (!svm_bo_ref_unless_zero(svm_bo)) if (!svm_bo_ref_unless_zero(svm_bo))
...@@ -3170,7 +3171,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) ...@@ -3170,7 +3171,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
mmap_read_lock(mm); mmap_read_lock(mm);
spin_lock(&svm_bo->list_lock); spin_lock(&svm_bo->list_lock);
while (!list_empty(&svm_bo->range_list)) { while (!list_empty(&svm_bo->range_list) && !r) {
struct svm_range *prange = struct svm_range *prange =
list_first_entry(&svm_bo->range_list, list_first_entry(&svm_bo->range_list,
struct svm_range, svm_bo_list); struct svm_range, svm_bo_list);
...@@ -3184,15 +3185,18 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) ...@@ -3184,15 +3185,18 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
mutex_lock(&prange->migrate_mutex); mutex_lock(&prange->migrate_mutex);
do { do {
svm_migrate_vram_to_ram(prange, r = svm_migrate_vram_to_ram(prange,
svm_bo->eviction_fence->mm); svm_bo->eviction_fence->mm);
} while (prange->actual_loc && --retries); } while (!r && prange->actual_loc && --retries);
WARN(prange->actual_loc, "Migration failed during eviction");
mutex_lock(&prange->lock); if (!r && prange->actual_loc)
prange->svm_bo = NULL; pr_info_once("Migration failed during eviction");
mutex_unlock(&prange->lock);
if (!prange->actual_loc) {
mutex_lock(&prange->lock);
prange->svm_bo = NULL;
mutex_unlock(&prange->lock);
}
mutex_unlock(&prange->migrate_mutex); mutex_unlock(&prange->migrate_mutex);
spin_lock(&svm_bo->list_lock); spin_lock(&svm_bo->list_lock);
...@@ -3201,10 +3205,11 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) ...@@ -3201,10 +3205,11 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
mmap_read_unlock(mm); mmap_read_unlock(mm);
dma_fence_signal(&svm_bo->eviction_fence->base); dma_fence_signal(&svm_bo->eviction_fence->base);
/* This is the last reference to svm_bo, after svm_range_vram_node_free /* This is the last reference to svm_bo, after svm_range_vram_node_free
* has been called in svm_migrate_vram_to_ram * has been called in svm_migrate_vram_to_ram
*/ */
WARN_ONCE(kref_read(&svm_bo->kref) != 1, "This was not the last reference\n"); WARN_ONCE(!r && kref_read(&svm_bo->kref) != 1, "This was not the last reference\n");
svm_range_bo_unref(svm_bo); svm_range_bo_unref(svm_bo);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment