Commit eb3c357b authored by Philip Yang, committed by Alex Deucher

drm/amdkfd: Handle errors from svm validate and map

If a new range is split into multiple pranges with max_svm_range_pages
alignment and added to update_list, svm validate and map should keep
going after an error to make sure the prange->mapped_to_gpu flag is up to
date for the whole range.

svm validate and map sets prange->mapped_to_gpu after mapping to GPUs
successfully; otherwise it clears the prange->mapped_to_gpu flag (for
the update mapping case) instead of setting an error flag, so the
redundant error flag can be removed to simplify the code.

Refactor to remove the goto and update the prange->mapped_to_gpu flag
inside svm_range_lock, to guarantee we always evict queues or unmap from
GPUs if there are invalid ranges.

After svm validate and map returns the error -EAGAIN, the caller's retry
will update the mapping for the whole range again.
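
The control flow this patch relies on is easy to see outside the driver. The following is a minimal stand-alone C sketch of the pattern only; struct fake_range, validate_and_map_chunk() and update_all() are hypothetical stand-ins, not the kernel code. The per-range loop carries the error instead of jumping out with goto, sets the mapped flag only when the final chunk succeeded, and the caller records the error while it keeps updating the remaining ranges before returning it.

/* Hypothetical sketch of the error-carry pattern; not the kernel code. */
#include <stdbool.h>
#include <stdio.h>

struct fake_range {
	unsigned long start, end;	/* stand-in for prange start/last */
	bool mapped_to_gpu;		/* mirrors prange->mapped_to_gpu */
};

/* Pretend per-chunk validate/map work that can fail; always succeeds here. */
static int validate_and_map_chunk(struct fake_range *range, unsigned long addr,
				  unsigned long *next)
{
	*next = addr + 1;
	return 0;
}

/* Carry the error in r so every iteration reaches the common tail;
 * the flag is set only once the last chunk of the range mapped cleanly. */
static int validate_and_map(struct fake_range *range)
{
	unsigned long next = 0;
	unsigned long addr;
	int r = 0;

	for (addr = range->start; !r && addr < range->end; ) {
		r = validate_and_map_chunk(range, addr, &next);

		if (!r && next == range->end)
			range->mapped_to_gpu = true;

		addr = next;
	}
	return r;
}

/* Caller keeps processing all ranges and remembers the error instead of
 * breaking early, mirroring the ret = r change in svm_range_set_attr. */
static int update_all(struct fake_range *ranges, int n)
{
	int i, r, ret = 0;

	for (i = 0; i < n; i++) {
		r = validate_and_map(&ranges[i]);
		if (r)
			ret = r;
	}
	return ret;
}

int main(void)
{
	struct fake_range ranges[2] = { { 0, 4, false }, { 4, 8, false } };

	printf("ret=%d mapped=%d,%d\n", update_all(ranges, 2),
	       ranges[0].mapped_to_gpu, ranges[1].mapped_to_gpu);
	return 0;
}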

Fixes: c22b0440 ("drm/amdkfd: flag added to handle errors from svm validate and map")
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Tested-by: James Zhu <james.zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 21e43386
@@ -829,7 +829,7 @@ svm_range_is_same_attrs(struct kfd_process *p, struct svm_range *prange,
 		}
 	}
 
-	return !prange->is_error_flag;
+	return true;
 }
 
 /**
@@ -1679,71 +1679,66 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 	start = prange->start << PAGE_SHIFT;
 	end = (prange->last + 1) << PAGE_SHIFT;
-	for (addr = start; addr < end && !r; ) {
+	for (addr = start; !r && addr < end; ) {
 		struct hmm_range *hmm_range;
 		struct vm_area_struct *vma;
-		unsigned long next;
+		unsigned long next = 0;
 		unsigned long offset;
 		unsigned long npages;
 		bool readonly;
 
 		vma = vma_lookup(mm, addr);
-		if (!vma) {
+		if (vma) {
+			readonly = !(vma->vm_flags & VM_WRITE);
+			next = min(vma->vm_end, end);
+			npages = (next - addr) >> PAGE_SHIFT;
+			WRITE_ONCE(p->svms.faulting_task, current);
+			r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
+						       readonly, owner, NULL,
+						       &hmm_range);
+			WRITE_ONCE(p->svms.faulting_task, NULL);
+			if (r) {
+				pr_debug("failed %d to get svm range pages\n", r);
+				if (r == -EBUSY)
+					r = -EAGAIN;
+			}
+		} else {
 			r = -EFAULT;
-			goto unreserve_out;
-		}
-		readonly = !(vma->vm_flags & VM_WRITE);
-		next = min(vma->vm_end, end);
-		npages = (next - addr) >> PAGE_SHIFT;
-		WRITE_ONCE(p->svms.faulting_task, current);
-		r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
-					       readonly, owner, NULL,
-					       &hmm_range);
-		WRITE_ONCE(p->svms.faulting_task, NULL);
-		if (r) {
-			pr_debug("failed %d to get svm range pages\n", r);
-			if (r == -EBUSY)
-				r = -EAGAIN;
-			goto unreserve_out;
 		}
 
-		offset = (addr - start) >> PAGE_SHIFT;
-		r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
-				      hmm_range->hmm_pfns);
-		if (r) {
-			pr_debug("failed %d to dma map range\n", r);
-			goto unreserve_out;
+		if (!r) {
+			offset = (addr - start) >> PAGE_SHIFT;
+			r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
+					      hmm_range->hmm_pfns);
+			if (r)
+				pr_debug("failed %d to dma map range\n", r);
 		}
 
 		svm_range_lock(prange);
-		if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+		if (!r && amdgpu_hmm_range_get_pages_done(hmm_range)) {
 			pr_debug("hmm update the range, need validate again\n");
 			r = -EAGAIN;
-			goto unlock_out;
 		}
-		if (!list_empty(&prange->child_list)) {
+
+		if (!r && !list_empty(&prange->child_list)) {
 			pr_debug("range split by unmap in parallel, validate again\n");
 			r = -EAGAIN;
-			goto unlock_out;
 		}
 
-		r = svm_range_map_to_gpus(prange, offset, npages, readonly,
-					  ctx->bitmap, wait, flush_tlb);
+		if (!r)
+			r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+						  ctx->bitmap, wait, flush_tlb);
+
+		if (!r && next == end)
+			prange->mapped_to_gpu = true;
 
-unlock_out:
 		svm_range_unlock(prange);
 
 		addr = next;
 	}
 
-	if (addr == end)
-		prange->mapped_to_gpu = true;
-
-unreserve_out:
 	svm_range_unreserve_bos(ctx);
-	prange->is_error_flag = !!r;
 	if (!r)
 		prange->validate_timestamp = ktime_get_boottime();
@@ -2112,7 +2107,8 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
 		next = interval_tree_iter_next(node, start, last);
 		next_start = min(node->last, last) + 1;
 
-		if (svm_range_is_same_attrs(p, prange, nattr, attrs)) {
+		if (svm_range_is_same_attrs(p, prange, nattr, attrs) &&
+		    prange->mapped_to_gpu) {
 			/* nothing to do */
 		} else if (node->start < start || node->last > last) {
 			/* node intersects the update range and its attributes
@@ -3525,7 +3521,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 	struct svm_range *next;
 	bool update_mapping = false;
 	bool flush_tlb;
-	int r = 0;
+	int r, ret = 0;
 
 	pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
 		 p->pasid, &p->svms, start, start + size - 1, size);
@@ -3613,7 +3609,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 out_unlock_range:
 		mutex_unlock(&prange->migrate_mutex);
 		if (r)
-			break;
+			ret = r;
 	}
 
 	dynamic_svm_range_dump(svms);
@@ -3626,7 +3622,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 	pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,
 		 &p->svms, start, start + size - 1, r);
 
-	return r;
+	return ret ? ret : r;
 }
 
 static int
......
@@ -133,7 +133,6 @@ struct svm_range {
 	DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
 	DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
 	bool				mapped_to_gpu;
-	bool				is_error_flag;
 };
 
 static inline void svm_range_lock(struct svm_range *prange)
......