Commit 8a7c184a authored by Felix Kuehling's avatar Felix Kuehling Committed by Alex Deucher

drm/amdkfd: svm range eviction and restore

When the HMM interval notifier callback reports that the CPU page table
is about to be updated, stop the process queues if the updated address
belongs to an svm range registered in the process svms objects tree. Then
schedule restore work to update the GPU page table using the new page
addresses in the updated svm range.

The restore worker flushes any deferred work to make sure it restores
an up-to-date svm_range_list.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent f80fe9d3
...@@ -738,6 +738,8 @@ struct svm_range_list { ...@@ -738,6 +738,8 @@ struct svm_range_list {
struct work_struct deferred_list_work; struct work_struct deferred_list_work;
struct list_head deferred_range_list; struct list_head deferred_range_list;
spinlock_t deferred_list_lock; spinlock_t deferred_list_lock;
atomic_t evicted_ranges;
struct delayed_work restore_work;
}; };
/* Process data */ /* Process data */
......
...@@ -1064,6 +1064,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, ...@@ -1064,6 +1064,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
cancel_delayed_work_sync(&p->eviction_work); cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work); cancel_delayed_work_sync(&p->restore_work);
cancel_delayed_work_sync(&p->svms.restore_work);
mutex_lock(&p->mutex); mutex_lock(&p->mutex);
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
*/ */
#include <linux/types.h> #include <linux/types.h>
#include <linux/sched/task.h>
#include "amdgpu_sync.h" #include "amdgpu_sync.h"
#include "amdgpu_object.h" #include "amdgpu_object.h"
#include "amdgpu_vm.h" #include "amdgpu_vm.h"
...@@ -29,6 +30,8 @@ ...@@ -29,6 +30,8 @@
#include "kfd_priv.h" #include "kfd_priv.h"
#include "kfd_svm.h" #include "kfd_svm.h"
#define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
static bool static bool
svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range, const struct mmu_notifier_range *range,
...@@ -251,6 +254,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, ...@@ -251,6 +254,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
INIT_LIST_HEAD(&prange->insert_list); INIT_LIST_HEAD(&prange->insert_list);
INIT_LIST_HEAD(&prange->deferred_list); INIT_LIST_HEAD(&prange->deferred_list);
INIT_LIST_HEAD(&prange->child_list); INIT_LIST_HEAD(&prange->child_list);
atomic_set(&prange->invalid, 0);
mutex_init(&prange->lock); mutex_init(&prange->lock);
svm_range_set_default_attributes(&prange->preferred_loc, svm_range_set_default_attributes(&prange->preferred_loc,
&prange->prefetch_loc, &prange->prefetch_loc,
...@@ -963,6 +967,129 @@ svm_range_list_lock_and_flush_work(struct svm_range_list *svms, ...@@ -963,6 +967,129 @@ svm_range_list_lock_and_flush_work(struct svm_range_list *svms,
goto retry_flush_work; goto retry_flush_work;
} }
/**
 * svm_range_restore_work - delayed work handler that restores evicted ranges
 * @work: the work_struct embedded in svms->restore_work
 *
 * Walks the svm range list and calls svm_range_validate_and_map() on every
 * range whose @invalid counter is non-zero. When all such ranges have been
 * handled and no new eviction was recorded in the meantime, the evicted
 * counter is reset and kgd2kfd_resume_mm() is called for the process mm.
 *
 * If mapping a range fails, or if either the per-range @invalid counter or
 * svms->evicted_ranges changed while this worker was running (detected by
 * atomic_cmpxchg()), the worker drops its locks and reschedules itself after
 * AMDGPU_SVM_RANGE_RESTORE_DELAY_MS so the next pass sees the latest state.
 */
static void svm_range_restore_work(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct amdkfd_process_info *process_info;
	struct svm_range_list *svms;
	struct svm_range *prange;
	struct kfd_process *p;
	struct mm_struct *mm;
	int evicted_ranges;
	int invalid;
	int r;

	svms = container_of(dwork, struct svm_range_list, restore_work);

	/* Nothing was evicted (or a previous pass already restored it) */
	evicted_ranges = atomic_read(&svms->evicted_ranges);
	if (!evicted_ranges)
		return;

	pr_debug("restore svm ranges\n");

	/* kfd_process_notifier_release destroys this worker thread. So during
	 * the lifetime of this thread, kfd_process and mm will be valid.
	 */
	p = container_of(svms, struct kfd_process, svms);
	process_info = p->kgd_process_info;
	mm = p->mm;
	if (!mm)
		return;

	mutex_lock(&process_info->lock);
	/* NOTE(review): helper is defined outside this view; it presumably
	 * returns with the mmap write lock held, since that lock is released
	 * on the exit path below - confirm against its definition.
	 */
	svm_range_list_lock_and_flush_work(svms, mm);
	mutex_lock(&svms->lock);

	/* Re-sample under the locks; this snapshot is what the final cmpxchg
	 * compares against to detect evictions that raced with this pass.
	 */
	evicted_ranges = atomic_read(&svms->evicted_ranges);

	list_for_each_entry(prange, &svms->list, list) {
		invalid = atomic_read(&prange->invalid);
		if (!invalid)
			continue;

		pr_debug("restoring svms 0x%p prange 0x%p [0x%lx %lx] inv %d\n",
			 prange->svms, prange, prange->start, prange->last,
			 invalid);

		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
					       false, true);
		if (r) {
			pr_debug("failed %d to map 0x%lx to gpus\n", r,
				 prange->start);
			goto unlock_out;
		}

		/* The range was invalidated again while being mapped; leave
		 * the counters alone and retry on the next pass.
		 */
		if (atomic_cmpxchg(&prange->invalid, invalid, 0) != invalid)
			goto unlock_out;
	}

	/* A new eviction arrived during this pass; retry instead of resuming */
	if (atomic_cmpxchg(&svms->evicted_ranges, evicted_ranges, 0) !=
	    evicted_ranges)
		goto unlock_out;

	/* Counter is cleared, so do not reschedule below */
	evicted_ranges = 0;

	r = kgd2kfd_resume_mm(mm);
	if (r) {
		/* No recovery from this failure. Probably the CP is
		 * hanging. No point trying again.
		 */
		pr_debug("failed %d to resume KFD\n", r);
	} else {
		/* Only report success when the resume actually succeeded */
		pr_debug("restore svm ranges successfully\n");
	}

unlock_out:
	mutex_unlock(&svms->lock);
	mmap_write_unlock(mm);
	mutex_unlock(&process_info->lock);

	/* If validation failed, reschedule another attempt */
	if (evicted_ranges) {
		pr_debug("reschedule to restore svm range\n");
		schedule_delayed_work(&svms->restore_work,
			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
	}
}
/**
 * svm_range_evict - mark an svm range invalid and kick off its restore
 * @prange: range whose CPU mapping is being invalidated
 * @mm: mm passed to kgd2kfd_quiesce_mm() on the first eviction
 * @start: start of the invalidated interval (not referenced in the body)
 * @last: end of the invalidated interval (not referenced in the body)
 *
 * Bumps the per-range invalid counter and the per-process evicted_ranges
 * counter. Only the caller that moves evicted_ranges from 0 to 1 quiesces
 * the mm and schedules the restore worker; later callers just count.
 *
 * No lock is taken to synchronise with the restore worker. The counters are
 * atomics, and the worker compares them against the values it sampled, so an
 * invalidation that lands while it runs makes it reschedule itself rather
 * than resume.
 *
 * Return: 0 when this was not the first eviction, otherwise the return value
 * of kgd2kfd_quiesce_mm().
 */
static int
svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
		unsigned long start, unsigned long last)
{
	struct svm_range_list *svms = prange->svms;
	int ret;

	atomic_inc(&prange->invalid);

	/* Someone else already performed the first eviction */
	if (atomic_inc_return(&svms->evicted_ranges) != 1)
		return 0;

	pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
		 prange->svms, prange->start, prange->last);

	/* First eviction, stop the queues */
	ret = kgd2kfd_quiesce_mm(mm);
	if (ret)
		pr_debug("failed to quiesce KFD\n");

	/* The restore is scheduled whether or not the quiesce succeeded */
	pr_debug("schedule to restore svm %p ranges\n", svms);
	schedule_delayed_work(&svms->restore_work,
		msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));

	return ret;
}
static struct svm_range *svm_range_clone(struct svm_range *old) static struct svm_range *svm_range_clone(struct svm_range *old)
{ {
struct svm_range *new; struct svm_range *new;
...@@ -1331,6 +1458,11 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, ...@@ -1331,6 +1458,11 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
* svm_range_cpu_invalidate_pagetables - interval notifier callback * svm_range_cpu_invalidate_pagetables - interval notifier callback
* *
* MMU range unmap notifier to remove svm ranges * MMU range unmap notifier to remove svm ranges
*
* If GPU vm fault retry is not enabled, evict the svm range, then restore
* work will update GPU mapping.
* If GPU vm fault retry is enabled, unmap the svm range from GPU, vm fault
* will update GPU mapping.
*/ */
static bool static bool
svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
...@@ -1364,6 +1496,7 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, ...@@ -1364,6 +1496,7 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
svm_range_unmap_from_cpu(mni->mm, prange, start, last); svm_range_unmap_from_cpu(mni->mm, prange, start, last);
break; break;
default: default:
svm_range_evict(prange, mni->mm, start, last);
break; break;
} }
...@@ -1389,6 +1522,8 @@ int svm_range_list_init(struct kfd_process *p) ...@@ -1389,6 +1522,8 @@ int svm_range_list_init(struct kfd_process *p)
svms->objects = RB_ROOT_CACHED; svms->objects = RB_ROOT_CACHED;
mutex_init(&svms->lock); mutex_init(&svms->lock);
INIT_LIST_HEAD(&svms->list); INIT_LIST_HEAD(&svms->list);
atomic_set(&svms->evicted_ranges, 0);
INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work); INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
INIT_LIST_HEAD(&svms->deferred_range_list); INIT_LIST_HEAD(&svms->deferred_range_list);
spin_lock_init(&svms->deferred_list_lock); spin_lock_init(&svms->deferred_list_lock);
......
...@@ -67,6 +67,7 @@ struct svm_work_list_item { ...@@ -67,6 +67,7 @@ struct svm_work_list_item {
* @prefetch_loc: last prefetch location, 0 for CPU, or GPU id * @prefetch_loc: last prefetch location, 0 for CPU, or GPU id
* @actual_loc: the actual location, 0 for CPU, or GPU id * @actual_loc: the actual location, 0 for CPU, or GPU id
* @granularity:migration granularity, log2 num pages * @granularity:migration granularity, log2 num pages
* @invalid: not 0 means cpu page table is invalidated
* @notifier: register mmu interval notifier * @notifier: register mmu interval notifier
* @work_item: deferred work item information * @work_item: deferred work item information
* @deferred_list: list header used to add range to deferred list * @deferred_list: list header used to add range to deferred list
...@@ -97,6 +98,7 @@ struct svm_range { ...@@ -97,6 +98,7 @@ struct svm_range {
uint32_t prefetch_loc; uint32_t prefetch_loc;
uint32_t actual_loc; uint32_t actual_loc;
uint8_t granularity; uint8_t granularity;
atomic_t invalid;
struct mmu_interval_notifier notifier; struct mmu_interval_notifier notifier;
struct svm_work_list_item work_item; struct svm_work_list_item work_item;
struct list_head deferred_list; struct list_head deferred_list;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment