Commit cda0f85b authored by Felix Kuehling, committed by Alex Deucher

drm/amdkfd: refine migration policy with xnack on

With xnack on, the GPU vm fault handler decides the best restore location,
then migrates the range to the best restore location and updates the GPU
mapping to recover from the GPU vm fault.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 485bea1f
...@@ -480,18 +480,19 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, ...@@ -480,18 +480,19 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
* svm_migrate_ram_to_vram - migrate svm range from system to device * svm_migrate_ram_to_vram - migrate svm range from system to device
* @prange: range structure * @prange: range structure
* @best_loc: the device to migrate to * @best_loc: the device to migrate to
* @mm: the process mm structure
* *
* Context: Process context, caller hold mmap read lock, svms lock, prange lock * Context: Process context, caller hold mmap read lock, svms lock, prange lock
* *
* Return: * Return:
* 0 - OK, otherwise error code * 0 - OK, otherwise error code
*/ */
int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc) int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm)
{ {
unsigned long addr, start, end; unsigned long addr, start, end;
struct vm_area_struct *vma; struct vm_area_struct *vma;
struct amdgpu_device *adev; struct amdgpu_device *adev;
struct mm_struct *mm;
int r = 0; int r = 0;
if (prange->actual_loc == best_loc) { if (prange->actual_loc == best_loc) {
...@@ -515,8 +516,6 @@ int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc) ...@@ -515,8 +516,6 @@ int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc)
start = prange->start << PAGE_SHIFT; start = prange->start << PAGE_SHIFT;
end = (prange->last + 1) << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT;
mm = current->mm;
for (addr = start; addr < end;) { for (addr = start; addr < end;) {
unsigned long next; unsigned long next;
......
...@@ -38,7 +38,8 @@ enum MIGRATION_COPY_DIR { ...@@ -38,7 +38,8 @@ enum MIGRATION_COPY_DIR {
FROM_VRAM_TO_RAM FROM_VRAM_TO_RAM
}; };
int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc); int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm);
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm); int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
unsigned long unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
......
...@@ -864,6 +864,9 @@ struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid); ...@@ -864,6 +864,9 @@ struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id); int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
int kfd_process_gpuid_from_kgd(struct kfd_process *p,
struct amdgpu_device *adev, uint32_t *gpuid,
uint32_t *gpuidx);
static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p, static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
uint32_t gpuidx, uint32_t *gpuid) { uint32_t gpuidx, uint32_t *gpuid) {
return gpuidx < p->n_pdds ? p->pdds[gpuidx]->dev->id : -EINVAL; return gpuidx < p->n_pdds ? p->pdds[gpuidx]->dev->id : -EINVAL;
......
...@@ -1676,6 +1676,22 @@ int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id) ...@@ -1676,6 +1676,22 @@ int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id)
return -EINVAL; return -EINVAL;
} }
/* kfd_process_gpuid_from_kgd - find the gpu id and index of a device in a process
 * @p: process whose per-device entries (p->pdds) are searched
 * @adev: device to look for; compared against each entry's kgd handle
 * @gpuid: output, id of the matching device
 * @gpuidx: output, index of the matching entry in p->pdds
 *
 * Return:
 * 0 when a matching entry is found, -EINVAL otherwise. The outputs are only
 * written on a match.
 */
int
kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev,
			   uint32_t *gpuid, uint32_t *gpuidx)
{
	struct kgd_dev *wanted = (struct kgd_dev *)adev;
	int idx;

	for (idx = 0; idx < p->n_pdds; idx++) {
		/* Skip empty slots before touching ->dev */
		if (!p->pdds[idx])
			continue;

		if (p->pdds[idx]->dev->kgd != wanted)
			continue;

		*gpuid = p->pdds[idx]->dev->id;
		*gpuidx = idx;
		return 0;
	}

	return -EINVAL;
}
static void evict_process_worker(struct work_struct *work) static void evict_process_worker(struct work_struct *work)
{ {
int ret; int ret;
......
...@@ -1317,6 +1317,24 @@ static int svm_range_validate_and_map(struct mm_struct *mm, ...@@ -1317,6 +1317,24 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
if (gpuidx < MAX_GPU_INSTANCE) { if (gpuidx < MAX_GPU_INSTANCE) {
bitmap_zero(ctx.bitmap, MAX_GPU_INSTANCE); bitmap_zero(ctx.bitmap, MAX_GPU_INSTANCE);
bitmap_set(ctx.bitmap, gpuidx, 1); bitmap_set(ctx.bitmap, gpuidx, 1);
} else if (ctx.process->xnack_enabled) {
bitmap_copy(ctx.bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
/* If prefetch range to GPU, or GPU retry fault migrate range to
* GPU, which has ACCESS attribute to the range, create mapping
* on that GPU.
*/
if (prange->actual_loc) {
gpuidx = kfd_process_gpuidx_from_gpuid(ctx.process,
prange->actual_loc);
if (gpuidx < 0) {
WARN_ONCE(1, "failed get device by id 0x%x\n",
prange->actual_loc);
return -EINVAL;
}
if (test_bit(gpuidx, prange->bitmap_access))
bitmap_set(ctx.bitmap, gpuidx, 1);
}
} else { } else {
bitmap_or(ctx.bitmap, prange->bitmap_access, bitmap_or(ctx.bitmap, prange->bitmap_access,
prange->bitmap_aip, MAX_GPU_INSTANCE); prange->bitmap_aip, MAX_GPU_INSTANCE);
...@@ -2046,15 +2064,75 @@ svm_range_from_addr(struct svm_range_list *svms, unsigned long addr, ...@@ -2046,15 +2064,75 @@ svm_range_from_addr(struct svm_range_list *svms, unsigned long addr,
return NULL; return NULL;
} }
/* svm_range_best_restore_location - decide the best fault restore location
 * @prange: svm range structure
 * @adev: the GPU on which vm fault happened
 * @gpuidx: output, index of the vm fault GPU as reported by
 *          kfd_process_gpuid_from_kgd(); not written if that lookup fails
 *
 * This is only called when xnack is on, to decide the best location to restore
 * the range mapping after GPU vm fault. Caller uses the best location to do
 * migration if actual loc is not best location, then update GPU page table
 * mapping to the best location.
 *
 * If vm fault gpu is range preferred loc, the best_loc is preferred loc.
 * If vm fault gpu idx is on range ACCESSIBLE bitmap, best_loc is vm fault gpu
 * If vm fault gpu idx is on range ACCESSIBLE_IN_PLACE bitmap, then
 *    if range actual loc is cpu, best_loc is cpu
 *    if vm fault gpu is on xgmi same hive of range actual loc gpu, best_loc is
 *    range actual loc; otherwise (including when the actual loc device cannot
 *    be looked up) best_loc is cpu.
 * Otherwise, GPU no access, best_loc is -1.
 *
 * Return:
 * -1 means vm fault GPU no access (or the fault GPU is not found in the process)
 * 0 for CPU or GPU id
 */
static int32_t
svm_range_best_restore_location(struct svm_range *prange,
				struct amdgpu_device *adev,
				int32_t *gpuidx)
{
	struct amdgpu_device *bo_adev;
	struct kfd_process *p;
	uint32_t gpuid;
	int r;

	p = container_of(prange->svms, struct kfd_process, svms);

	r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, gpuidx);
	if (r < 0) {
		pr_debug("failed to get gpuid from kgd\n");
		return -1;
	}

	if (prange->preferred_loc == gpuid)
		return prange->preferred_loc;

	if (test_bit(*gpuidx, prange->bitmap_access))
		return gpuid;

	if (test_bit(*gpuidx, prange->bitmap_aip)) {
		/* Range currently lives in system memory: restore in place */
		if (!prange->actual_loc)
			return 0;

		bo_adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
		/*
		 * NOTE(review): svm_range_get_adev_by_id() is defined outside
		 * this view; guard against a failed lookup (NULL) so it is
		 * never handed to amdgpu_xgmi_same_hive(). Falling back to
		 * CPU matches the "not same hive" outcome.
		 */
		if (bo_adev && amdgpu_xgmi_same_hive(adev, bo_adev))
			return prange->actual_loc;

		return 0;
	}

	return -1;
}
int int
svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
uint64_t addr) uint64_t addr)
{ {
int r = 0;
struct mm_struct *mm = NULL; struct mm_struct *mm = NULL;
struct svm_range *prange;
struct svm_range_list *svms; struct svm_range_list *svms;
struct svm_range *prange;
struct kfd_process *p; struct kfd_process *p;
int32_t best_loc, gpuidx;
int r = 0;
p = kfd_lookup_process_by_pasid(pasid); p = kfd_lookup_process_by_pasid(pasid);
if (!p) { if (!p) {
...@@ -2089,11 +2167,48 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, ...@@ -2089,11 +2167,48 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
mutex_lock(&prange->migrate_mutex); mutex_lock(&prange->migrate_mutex);
r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, false, false); best_loc = svm_range_best_restore_location(prange, adev, &gpuidx);
if (r) if (best_loc == -1) {
pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpu\n", r, pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n",
svms, prange->start, prange->last); svms, prange->start, prange->last);
r = -EACCES;
goto out_unlock_range;
}
pr_debug("svms %p [0x%lx 0x%lx] best restore 0x%x, actual loc 0x%x\n",
svms, prange->start, prange->last, best_loc,
prange->actual_loc);
if (prange->actual_loc != best_loc) {
if (best_loc) {
r = svm_migrate_ram_to_vram(prange, best_loc, mm);
if (r) {
pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
r, addr);
/* Fallback to system memory if migration to
* VRAM failed
*/
if (prange->actual_loc)
r = svm_migrate_vram_to_ram(prange, mm);
else
r = 0;
}
} else {
r = svm_migrate_vram_to_ram(prange, mm);
}
if (r) {
pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
r, svms, prange->start, prange->last);
goto out_unlock_range;
}
}
r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
if (r)
pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
r, svms, prange->start, prange->last);
out_unlock_range:
mutex_unlock(&prange->migrate_mutex); mutex_unlock(&prange->migrate_mutex);
out_unlock_svms: out_unlock_svms:
mutex_unlock(&svms->lock); mutex_unlock(&svms->lock);
...@@ -2230,7 +2345,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, ...@@ -2230,7 +2345,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
return 0; return 0;
} }
/* svm_range_best_location - decide the best actual location /* svm_range_best_prefetch_location - decide the best prefetch location
* @prange: svm range structure * @prange: svm range structure
* *
* For xnack off: * For xnack off:
...@@ -2252,7 +2367,8 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, ...@@ -2252,7 +2367,8 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
* Return: * Return:
* 0 for CPU or GPU id * 0 for CPU or GPU id
*/ */
static uint32_t svm_range_best_location(struct svm_range *prange) static uint32_t
svm_range_best_prefetch_location(struct svm_range *prange)
{ {
DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
uint32_t best_loc = prange->prefetch_loc; uint32_t best_loc = prange->prefetch_loc;
...@@ -2354,7 +2470,7 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, ...@@ -2354,7 +2470,7 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
int r = 0; int r = 0;
*migrated = false; *migrated = false;
best_loc = svm_range_best_location(prange); best_loc = svm_range_best_prefetch_location(prange);
if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED || if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
best_loc == prange->actual_loc) best_loc == prange->actual_loc)
...@@ -2366,10 +2482,10 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, ...@@ -2366,10 +2482,10 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
if (best_loc) { if (best_loc) {
pr_debug("migrate from ram to vram\n"); pr_debug("migrate from ram to vram\n");
r = svm_migrate_ram_to_vram(prange, best_loc); r = svm_migrate_ram_to_vram(prange, best_loc, mm);
} else { } else {
pr_debug("migrate from vram to ram\n"); pr_debug("migrate from vram to ram\n");
r = svm_migrate_vram_to_ram(prange, current->mm); r = svm_migrate_vram_to_ram(prange, mm);
} }
if (!r) if (!r)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment