Commit e095fc17 authored by Christian König, committed by Alex Deucher

drm/amdgpu: explicitly sync to VM updates v2

Allows us to reduce the overhead of syncing to fences a bit.

v2: also drop adev parameter from the functions
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 6ceeb144
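
The diff below boils down to two related API changes: amdgpu_sync_fence() drops its now-unused struct amdgpu_device parameter, and call sites that add VM page-table update fences switch to a new amdgpu_sync_vm_fence() helper, which records the fence as the last VM update before adding it as an implicit dependency. Previously amdgpu_sync_fence() inspected every fence (same device, owner == AMDGPU_FENCE_OWNER_VM) to detect VM updates; now the callers say so directly, which is the overhead the commit message refers to. The following standalone C sketch only models that split: the toy_* names are invented stand-ins for illustration, not the driver's real dma_fence/amdgpu_sync structures.

    /*
     * Standalone sketch (not kernel code): toy_* types are hypothetical
     * stand-ins used to illustrate the refactoring pattern in this commit.
     */
    #include <stdbool.h>
    #include <stdio.h>

    struct toy_fence {
            unsigned long seqno;
    };

    struct toy_sync {
            struct toy_fence *last_vm_update;  /* latest VM page-table update fence */
    };

    /* After the change: no device argument, the sync object alone is enough. */
    static int toy_sync_fence(struct toy_sync *sync, struct toy_fence *f,
                              bool explicit)
    {
            if (!f)
                    return 0;
            (void)sync;  /* the real code hashes the fence into sync->fences here */
            printf("queued fence %lu (explicit=%d)\n", f->seqno, explicit);
            return 0;
    }

    /*
     * Dedicated helper: callers that add a VM update fence state it directly
     * instead of the generic path guessing it from the fence owner.
     */
    static int toy_sync_vm_fence(struct toy_sync *sync, struct toy_fence *fence)
    {
            if (!fence)
                    return 0;
            sync->last_vm_update = fence;        /* remember it as the VM update */
            return toy_sync_fence(sync, fence, false);
    }

    int main(void)
    {
            struct toy_sync sync = { 0 };
            struct toy_fence pt_update = { .seqno = 42 };

            /* call-site change: was toy_sync_fence(adev, &sync, &pt_update, false) */
            return toy_sync_vm_fence(&sync, &pt_update);
    }

The real helper is added in amdgpu_sync.c; the hunks below show both the new function and the call-site conversions.
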
@@ -358,7 +358,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
         if (ret)
                 return ret;
-        return amdgpu_sync_fence(NULL, sync, vm->last_update, false);
+        return amdgpu_sync_fence(sync, vm->last_update, false);
 }
 static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
@@ -751,7 +751,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
         amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
-        amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
+        amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
         return 0;
 }
@@ -770,7 +770,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
                 return ret;
         }
-        return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
+        return amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
 }
 static int map_bo_to_gpuvm(struct amdgpu_device *adev,
@@ -2045,7 +2045,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
                         pr_debug("Memory eviction: Validate BOs failed. Try again\n");
                         goto validate_map_fail;
                 }
-                ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false);
+                ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false);
                 if (ret) {
                         pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
                         goto validate_map_fail;
...
@@ -797,29 +797,23 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
         if (r)
                 return r;
-        r = amdgpu_sync_fence(adev, &p->job->sync,
-                              fpriv->prt_va->last_pt_update, false);
+        r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
         if (r)
                 return r;
         if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
-                struct dma_fence *f;
                 bo_va = fpriv->csa_va;
                 BUG_ON(!bo_va);
                 r = amdgpu_vm_bo_update(adev, bo_va, false);
                 if (r)
                         return r;
-                f = bo_va->last_pt_update;
-                r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+                r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
                 if (r)
                         return r;
         }
         amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-                struct dma_fence *f;
                 /* ignore duplicates */
                 bo = ttm_to_amdgpu_bo(e->tv.bo);
                 if (!bo)
@@ -833,8 +827,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
                 if (r)
                         return r;
-                f = bo_va->last_pt_update;
-                r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+                r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
                 if (r)
                         return r;
         }
@@ -847,7 +840,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
         if (r)
                 return r;
-        r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
+        r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
         if (r)
                 return r;
@@ -989,7 +982,7 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
                 dma_fence_put(old);
         }
-        r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
+        r = amdgpu_sync_fence(&p->job->sync, fence, true);
         dma_fence_put(fence);
         if (r)
                 return r;
@@ -1011,7 +1004,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
                 return r;
         }
-        r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
+        r = amdgpu_sync_fence(&p->job->sync, fence, true);
         dma_fence_put(fence);
         return r;
...
@@ -206,7 +206,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
         int r;
         if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
-                return amdgpu_sync_fence(adev, sync, ring->vmid_wait, false);
+                return amdgpu_sync_fence(sync, ring->vmid_wait, false);
         fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
         if (!fences)
@@ -241,7 +241,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
                         return -ENOMEM;
                 }
-                r = amdgpu_sync_fence(adev, sync, &array->base, false);
+                r = amdgpu_sync_fence(sync, &array->base, false);
                 dma_fence_put(ring->vmid_wait);
                 ring->vmid_wait = &array->base;
                 return r;
@@ -294,7 +294,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
                 tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
                 if (tmp) {
                         *id = NULL;
-                        r = amdgpu_sync_fence(adev, sync, tmp, false);
+                        r = amdgpu_sync_fence(sync, tmp, false);
                         return r;
                 }
                 needs_flush = true;
@@ -303,7 +303,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
         /* Good we can use this VMID. Remember this submission as
          * user of the VMID.
          */
-        r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
+        r = amdgpu_sync_fence(&(*id)->active, fence, false);
         if (r)
                 return r;
@@ -375,7 +375,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
         /* Good, we can use this VMID. Remember this submission as
          * user of the VMID.
          */
-        r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
+        r = amdgpu_sync_fence(&(*id)->active, fence, false);
         if (r)
                 return r;
@@ -435,8 +435,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
         id = idle;
         /* Remember this submission as user of the VMID */
-        r = amdgpu_sync_fence(ring->adev, &id->active,
-                              fence, false);
+        r = amdgpu_sync_fence(&id->active, fence, false);
         if (r)
                 goto error;
...
@@ -193,8 +193,7 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
         fence = amdgpu_sync_get_fence(&job->sync, &explicit);
         if (fence && explicit) {
                 if (drm_sched_dependency_optimized(fence, s_entity)) {
-                        r = amdgpu_sync_fence(ring->adev, &job->sched_sync,
-                                              fence, false);
+                        r = amdgpu_sync_fence(&job->sched_sync, fence, false);
                         if (r)
                                 DRM_ERROR("Error adding fence (%d)\n", r);
                 }
...
@@ -129,7 +129,8 @@ static void amdgpu_sync_keep_later(struct dma_fence **keep,
  * Tries to add the fence to an existing hash entry. Returns true when an entry
  * was found, false otherwise.
  */
-static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, bool explicit)
+static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
+                                  bool explicit)
 {
         struct amdgpu_sync_entry *e;
@@ -151,19 +152,18 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
  * amdgpu_sync_fence - remember to sync to this fence
  *
  * @sync: sync object to add fence to
- * @fence: fence to sync to
+ * @f: fence to sync to
+ * @explicit: if this is an explicit dependency
  *
+ * Add the fence to the sync object.
  */
-int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
-                      struct dma_fence *f, bool explicit)
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+                      bool explicit)
 {
         struct amdgpu_sync_entry *e;
         if (!f)
                 return 0;
-        if (amdgpu_sync_same_dev(adev, f) &&
-            amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM)
-                amdgpu_sync_keep_later(&sync->last_vm_update, f);
         if (amdgpu_sync_add_later(sync, f, explicit))
                 return 0;
@@ -179,6 +179,24 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
         return 0;
 }
+/**
+ * amdgpu_sync_vm_fence - remember to sync to this VM fence
+ *
+ * @adev: amdgpu device
+ * @sync: sync object to add fence to
+ * @fence: the VM fence to add
+ *
+ * Add the fence to the sync object and remember it as VM update.
+ */
+int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
+{
+        if (!fence)
+                return 0;
+        amdgpu_sync_keep_later(&sync->last_vm_update, fence);
+        return amdgpu_sync_fence(sync, fence, false);
+}
 /**
  * amdgpu_sync_resv - sync to a reservation object
  *
@@ -204,7 +222,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
         /* always sync to the exclusive fence */
         f = dma_resv_get_excl(resv);
-        r = amdgpu_sync_fence(adev, sync, f, false);
+        r = amdgpu_sync_fence(sync, f, false);
         flist = dma_resv_get_list(resv);
         if (!flist || r)
@@ -239,7 +257,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
                         continue;
                 }
-                r = amdgpu_sync_fence(adev, sync, f, false);
+                r = amdgpu_sync_fence(sync, f, false);
                 if (r)
                         break;
         }
@@ -340,7 +358,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
         hash_for_each_safe(source->fences, i, tmp, e, node) {
                 f = e->fence;
                 if (!dma_fence_is_signaled(f)) {
-                        r = amdgpu_sync_fence(NULL, clone, f, e->explicit);
+                        r = amdgpu_sync_fence(clone, f, e->explicit);
                         if (r)
                                 return r;
                 } else {
...
@@ -40,8 +40,9 @@ struct amdgpu_sync {
 };
 void amdgpu_sync_create(struct amdgpu_sync *sync);
-int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
-                      struct dma_fence *f, bool explicit);
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+                      bool explicit);
+int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence);
 int amdgpu_sync_resv(struct amdgpu_device *adev,
                      struct amdgpu_sync *sync,
                      struct dma_resv *resv,
@@ -49,7 +50,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
                      bool explicit_sync);
 struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
                                          struct amdgpu_ring *ring);
-struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit);
+struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync,
+                                        bool *explicit);
 int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
 int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
...
@@ -71,7 +71,7 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
         p->num_dw_left = ndw;
         /* Wait for moves to be completed */
-        r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
+        r = amdgpu_sync_fence(&p->job->sync, exclusive, false);
         if (r)
                 return r;
...