Commit d490ecf5 authored by Thomas Hellström, committed by Rodrigo Vivi

drm/xe: Rework xe_exec and the VM rebind worker to use the drm_exec helper

Replace the calls to ttm_eu_reserve_buffers() by using the drm_exec
helper instead. Also make sure the locking loop covers any calls to
xe_bo_validate() / ttm_bo_validate() so that these function calls may
easily benefit from being called from within an unsealed locking
transaction and may thus perform blocking dma_resv locks in the future.

For the unlock, we remove an assert that the vm->rebind_list is empty
when locks are released. If the error path is hit with a partly locked
list, that assert may no longer hold true, so we chose to remove it.

v3:
- Don't accept duplicate bo locks in the rebind worker.
v5:
- Loop over drm_exec objects in reverse when unlocking.
v6:
- We can't keep the WW ticket when retrying validation on OOM. Fix.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230908091716.36984-5-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
parent b7ab8c4f
......@@ -8,6 +8,7 @@ config DRM_XE
select SHMEM
select TMPFS
select DRM_BUDDY
select DRM_EXEC
select DRM_KMS_HELPER
select DRM_PANEL
select DRM_SUBALLOC_HELPER
......@@ -21,6 +22,7 @@ config DRM_XE
select VMAP_PFN
select DRM_TTM
select DRM_TTM_HELPER
select DRM_EXEC
select DRM_GPUVM
select DRM_SCHED
select MMU_NOTIFIER
......
......@@ -6,6 +6,7 @@
#include "xe_exec.h"
#include <drm/drm_device.h>
#include <drm/drm_exec.h>
#include <drm/drm_file.h>
#include <drm/xe_drm.h>
#include <linux/delay.h>
......@@ -93,25 +94,16 @@
* Unlock all
*/
#define XE_EXEC_BIND_RETRY_TIMEOUT_MS 1000
static int xe_exec_begin(struct xe_exec_queue *q, struct ww_acquire_ctx *ww,
struct ttm_validate_buffer tv_onstack[],
struct ttm_validate_buffer **tv,
struct list_head *objs)
static int xe_exec_begin(struct drm_exec *exec, struct xe_vm *vm)
{
struct xe_vm *vm = q->vm;
struct xe_vma *vma;
LIST_HEAD(dups);
ktime_t end = 0;
int err = 0;
*tv = NULL;
if (xe_vm_no_dma_fences(q->vm))
if (xe_vm_no_dma_fences(vm))
return 0;
retry:
err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1);
err = xe_vm_lock_dma_resv(vm, exec, 1, true);
if (err)
return err;
......@@ -127,42 +119,13 @@ static int xe_exec_begin(struct xe_exec_queue *q, struct ww_acquire_ctx *ww,
continue;
err = xe_bo_validate(xe_vma_bo(vma), vm, false);
if (err) {
xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs);
*tv = NULL;
if (err)
break;
}
}
/*
* With multiple active VMs, under memory pressure, it is possible that
* ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM.
* Until ttm properly handles locking in such scenarios, best thing the
* driver can do is retry with a timeout.
*/
if (err == -ENOMEM) {
ktime_t cur = ktime_get();
end = end ? : ktime_add_ms(cur, XE_EXEC_BIND_RETRY_TIMEOUT_MS);
if (ktime_before(cur, end)) {
msleep(20);
goto retry;
}
}
return err;
}
static void xe_exec_end(struct xe_exec_queue *q,
struct ttm_validate_buffer *tv_onstack,
struct ttm_validate_buffer *tv,
struct ww_acquire_ctx *ww,
struct list_head *objs)
{
if (!xe_vm_no_dma_fences(q->vm))
xe_vm_unlock_dma_resv(q->vm, tv_onstack, tv, ww, objs);
}
int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct xe_device *xe = to_xe_device(dev);
......@@ -173,15 +136,13 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
struct xe_exec_queue *q;
struct xe_sync_entry *syncs = NULL;
u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
struct ttm_validate_buffer *tv = NULL;
struct drm_exec exec;
u32 i, num_syncs = 0;
struct xe_sched_job *job;
struct dma_fence *rebind_fence;
struct xe_vm *vm;
struct ww_acquire_ctx ww;
struct list_head objs;
bool write_locked;
ktime_t end = 0;
int err = 0;
if (XE_IOCTL_DBG(xe, args->extensions) ||
......@@ -294,26 +255,34 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
goto err_unlock_list;
}
err = xe_exec_begin(q, &ww, tv_onstack, &tv, &objs);
if (err)
goto err_unlock_list;
drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
drm_exec_until_all_locked(&exec) {
err = xe_exec_begin(&exec, vm);
drm_exec_retry_on_contention(&exec);
if (err && xe_vm_validate_should_retry(&exec, err, &end)) {
err = -EAGAIN;
goto err_unlock_list;
}
if (err)
goto err_exec;
}
if (xe_vm_is_closed_or_banned(q->vm)) {
drm_warn(&xe->drm, "Trying to schedule after vm is closed or banned\n");
err = -ECANCELED;
goto err_exec_queue_end;
goto err_exec;
}
if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) {
err = -EWOULDBLOCK;
goto err_exec_queue_end;
goto err_exec;
}
job = xe_sched_job_create(q, xe_exec_queue_is_parallel(q) ?
addresses : &args->address);
if (IS_ERR(job)) {
err = PTR_ERR(job);
goto err_exec_queue_end;
goto err_exec;
}
/*
......@@ -412,8 +381,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
err_put_job:
if (err)
xe_sched_job_put(job);
err_exec_queue_end:
xe_exec_end(q, tv_onstack, tv, &ww, &objs);
err_exec:
drm_exec_fini(&exec);
err_unlock_list:
if (write_locked)
up_write(&vm->lock);
......
This diff is collapsed.
......@@ -21,6 +21,7 @@ struct ttm_validate_buffer;
struct xe_exec_queue;
struct xe_file;
struct xe_sync_entry;
struct drm_exec;
struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags);
......@@ -208,23 +209,10 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma);
int xe_vma_userptr_check_repin(struct xe_vma *vma);
/*
* XE_ONSTACK_TV is used to size the tv_onstack array that is input
* to xe_vm_lock_dma_resv() and xe_vm_unlock_dma_resv().
*/
#define XE_ONSTACK_TV 20
int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
struct ttm_validate_buffer *tv_onstack,
struct ttm_validate_buffer **tv,
struct list_head *objs,
bool intr,
unsigned int num_shared);
void xe_vm_unlock_dma_resv(struct xe_vm *vm,
struct ttm_validate_buffer *tv_onstack,
struct ttm_validate_buffer *tv,
struct ww_acquire_ctx *ww,
struct list_head *objs);
bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end);
int xe_vm_lock_dma_resv(struct xe_vm *vm, struct drm_exec *exec,
unsigned int num_shared, bool lock_vm);
void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
enum dma_resv_usage usage);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment