Commit 34f89ac8 authored by Niranjana Vishwanathapura, committed by Rodrigo Vivi

drm/xe: Handle -EDEADLK case in exec ioctl

With multiple active VMs, under memory pressure, it is possible that
ttm_bo_validate() runs into -EDEADLK in ttm_mem_evict_wait_busy() and
returns -ENOMEM.

Until TTM properly handles locking in such scenarios, the best thing the
driver can do is unwind the lock and retry.

Update xe_exec_begin to retry validating BOs with a timeout upon
-ENOMEM.
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
parent 9ca14f94
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <drm/drm_device.h> #include <drm/drm_device.h>
#include <drm/drm_file.h> #include <drm/drm_file.h>
#include <drm/xe_drm.h> #include <drm/xe_drm.h>
#include <linux/delay.h>
#include "xe_bo.h" #include "xe_bo.h"
#include "xe_device.h" #include "xe_device.h"
...@@ -91,6 +92,8 @@ ...@@ -91,6 +92,8 @@
* Unlock all * Unlock all
*/ */
#define XE_EXEC_BIND_RETRY_TIMEOUT_MS 1000
static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww, static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
struct ttm_validate_buffer tv_onstack[], struct ttm_validate_buffer tv_onstack[],
struct ttm_validate_buffer **tv, struct ttm_validate_buffer **tv,
...@@ -99,12 +102,14 @@ static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww, ...@@ -99,12 +102,14 @@ static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
struct xe_vm *vm = e->vm; struct xe_vm *vm = e->vm;
struct xe_vma *vma; struct xe_vma *vma;
LIST_HEAD(dups); LIST_HEAD(dups);
int err; ktime_t end = 0;
int err = 0;
*tv = NULL; *tv = NULL;
if (xe_vm_no_dma_fences(e->vm)) if (xe_vm_no_dma_fences(e->vm))
return 0; return 0;
retry:
err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1); err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1);
if (err) if (err)
return err; return err;
...@@ -122,11 +127,27 @@ static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww, ...@@ -122,11 +127,27 @@ static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
if (err) { if (err) {
xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs); xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs);
*tv = NULL; *tv = NULL;
return err; break;
} }
} }
return 0; /*
* With multiple active VMs, under memory pressure, it is possible that
* ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM.
* Until ttm properly handles locking in such scenarios, best thing the
* driver can do is retry with a timeout.
*/
if (err == -ENOMEM) {
ktime_t cur = ktime_get();
end = end ? : ktime_add_ms(cur, XE_EXEC_BIND_RETRY_TIMEOUT_MS);
if (ktime_before(cur, end)) {
msleep(20);
goto retry;
}
}
return err;
} }
static void xe_exec_end(struct xe_engine *e, static void xe_exec_end(struct xe_engine *e,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment